From aca6f968149b2b6d6fa4e5fb368b5e22167014f3 Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Tue, 20 Feb 2024 20:08:59 +0800 Subject: [PATCH 1/5] Initialize the release version for README.md --- README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/README.md b/README.md index 2578a41..a9fbbf4 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,14 @@ # DesktopEnv: An Environment towards Human-like Computer Task Mastery +

+ SLOGAN +

+ +

+Website • +Paper +

+ +![Overview](media/overview.png) ## Updates - 2024-03-01: From 807818c9a234d2732c817d745926a8c3423ccc83 Mon Sep 17 00:00:00 2001 From: David Chang Date: Tue, 20 Feb 2024 23:29:13 +0800 Subject: [PATCH 2/5] ver Feb20thv2 corrected several SheetCopilot tasks fixed load_charts --- desktop_env/evaluators/metrics/table.py | 5 +++- desktop_env/evaluators/metrics/utils.py | 25 ++++++---------- .../035f41ba-6653-43ab-aa63-c86d449d62e5.json | 2 +- .../26a8440e-c166-4c50-aef4-bfb77314b46b.json | 6 ++-- .../28047f4a-d877-4bea-95f7-e42b1c919957.json | 14 ++++----- .../30e3e107-1cfb-46ee-a755-2cd080d7ba6a.json | 29 +++++++++++++++---- .../42e0a640-4f19-4b28-973d-729602b5a4a7.json | 6 ++-- 7 files changed, 51 insertions(+), 36 deletions(-) diff --git a/desktop_env/evaluators/metrics/table.py b/desktop_env/evaluators/metrics/table.py index e2c105c..82ad0ac 100644 --- a/desktop_env/evaluators/metrics/table.py +++ b/desktop_env/evaluators/metrics/table.py @@ -144,7 +144,10 @@ def compare_table(result: str, expected: str = None, **options) -> float: metric: bool = sheet1.equals(sheet2) logger.debug("Sheet1: \n%s", str(sheet1)) logger.debug("Sheet2: \n%s", str(sheet2)) - logger.debug("Sheet1 =v= Sheet2: \n%s", str(sheet1==sheet2)) + try: + logger.debug("Sheet1 =v= Sheet2: \n%s", str(sheet1==sheet2)) + except: + logger.debug("Sheet1 =/v= Sheet2") logger.debug("Assertion: %s =v= %s - %s", r["sheet_idx0"], r["sheet_idx1"], metric) # }}} Compare Sheet Data by Internal Value # diff --git a/desktop_env/evaluators/metrics/utils.py b/desktop_env/evaluators/metrics/utils.py index 55a6e8e..0747726 100644 --- a/desktop_env/evaluators/metrics/utils.py +++ b/desktop_env/evaluators/metrics/utils.py @@ -119,22 +119,15 @@ def load_charts(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, An for ch in charts: series: List[str] = [] for ser in ch.series: - value_num = ser.val.numRef.f \ - if hasattr(ser.val, "numRef") and hasattr(ser.val.numRef, "f") \ - else "" - value_str = ser.val.strRef.f \ - if hasattr(ser.val, "strRef") and hasattr(ser.val.strRef, "f") \ - else "" - categ_num = ser.cat.numRef.f \ - if hasattr(ser.cat, "numRef") and hasattr(ser.cat.numRef, "f") \ - else "" - categ_str = ser.cat.strRef.f \ - if hasattr(ser.cat, "strRef") and hasattr(ser.cat.strRef, "f") \ - else "" - series.append("{:},{:},{:},{:}".format(value_num, value_str - , categ_num, categ_str - ) - ) + if hasattr(ser.val, "numRef") and hasattr(ser.val.numRef, "f"): + value_str: str = ser.val.numRef.f + elif hasattr(ser.val, "strRef") and hasattr(ser.val.strRef, "f"): + value_str: str = ser.val.strRef.f + if hasattr(ser.cat, "numRef") and hasattr(ser.cat.numRef, "f"): + categ_str: str = ser.cat.numRef.f + elif hasattr(ser.cat, "strRef") and hasattr(ser.cat.strRef, "f"): + categ_str: str = ser.cat.strRef.f + series.append("{:},{:}".format(value_str, categ_str)) series: str = ";".join(series) # TODO: maybe more aspects, like chart type diff --git a/evaluation_examples/examples/sheetcopilot/035f41ba-6653-43ab-aa63-c86d449d62e5.json b/evaluation_examples/examples/sheetcopilot/035f41ba-6653-43ab-aa63-c86d449d62e5.json index a267101..1f309e8 100644 --- a/evaluation_examples/examples/sheetcopilot/035f41ba-6653-43ab-aa63-c86d449d62e5.json +++ b/evaluation_examples/examples/sheetcopilot/035f41ba-6653-43ab-aa63-c86d449d62e5.json @@ -1,7 +1,7 @@ { "id": "035f41ba-6653-43ab-aa63-c86d449d62e5", "snapshot": "libreoffice_calc", - "instruction": "Help me fill in the Gross profit column and set the results as accounting type. Then under column A named \"Year_Profit\" in a new sheet, display the Year Column in Sheet 1 as text appended by a \"_\" with the corresponding Gross Profit value.", + "instruction": "Help me fill in the Gross profit column. Then under column A named \"Year_Profit\" in a new sheet, display the Year Column in Sheet 1 as text appended by a \"_\" with the corresponding Gross Profit value.", "source": "SheetCopilot@92", "config": [ { diff --git a/evaluation_examples/examples/sheetcopilot/26a8440e-c166-4c50-aef4-bfb77314b46b.json b/evaluation_examples/examples/sheetcopilot/26a8440e-c166-4c50-aef4-bfb77314b46b.json index edf3dc3..4e96b4d 100644 --- a/evaluation_examples/examples/sheetcopilot/26a8440e-c166-4c50-aef4-bfb77314b46b.json +++ b/evaluation_examples/examples/sheetcopilot/26a8440e-c166-4c50-aef4-bfb77314b46b.json @@ -73,10 +73,10 @@ "rules": [ { "type": "sheet_data", - "sheet_idx0": 0, - "sheet_idx1": "EI0" + "sheet_idx0": "RNSheet2", + "sheet_idx1": "ENSheet2" } ] } } -} \ No newline at end of file +} diff --git a/evaluation_examples/examples/sheetcopilot/28047f4a-d877-4bea-95f7-e42b1c919957.json b/evaluation_examples/examples/sheetcopilot/28047f4a-d877-4bea-95f7-e42b1c919957.json index c1fb986..3ab267e 100644 --- a/evaluation_examples/examples/sheetcopilot/28047f4a-d877-4bea-95f7-e42b1c919957.json +++ b/evaluation_examples/examples/sheetcopilot/28047f4a-d877-4bea-95f7-e42b1c919957.json @@ -1,7 +1,7 @@ { "id": "28047f4a-d877-4bea-95f7-e42b1c919957", "snapshot": "libreoffice_calc", - "instruction": "Summarize the sales for each product and illustrate it with a bar chart in a new sheet.", + "instruction": "Summarize the sales for each product and illustrate it with a bar chart in a new sheet (Sheet2).", "source": "SheetCopilot@108", "config": [ { @@ -54,7 +54,7 @@ { "type": "sleep", "parameters": { - "seconds": 0.5 + "seconds": 2.0 } } ], @@ -73,16 +73,16 @@ "rules": [ { "type": "chart", - "sheet_idx0": 0, - "sheet_idx1": "EI0", + "sheet_idx0": "RNSheet2", + "sheet_idx1": "ENSheet2", "chart_props": [ "type" ] }, { "type": "pivot_table", - "sheet_idx0": 0, - "sheet_idx1": "EI0", + "sheet_idx0": "RNSheet2", + "sheet_idx1": "ENSheet2", "pivot_props": [ "col_fields", "filter", @@ -93,4 +93,4 @@ ] } } -} \ No newline at end of file +} diff --git a/evaluation_examples/examples/sheetcopilot/30e3e107-1cfb-46ee-a755-2cd080d7ba6a.json b/evaluation_examples/examples/sheetcopilot/30e3e107-1cfb-46ee-a755-2cd080d7ba6a.json index dd32682..ad9c17f 100644 --- a/evaluation_examples/examples/sheetcopilot/30e3e107-1cfb-46ee-a755-2cd080d7ba6a.json +++ b/evaluation_examples/examples/sheetcopilot/30e3e107-1cfb-46ee-a755-2cd080d7ba6a.json @@ -73,21 +73,40 @@ "rules": [ { "type": "sheet_data", - "sheet_idx0": 0, - "sheet_idx1": "EI0" + "sheet_idx0": "RNSheet2", + "sheet_idx1": "ENSheet2" }, { "type": "pivot_table", - "sheet_idx0": 0, - "sheet_idx1": "EI0", + "sheet_idx0": "RNSheet2", + "sheet_idx1": "ENSheet2", "pivot_props": [ "col_fields", "filter", "row_fields", "data_fields" ] + }, + { + "type": "check_cell", + "sheet_idx": "RNSheet2", + "coordinate": "A1", + "props": { + "bgcolor": { + "method": "eq", + "ref": "FF0000FF" + }, + "font_color": { + "method": "eq", + "ref": "FFFFFFFF" + }, + "font_bold": { + "method": "eq", + "ref": true + } + } } ] } } -} \ No newline at end of file +} diff --git a/evaluation_examples/examples/sheetcopilot/42e0a640-4f19-4b28-973d-729602b5a4a7.json b/evaluation_examples/examples/sheetcopilot/42e0a640-4f19-4b28-973d-729602b5a4a7.json index e608701..2f33e42 100644 --- a/evaluation_examples/examples/sheetcopilot/42e0a640-4f19-4b28-973d-729602b5a4a7.json +++ b/evaluation_examples/examples/sheetcopilot/42e0a640-4f19-4b28-973d-729602b5a4a7.json @@ -73,10 +73,10 @@ "rules": [ { "type": "sheet_data", - "sheet_idx0": 0, - "sheet_idx1": "EI0" + "sheet_idx0": "RNSheet2", + "sheet_idx1": "ENSheet2" } ] } } -} \ No newline at end of file +} From 5eea8b7daf3ad91c5e3aed689a3a7c71c02f8049 Mon Sep 17 00:00:00 2001 From: David Chang Date: Wed, 21 Feb 2024 14:56:34 +0800 Subject: [PATCH 3/5] ver Feb21st updated several sheetcopilot tasks --- .../447b9505-7a2f-4863-9dd1-69395482eb4b.json | 11 ++++++----- .../4de54231-e4b5-49e3-b2ba-61a0bec721c0.json | 4 ++-- .../51719eea-10bc-4246-a428-ac7c433dd4b3.json | 16 ++++------------ .../535364ea-05bd-46ea-9937-9f55c68507e8.json | 16 ++++------------ 4 files changed, 16 insertions(+), 31 deletions(-) diff --git a/evaluation_examples/examples/sheetcopilot/447b9505-7a2f-4863-9dd1-69395482eb4b.json b/evaluation_examples/examples/sheetcopilot/447b9505-7a2f-4863-9dd1-69395482eb4b.json index c53bb07..58b3286 100644 --- a/evaluation_examples/examples/sheetcopilot/447b9505-7a2f-4863-9dd1-69395482eb4b.json +++ b/evaluation_examples/examples/sheetcopilot/447b9505-7a2f-4863-9dd1-69395482eb4b.json @@ -1,7 +1,7 @@ { "id": "447b9505-7a2f-4863-9dd1-69395482eb4b", "snapshot": "libreoffice_calc", - "instruction": "Divide the values through column C to P by 1 million and put the results along with the year headers in a new sheet. Set the results type as accounting.", + "instruction": "Divide the values through column C to P by 1 million and put the results along with the year headers in a new sheet.", "source": "SheetCopilot@28", "config": [ { @@ -54,7 +54,7 @@ { "type": "sleep", "parameters": { - "seconds": 0.5 + "seconds": 10 } } ], @@ -73,10 +73,11 @@ "rules": [ { "type": "sheet_data", - "sheet_idx0": 0, - "sheet_idx1": "EI0" + "sheet_idx0": "RNSheet2", + "sheet_idx1": "ENSheet2", + "precision": 2 } ] } } -} \ No newline at end of file +} diff --git a/evaluation_examples/examples/sheetcopilot/4de54231-e4b5-49e3-b2ba-61a0bec721c0.json b/evaluation_examples/examples/sheetcopilot/4de54231-e4b5-49e3-b2ba-61a0bec721c0.json index bb8ea76..3a29eea 100644 --- a/evaluation_examples/examples/sheetcopilot/4de54231-e4b5-49e3-b2ba-61a0bec721c0.json +++ b/evaluation_examples/examples/sheetcopilot/4de54231-e4b5-49e3-b2ba-61a0bec721c0.json @@ -1,7 +1,7 @@ { "id": "4de54231-e4b5-49e3-b2ba-61a0bec721c0", "snapshot": "libreoffice_calc", - "instruction": "I have compute the acceleration in row 2 and I want you to fill out other rows for column B and E. Next concatenate the values from columns A to D, including their headers (the pattern is \"Header: cell value, ..., Header: cell value\"), into a new column named \"Combined Data\" for all rows.", + "instruction": "I have compute the acceleration in row 2 and I want you to fill out other rows for column B and D. Next concatenate the values from columns A to D, including their headers (the pattern is \"Header: cell value, ..., Header: cell value\"), into a new column named \"Combined Data\" for all rows. In the new column, only keep 2 decimal digits.", "source": "SheetCopilot@147", "config": [ { @@ -79,4 +79,4 @@ ] } } -} \ No newline at end of file +} diff --git a/evaluation_examples/examples/sheetcopilot/51719eea-10bc-4246-a428-ac7c433dd4b3.json b/evaluation_examples/examples/sheetcopilot/51719eea-10bc-4246-a428-ac7c433dd4b3.json index 4098230..90faf05 100644 --- a/evaluation_examples/examples/sheetcopilot/51719eea-10bc-4246-a428-ac7c433dd4b3.json +++ b/evaluation_examples/examples/sheetcopilot/51719eea-10bc-4246-a428-ac7c433dd4b3.json @@ -1,7 +1,7 @@ { "id": "51719eea-10bc-4246-a428-ac7c433dd4b3", "snapshot": "libreoffice_calc", - "instruction": "Calculate revenue and generate a Pivot Table in a new sheet that summarizes the revenue of each product. In the same sheet, create a bar chart to display the table with data labels on top of the bars.", + "instruction": "Calculate revenue in a new column and generate a Pivot Table in a new sheet (Sheet2) that summarizes the revenue of each product.", "source": "SheetCopilot@7", "config": [ { @@ -71,18 +71,10 @@ }, "options": { "rules": [ - { - "type": "chart", - "sheet_idx0": 0, - "sheet_idx1": "EI0", - "chart_props": [ - "type" - ] - }, { "type": "pivot_table", - "sheet_idx0": 0, - "sheet_idx1": "EI0", + "sheet_idx0": "RNSheet2", + "sheet_idx1": "ENSheet2", "pivot_props": [ "col_fields", "filter", @@ -93,4 +85,4 @@ ] } } -} \ No newline at end of file +} diff --git a/evaluation_examples/examples/sheetcopilot/535364ea-05bd-46ea-9937-9f55c68507e8.json b/evaluation_examples/examples/sheetcopilot/535364ea-05bd-46ea-9937-9f55c68507e8.json index 34a5c0c..8bf2502 100644 --- a/evaluation_examples/examples/sheetcopilot/535364ea-05bd-46ea-9937-9f55c68507e8.json +++ b/evaluation_examples/examples/sheetcopilot/535364ea-05bd-46ea-9937-9f55c68507e8.json @@ -1,7 +1,7 @@ { "id": "535364ea-05bd-46ea-9937-9f55c68507e8", "snapshot": "libreoffice_calc", - "instruction": "Create two tables in a new sheet showing the total revenue for each product and sales channel. Plot a horizontal bar chart for the former and a pie chart for the latter in the new sheet.", + "instruction": "Create two tables in a new sheet showing the total revenue for each product and sales channel.", "source": "SheetCopilot@180", "config": [ { @@ -71,18 +71,10 @@ }, "options": { "rules": [ - { - "type": "chart", - "sheet_idx0": 0, - "sheet_idx1": "EI0", - "chart_props": [ - "type" - ] - }, { "type": "pivot_table", - "sheet_idx0": 0, - "sheet_idx1": "EI0", + "sheet_idx0": "RNSheet2", + "sheet_idx1": "ENSheet2", "pivot_props": [ "col_fields", "filter", @@ -93,4 +85,4 @@ ] } } -} \ No newline at end of file +} From e1cf8da4e0ea7820466b64063b52b5254d9d1294 Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Wed, 21 Feb 2024 21:22:12 +0800 Subject: [PATCH 4/5] Fix the infeasible examples support --- desktop_env/envs/desktop_env.py | 29 ++++++++++++------- desktop_env/evaluators/metrics/__init__.py | 4 +++ .../3720f614-37fd-4d04-8a6b-76f54f8c222d.json | 6 +--- .../93eabf48-6a27-4cb6-b963-7d5fe1e0d3a9.json | 6 +--- .../ae78f875-5b98-4907-bbb5-9c737fc68c03.json | 6 +--- .../38f48d40-764e-4e77-a7cf-51dfce880291.json | 6 +--- .../5ca86c6f-f317-49d8-b6a7-b527541caae8.json | 6 +--- .../62f7fd55-0687-4a43-b6e1-3eda16fc6252.json | 6 +--- .../e19bd559-633b-4b02-940f-d946248f088e.json | 6 +--- .../fbb548ca-c2a6-4601-9204-e39a2efc507b.json | 6 +--- .../bb8ccc78-479f-4a2f-a71e-d565e439436b.json | 4 +-- .../4783cc41-c03c-4e1b-89b4-50658f642bd5.json | 6 +--- .../a462a795-fdc7-4b23-b689-e8b6df786b78.json | 6 +--- .../b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa.json | 6 +--- .../c288e301-e626-4b98-a1ab-159dcb162af5.json | 6 +--- .../fe41f596-a71b-4c2f-9b2f-9dcd40b568c3.json | 6 +--- .../99146c54-4f37-4ab8-9327-5f3291665e1e.json | 6 +--- .../5ac2891a-eacd-4954-b339-98abba077adb.json | 6 +--- .../7882ed6e-bece-4bf0-bada-c32dc1ddae72.json | 6 +--- .../cb130f0d-d36f-4302-9838-b3baf46139b6.json | 6 +--- .../7aeae0e2-70ee-4705-821d-1bba5d5b2ddd.json | 6 +--- .../7c4cc09e-7a92-40dd-8338-b2286535c4ed.json | 6 +--- .../847a96b6-df94-4927-97e6-8cc9ea66ced7.json | 6 +--- .../971cbb5b-3cbf-4ff7-9e24-b5c84fcebfa6.json | 6 +--- .../dcbe20e8-647f-4f1d-8696-f1c5bbb570e3.json | 6 +--- 25 files changed, 46 insertions(+), 123 deletions(-) diff --git a/desktop_env/envs/desktop_env.py b/desktop_env/envs/desktop_env.py index 330fe2d..ed2f8c4 100644 --- a/desktop_env/envs/desktop_env.py +++ b/desktop_env/envs/desktop_env.py @@ -26,11 +26,19 @@ Getter = Callable[[gym.Env, Dict[str, Any]], Any] def _execute_command(command: List[str]) -> None: - if command[:4] == ["vmrun", "-T", "ws", "start"]: + def _is_contained_in(a, b): + for v in set(a): + if a.count(v) > b.count(v): + return False + return True + + # Specially handled for the `vmrun` command in Windows + if _is_contained_in(["vmrun", "-T", "ws", "start"], command): p = subprocess.Popen(command) p.wait() else: - result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=60, text=True, encoding="utf-8") + result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=60, text=True, + encoding="utf-8") if result.returncode != 0: raise Exception("\033[91m" + result.stdout + result.stderr + "\033[0m") return result.stdout @@ -132,7 +140,8 @@ class DesktopEnv(gym.Env): output = _execute_command(["vmrun", "-T", "ws", "getGuestIPAddress", self.path_to_vm]).strip() logger.info(f"IP address: {output}") return output - except: + except Exception as e: + print(e) time.sleep(5) logger.info("Retrying...") raise Exception("Failed to get VM IP address!") @@ -175,14 +184,14 @@ class DesktopEnv(gym.Env): # even if one of the metrics does not need expected or options field, it should be included in the list with None self.evaluator = task_config["evaluator"] self.metric: Metric = [getattr(metrics, func) for func in self.evaluator["func"]] \ - if isinstance(self.evaluator["func"], list) \ - else getattr(metrics, self.evaluator["func"]) + if isinstance(self.evaluator["func"], list) \ + else getattr(metrics, self.evaluator["func"]) self.metric_conj: str = self.evaluator.get("conj", "and") # take conjunction of multiple metrics if "result" in self.evaluator: self.result_getter: Getter = [getattr(getters, "get_{:}".format(res["type"])) for res in - self.evaluator["result"]] \ - if isinstance(self.evaluator["result"], list) \ - else getattr(getters, "get_{:}".format(self.evaluator["result"]["type"])) + self.evaluator["result"]] \ + if isinstance(self.evaluator["result"], list) \ + else getattr(getters, "get_{:}".format(self.evaluator["result"]["type"])) else: self.result_getter = [None] * len(self.metric) \ if isinstance(self.metric, list) \ @@ -302,8 +311,8 @@ class DesktopEnv(gym.Env): self.setup_controller.setup(self.evaluator.get("postconfig", [])) - if self.metric == "infeasible": - if self.action_history[-1] == "FAIL": + if self.evaluator['func'] == "infeasible": + if len(self.action_history) > 0 and self.action_history[-1] == "FAIL": return 1 else: return 0 diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py index ccdda25..c3e80b3 100644 --- a/desktop_env/evaluators/metrics/__init__.py +++ b/desktop_env/evaluators/metrics/__init__.py @@ -121,3 +121,7 @@ from .vscode import ( check_json_settings, check_json_keybindings ) + + +def infeasible(): + pass diff --git a/evaluation_examples/examples/chrome/3720f614-37fd-4d04-8a6b-76f54f8c222d.json b/evaluation_examples/examples/chrome/3720f614-37fd-4d04-8a6b-76f54f8c222d.json index ee0b3c5..3b380d7 100644 --- a/evaluation_examples/examples/chrome/3720f614-37fd-4d04-8a6b-76f54f8c222d.json +++ b/evaluation_examples/examples/chrome/3720f614-37fd-4d04-8a6b-76f54f8c222d.json @@ -10,10 +10,6 @@ "chrome" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } diff --git a/evaluation_examples/examples/chrome/93eabf48-6a27-4cb6-b963-7d5fe1e0d3a9.json b/evaluation_examples/examples/chrome/93eabf48-6a27-4cb6-b963-7d5fe1e0d3a9.json index 331a46e..4bac6b3 100644 --- a/evaluation_examples/examples/chrome/93eabf48-6a27-4cb6-b963-7d5fe1e0d3a9.json +++ b/evaluation_examples/examples/chrome/93eabf48-6a27-4cb6-b963-7d5fe1e0d3a9.json @@ -10,10 +10,6 @@ "chrome" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } diff --git a/evaluation_examples/examples/chrome/ae78f875-5b98-4907-bbb5-9c737fc68c03.json b/evaluation_examples/examples/chrome/ae78f875-5b98-4907-bbb5-9c737fc68c03.json index e19fdc5..fae40e0 100644 --- a/evaluation_examples/examples/chrome/ae78f875-5b98-4907-bbb5-9c737fc68c03.json +++ b/evaluation_examples/examples/chrome/ae78f875-5b98-4907-bbb5-9c737fc68c03.json @@ -10,10 +10,6 @@ "chrome" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } diff --git a/evaluation_examples/examples/gimp/38f48d40-764e-4e77-a7cf-51dfce880291.json b/evaluation_examples/examples/gimp/38f48d40-764e-4e77-a7cf-51dfce880291.json index 7bfd43f..a64532a 100644 --- a/evaluation_examples/examples/gimp/38f48d40-764e-4e77-a7cf-51dfce880291.json +++ b/evaluation_examples/examples/gimp/38f48d40-764e-4e77-a7cf-51dfce880291.json @@ -21,10 +21,6 @@ "gimp" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } diff --git a/evaluation_examples/examples/gimp/5ca86c6f-f317-49d8-b6a7-b527541caae8.json b/evaluation_examples/examples/gimp/5ca86c6f-f317-49d8-b6a7-b527541caae8.json index d81e38c..b15416d 100644 --- a/evaluation_examples/examples/gimp/5ca86c6f-f317-49d8-b6a7-b527541caae8.json +++ b/evaluation_examples/examples/gimp/5ca86c6f-f317-49d8-b6a7-b527541caae8.json @@ -10,10 +10,6 @@ "gimp" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } diff --git a/evaluation_examples/examples/gimp/62f7fd55-0687-4a43-b6e1-3eda16fc6252.json b/evaluation_examples/examples/gimp/62f7fd55-0687-4a43-b6e1-3eda16fc6252.json index 767648c..e3f1c08 100644 --- a/evaluation_examples/examples/gimp/62f7fd55-0687-4a43-b6e1-3eda16fc6252.json +++ b/evaluation_examples/examples/gimp/62f7fd55-0687-4a43-b6e1-3eda16fc6252.json @@ -21,10 +21,6 @@ "gimp" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } diff --git a/evaluation_examples/examples/gimp/e19bd559-633b-4b02-940f-d946248f088e.json b/evaluation_examples/examples/gimp/e19bd559-633b-4b02-940f-d946248f088e.json index 92300d9..97b54a4 100644 --- a/evaluation_examples/examples/gimp/e19bd559-633b-4b02-940f-d946248f088e.json +++ b/evaluation_examples/examples/gimp/e19bd559-633b-4b02-940f-d946248f088e.json @@ -10,10 +10,6 @@ "gimp" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } diff --git a/evaluation_examples/examples/gimp/fbb548ca-c2a6-4601-9204-e39a2efc507b.json b/evaluation_examples/examples/gimp/fbb548ca-c2a6-4601-9204-e39a2efc507b.json index b8addf8..01318d5 100644 --- a/evaluation_examples/examples/gimp/fbb548ca-c2a6-4601-9204-e39a2efc507b.json +++ b/evaluation_examples/examples/gimp/fbb548ca-c2a6-4601-9204-e39a2efc507b.json @@ -10,10 +10,6 @@ "gimp" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } diff --git a/evaluation_examples/examples/libreoffice_writer/bb8ccc78-479f-4a2f-a71e-d565e439436b.json b/evaluation_examples/examples/libreoffice_writer/bb8ccc78-479f-4a2f-a71e-d565e439436b.json index 90177e5..52e64d3 100644 --- a/evaluation_examples/examples/libreoffice_writer/bb8ccc78-479f-4a2f-a71e-d565e439436b.json +++ b/evaluation_examples/examples/libreoffice_writer/bb8ccc78-479f-4a2f-a71e-d565e439436b.json @@ -27,8 +27,6 @@ "libreoffice_writer" ], "evaluator": { - "func": "infeasible", - "result": { - } + "func": "infeasible" } } \ No newline at end of file diff --git a/evaluation_examples/examples/os/4783cc41-c03c-4e1b-89b4-50658f642bd5.json b/evaluation_examples/examples/os/4783cc41-c03c-4e1b-89b4-50658f642bd5.json index 4981df0..1382a94 100644 --- a/evaluation_examples/examples/os/4783cc41-c03c-4e1b-89b4-50658f642bd5.json +++ b/evaluation_examples/examples/os/4783cc41-c03c-4e1b-89b4-50658f642bd5.json @@ -27,10 +27,6 @@ "os" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } diff --git a/evaluation_examples/examples/os/a462a795-fdc7-4b23-b689-e8b6df786b78.json b/evaluation_examples/examples/os/a462a795-fdc7-4b23-b689-e8b6df786b78.json index 19dc17f..537e48d 100644 --- a/evaluation_examples/examples/os/a462a795-fdc7-4b23-b689-e8b6df786b78.json +++ b/evaluation_examples/examples/os/a462a795-fdc7-4b23-b689-e8b6df786b78.json @@ -27,10 +27,6 @@ "os" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } diff --git a/evaluation_examples/examples/os/b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa.json b/evaluation_examples/examples/os/b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa.json index d7bb963..dd2a37e 100644 --- a/evaluation_examples/examples/os/b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa.json +++ b/evaluation_examples/examples/os/b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa.json @@ -9,10 +9,6 @@ "os" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } \ No newline at end of file diff --git a/evaluation_examples/examples/os/c288e301-e626-4b98-a1ab-159dcb162af5.json b/evaluation_examples/examples/os/c288e301-e626-4b98-a1ab-159dcb162af5.json index 85cde22..e444d0c 100644 --- a/evaluation_examples/examples/os/c288e301-e626-4b98-a1ab-159dcb162af5.json +++ b/evaluation_examples/examples/os/c288e301-e626-4b98-a1ab-159dcb162af5.json @@ -8,10 +8,6 @@ "os" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } \ No newline at end of file diff --git a/evaluation_examples/examples/os/fe41f596-a71b-4c2f-9b2f-9dcd40b568c3.json b/evaluation_examples/examples/os/fe41f596-a71b-4c2f-9b2f-9dcd40b568c3.json index b20f794..7a0a070 100644 --- a/evaluation_examples/examples/os/fe41f596-a71b-4c2f-9b2f-9dcd40b568c3.json +++ b/evaluation_examples/examples/os/fe41f596-a71b-4c2f-9b2f-9dcd40b568c3.json @@ -10,10 +10,6 @@ "os" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } \ No newline at end of file diff --git a/evaluation_examples/examples/thunderbird/99146c54-4f37-4ab8-9327-5f3291665e1e.json b/evaluation_examples/examples/thunderbird/99146c54-4f37-4ab8-9327-5f3291665e1e.json index b5e42b0..1df08f6 100644 --- a/evaluation_examples/examples/thunderbird/99146c54-4f37-4ab8-9327-5f3291665e1e.json +++ b/evaluation_examples/examples/thunderbird/99146c54-4f37-4ab8-9327-5f3291665e1e.json @@ -43,10 +43,6 @@ "thunderbird" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } diff --git a/evaluation_examples/examples/vlc/5ac2891a-eacd-4954-b339-98abba077adb.json b/evaluation_examples/examples/vlc/5ac2891a-eacd-4954-b339-98abba077adb.json index 5e45906..cc13526 100644 --- a/evaluation_examples/examples/vlc/5ac2891a-eacd-4954-b339-98abba077adb.json +++ b/evaluation_examples/examples/vlc/5ac2891a-eacd-4954-b339-98abba077adb.json @@ -16,10 +16,6 @@ "vlc" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } diff --git a/evaluation_examples/examples/vlc/7882ed6e-bece-4bf0-bada-c32dc1ddae72.json b/evaluation_examples/examples/vlc/7882ed6e-bece-4bf0-bada-c32dc1ddae72.json index 199e6b0..ba7df32 100644 --- a/evaluation_examples/examples/vlc/7882ed6e-bece-4bf0-bada-c32dc1ddae72.json +++ b/evaluation_examples/examples/vlc/7882ed6e-bece-4bf0-bada-c32dc1ddae72.json @@ -16,10 +16,6 @@ "vlc" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } diff --git a/evaluation_examples/examples/vlc/cb130f0d-d36f-4302-9838-b3baf46139b6.json b/evaluation_examples/examples/vlc/cb130f0d-d36f-4302-9838-b3baf46139b6.json index 851f49a..7937817 100644 --- a/evaluation_examples/examples/vlc/cb130f0d-d36f-4302-9838-b3baf46139b6.json +++ b/evaluation_examples/examples/vlc/cb130f0d-d36f-4302-9838-b3baf46139b6.json @@ -16,10 +16,6 @@ "vlc" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } diff --git a/evaluation_examples/examples/vs_code/7aeae0e2-70ee-4705-821d-1bba5d5b2ddd.json b/evaluation_examples/examples/vs_code/7aeae0e2-70ee-4705-821d-1bba5d5b2ddd.json index 8511936..d7e99ec 100644 --- a/evaluation_examples/examples/vs_code/7aeae0e2-70ee-4705-821d-1bba5d5b2ddd.json +++ b/evaluation_examples/examples/vs_code/7aeae0e2-70ee-4705-821d-1bba5d5b2ddd.json @@ -36,10 +36,6 @@ "vscode" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } diff --git a/evaluation_examples/examples/vs_code/7c4cc09e-7a92-40dd-8338-b2286535c4ed.json b/evaluation_examples/examples/vs_code/7c4cc09e-7a92-40dd-8338-b2286535c4ed.json index a86d94c..d218899 100644 --- a/evaluation_examples/examples/vs_code/7c4cc09e-7a92-40dd-8338-b2286535c4ed.json +++ b/evaluation_examples/examples/vs_code/7c4cc09e-7a92-40dd-8338-b2286535c4ed.json @@ -24,10 +24,6 @@ "vscode" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } diff --git a/evaluation_examples/examples/vs_code/847a96b6-df94-4927-97e6-8cc9ea66ced7.json b/evaluation_examples/examples/vs_code/847a96b6-df94-4927-97e6-8cc9ea66ced7.json index 0abf52a..da1ed29 100644 --- a/evaluation_examples/examples/vs_code/847a96b6-df94-4927-97e6-8cc9ea66ced7.json +++ b/evaluation_examples/examples/vs_code/847a96b6-df94-4927-97e6-8cc9ea66ced7.json @@ -59,10 +59,6 @@ "vscode" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } diff --git a/evaluation_examples/examples/vs_code/971cbb5b-3cbf-4ff7-9e24-b5c84fcebfa6.json b/evaluation_examples/examples/vs_code/971cbb5b-3cbf-4ff7-9e24-b5c84fcebfa6.json index 889f8fa..add5087 100644 --- a/evaluation_examples/examples/vs_code/971cbb5b-3cbf-4ff7-9e24-b5c84fcebfa6.json +++ b/evaluation_examples/examples/vs_code/971cbb5b-3cbf-4ff7-9e24-b5c84fcebfa6.json @@ -24,10 +24,6 @@ "vscode" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } diff --git a/evaluation_examples/examples/vs_code/dcbe20e8-647f-4f1d-8696-f1c5bbb570e3.json b/evaluation_examples/examples/vs_code/dcbe20e8-647f-4f1d-8696-f1c5bbb570e3.json index 63e1f4f..7503b10 100644 --- a/evaluation_examples/examples/vs_code/dcbe20e8-647f-4f1d-8696-f1c5bbb570e3.json +++ b/evaluation_examples/examples/vs_code/dcbe20e8-647f-4f1d-8696-f1c5bbb570e3.json @@ -24,10 +24,6 @@ "vscode" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } From 91bc795de182d21504d3ce0a691ee12e234fd118 Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Thu, 22 Feb 2024 00:04:02 +0800 Subject: [PATCH 5/5] Examine and load new batch of OS examples from NL2Bash --- .../37887e8c-da15-4192-923c-08fa390a176d.json | 2 +- .../4127319a-8b79-4410-b58a-7a151e15f3d7.json | 9 ++++++- .../4d117223-a354-47fb-8b45-62ab1390a95f.json | 10 ++++++-- .../5c1075ca-bb34-46a3-a7a0-029bd7463e79.json | 25 ++++++++++++++++--- .../5ced85fc-fa1a-4217-95fd-0fb530545ce2.json | 15 ++++++++--- .../6f56bf42-85b8-4fbb-8e06-6c44960184ba.json | 20 +++++++++++++-- 6 files changed, 69 insertions(+), 12 deletions(-) diff --git a/evaluation_examples/examples/os/37887e8c-da15-4192-923c-08fa390a176d.json b/evaluation_examples/examples/os/37887e8c-da15-4192-923c-08fa390a176d.json index 76e5ec6..9f4be8c 100644 --- a/evaluation_examples/examples/os/37887e8c-da15-4192-923c-08fa390a176d.json +++ b/evaluation_examples/examples/os/37887e8c-da15-4192-923c-08fa390a176d.json @@ -41,7 +41,7 @@ "parameters": { "files": [ { - "url": "https://drive.usercontent.google.com/download?id=1DakvqJfSokEPuH8_LYfSCBbM7Fws1F0o&export=download&authuser=0&confirm=t&uuid=4950eb71-7881-4b52-a94c-a3eed9d2213f&at=APZUnTUy4o4r1ScCnTgwPwNyqhPr:1707992479376", + "url": "https://drive.usercontent.google.com/download?id=1DakvqJfSokEPuH8_LYfSCBbM7Fws1F0o&export=download&authuser=0&confirm=t&uuid=42428619-921a-4405-abbf-e5fa7ea0e1b7&at=APZUnTX6nAbw51hosLfUD5CftRg6:1708528738860", "path": "eval.sh" } ] diff --git a/evaluation_examples/examples/os/4127319a-8b79-4410-b58a-7a151e15f3d7.json b/evaluation_examples/examples/os/4127319a-8b79-4410-b58a-7a151e15f3d7.json index c1ed512..31d573f 100644 --- a/evaluation_examples/examples/os/4127319a-8b79-4410-b58a-7a151e15f3d7.json +++ b/evaluation_examples/examples/os/4127319a-8b79-4410-b58a-7a151e15f3d7.json @@ -25,7 +25,14 @@ { "type": "execute", "parameters": { - "command": "./setup.sh", + "command": "bash ./setup.sh", + "shell": true + } + }, + { + "type": "execute", + "parameters": { + "command": "xdg-open /home/user/project", "shell": true } } diff --git a/evaluation_examples/examples/os/4d117223-a354-47fb-8b45-62ab1390a95f.json b/evaluation_examples/examples/os/4d117223-a354-47fb-8b45-62ab1390a95f.json index e1af8b6..9e69eba 100644 --- a/evaluation_examples/examples/os/4d117223-a354-47fb-8b45-62ab1390a95f.json +++ b/evaluation_examples/examples/os/4d117223-a354-47fb-8b45-62ab1390a95f.json @@ -36,17 +36,23 @@ "command": [ "python", "-c", - "import pyautogui; import time; pyautogui.click(960, 540); time.sleep(0.5);" + "import pyautogui; import time; pyautogui.click(960, 540); time.sleep(0.5); pyautogui.hotkey('ctrl', 'alt', 't'); time.sleep(0.5)" ] } }, + { + "type": "activate_window", + "parameters": { + "window_name": "Terminal" + } + }, { "type": "execute", "parameters": { "command": [ "python", "-c", - "import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"alt\", \"t\"); time.sleep(0.5); pyautogui.type(\"cd testDir\"); time.sleep(0.5); pyautogui.press(\"enter\")" + "import pyautogui; import time; time.sleep(0.5); pyautogui.write('cd testDir'); time.sleep(0.5); pyautogui.press('enter'); time.sleep(0.5); pyautogui.write('clear'); time.sleep(0.5); pyautogui.press('enter')" ] } } diff --git a/evaluation_examples/examples/os/5c1075ca-bb34-46a3-a7a0-029bd7463e79.json b/evaluation_examples/examples/os/5c1075ca-bb34-46a3-a7a0-029bd7463e79.json index 7c69d7c..163b49c 100644 --- a/evaluation_examples/examples/os/5c1075ca-bb34-46a3-a7a0-029bd7463e79.json +++ b/evaluation_examples/examples/os/5c1075ca-bb34-46a3-a7a0-029bd7463e79.json @@ -27,7 +27,7 @@ "parameters": { "files": [ { - "url": "https://drive.usercontent.google.com/download?id=1GeXD_pWlqZ7HCco9RorjzJ_f3DeeP91V&export=download&authuser=0&confirm=t&uuid=f4054888-3228-440b-8833-55c50961ea90&at=APZUnTVCiBJw-lRosK673DlvhHyx:1707985591894", + "url": "https://drive.usercontent.google.com/download?id=1GeXD_pWlqZ7HCco9RorjzJ_f3DeeP91V&export=download&authuser=0&confirm=t&uuid=9d69de2c-9908-4f6e-ad21-e8329440ee89&at=APZUnTU0uxOOMOi0HuEo2mRLcyjW:1708524614643", "path": "eval.sh" } ] @@ -50,8 +50,27 @@ { "type": "execute", "parameters": { - "command": "cd test_environment", - "shell": true + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.click(960, 540); time.sleep(0.5); pyautogui.hotkey('ctrl', 'alt', 't'); time.sleep(0.5)" + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Terminal" + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; time.sleep(0.5); pyautogui.write('cd test_environment'); time.sleep(0.5); pyautogui.press('enter')" + ] } } ], diff --git a/evaluation_examples/examples/os/5ced85fc-fa1a-4217-95fd-0fb530545ce2.json b/evaluation_examples/examples/os/5ced85fc-fa1a-4217-95fd-0fb530545ce2.json index 48a7e72..770eb56 100644 --- a/evaluation_examples/examples/os/5ced85fc-fa1a-4217-95fd-0fb530545ce2.json +++ b/evaluation_examples/examples/os/5ced85fc-fa1a-4217-95fd-0fb530545ce2.json @@ -7,8 +7,17 @@ { "type": "execute", "parameters": { - "command": "echo -e \"1\\n2\\n3\" > input.txt", - "shell": true + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.click(960, 540); time.sleep(0.5); pyautogui.hotkey('ctrl', 'alt', 't'); time.sleep(0.5)" + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Terminal" } } ], @@ -23,7 +32,7 @@ "parameters": { "files": [ { - "url": "https://drive.usercontent.google.com/download?id=174Bk_JLDwuTTgL0hslzxRG4xB76JxXIR&export=download&authuser=0&confirm=t&uuid=b086a8a1-00b9-4d73-ae05-c34a7fe9693d&at=APZUnTV-T0wfR_ovBue_HnO-tcai:1707989145225", + "url": "https://drive.usercontent.google.com/download?id=174Bk_JLDwuTTgL0hslzxRG4xB76JxXIR&export=download&authuser=0&confirm=t&uuid=dc3f716f-e21b-4e46-bfa3-6f78b4c11ed4&at=APZUnTVgdfpiWjfBcp32gAuK57Bl:1708526599813", "path": "eval.sh" } ] diff --git a/evaluation_examples/examples/os/6f56bf42-85b8-4fbb-8e06-6c44960184ba.json b/evaluation_examples/examples/os/6f56bf42-85b8-4fbb-8e06-6c44960184ba.json index 7f010a7..c3051ad 100644 --- a/evaluation_examples/examples/os/6f56bf42-85b8-4fbb-8e06-6c44960184ba.json +++ b/evaluation_examples/examples/os/6f56bf42-85b8-4fbb-8e06-6c44960184ba.json @@ -10,7 +10,7 @@ "parameters": { "files": [ { - "url": "https://drive.usercontent.google.com/download?id=1BpFAuuH0IHxpWLDF3jaFNxa9QDe_F12D&export=download&authuser=0&confirm=t&uuid=4f18ddd0-81be-414b-b701-289544cdb321&at=APZUnTXlaYyRw5kPCIK4HezQJVrO:1708051633417", + "url": "https://drive.usercontent.google.com/download?id=1fQGBoTE40BLoNV_d4KYxbYjKRJdIPDio&export=download&authuser=0&confirm=t&uuid=50f5fff6-4c6b-4ce0-b36e-73f0d4bce8ca&at=APZUnTUA_3uAt14q25NN6dCY5X7e:1708531075515", "path": "setup.sh" } ] @@ -29,6 +29,22 @@ "command": "./setup.sh", "shell": true } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; time.sleep(0.5); pyautogui.click(960, 540); time.sleep(0.5); pyautogui.hotkey('ctrl', 'alt', 't'); time.sleep(0.5)" + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Terminal" + } } ], "related_apps": [ @@ -41,7 +57,7 @@ "parameters": { "files": [ { - "url": "https://drive.usercontent.google.com/download?id=1fQGBoTE40BLoNV_d4KYxbYjKRJdIPDio&export=download&authuser=0&confirm=t&uuid=b380dc07-a88b-4a65-affe-f3a77108d310&at=APZUnTVe08Pyr4RSR94AmQK7S8t_:1708051631877", + "url": "https://drive.usercontent.google.com/download?id=1BpFAuuH0IHxpWLDF3jaFNxa9QDe_F12D&export=download&authuser=0&confirm=t&uuid=695e8a3c-f354-4e9d-a4c3-e01f149be7be&at=APZUnTVHmmnZVIYLPCbuiQduSZZX:1708531243191", "path": "eval.sh" } ]