diff --git a/desktop_env/envs/desktop_env.py b/desktop_env/envs/desktop_env.py index 330fe2d..ed2f8c4 100644 --- a/desktop_env/envs/desktop_env.py +++ b/desktop_env/envs/desktop_env.py @@ -26,11 +26,19 @@ Getter = Callable[[gym.Env, Dict[str, Any]], Any] def _execute_command(command: List[str]) -> None: - if command[:4] == ["vmrun", "-T", "ws", "start"]: + def _is_contained_in(a, b): + for v in set(a): + if a.count(v) > b.count(v): + return False + return True + + # Specially handled for the `vmrun` command in Windows + if _is_contained_in(["vmrun", "-T", "ws", "start"], command): p = subprocess.Popen(command) p.wait() else: - result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=60, text=True, encoding="utf-8") + result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=60, text=True, + encoding="utf-8") if result.returncode != 0: raise Exception("\033[91m" + result.stdout + result.stderr + "\033[0m") return result.stdout @@ -132,7 +140,8 @@ class DesktopEnv(gym.Env): output = _execute_command(["vmrun", "-T", "ws", "getGuestIPAddress", self.path_to_vm]).strip() logger.info(f"IP address: {output}") return output - except: + except Exception as e: + print(e) time.sleep(5) logger.info("Retrying...") raise Exception("Failed to get VM IP address!") @@ -175,14 +184,14 @@ class DesktopEnv(gym.Env): # even if one of the metrics does not need expected or options field, it should be included in the list with None self.evaluator = task_config["evaluator"] self.metric: Metric = [getattr(metrics, func) for func in self.evaluator["func"]] \ - if isinstance(self.evaluator["func"], list) \ - else getattr(metrics, self.evaluator["func"]) + if isinstance(self.evaluator["func"], list) \ + else getattr(metrics, self.evaluator["func"]) self.metric_conj: str = self.evaluator.get("conj", "and") # take conjunction of multiple metrics if "result" in self.evaluator: self.result_getter: Getter = [getattr(getters, "get_{:}".format(res["type"])) for res in - self.evaluator["result"]] \ - if isinstance(self.evaluator["result"], list) \ - else getattr(getters, "get_{:}".format(self.evaluator["result"]["type"])) + self.evaluator["result"]] \ + if isinstance(self.evaluator["result"], list) \ + else getattr(getters, "get_{:}".format(self.evaluator["result"]["type"])) else: self.result_getter = [None] * len(self.metric) \ if isinstance(self.metric, list) \ @@ -302,8 +311,8 @@ class DesktopEnv(gym.Env): self.setup_controller.setup(self.evaluator.get("postconfig", [])) - if self.metric == "infeasible": - if self.action_history[-1] == "FAIL": + if self.evaluator['func'] == "infeasible": + if len(self.action_history) > 0 and self.action_history[-1] == "FAIL": return 1 else: return 0 diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py index ccdda25..c3e80b3 100644 --- a/desktop_env/evaluators/metrics/__init__.py +++ b/desktop_env/evaluators/metrics/__init__.py @@ -121,3 +121,7 @@ from .vscode import ( check_json_settings, check_json_keybindings ) + + +def infeasible(): + pass diff --git a/evaluation_examples/examples/chrome/3720f614-37fd-4d04-8a6b-76f54f8c222d.json b/evaluation_examples/examples/chrome/3720f614-37fd-4d04-8a6b-76f54f8c222d.json index ee0b3c5..3b380d7 100644 --- a/evaluation_examples/examples/chrome/3720f614-37fd-4d04-8a6b-76f54f8c222d.json +++ b/evaluation_examples/examples/chrome/3720f614-37fd-4d04-8a6b-76f54f8c222d.json @@ -10,10 +10,6 @@ "chrome" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } diff --git a/evaluation_examples/examples/chrome/93eabf48-6a27-4cb6-b963-7d5fe1e0d3a9.json b/evaluation_examples/examples/chrome/93eabf48-6a27-4cb6-b963-7d5fe1e0d3a9.json index 331a46e..4bac6b3 100644 --- a/evaluation_examples/examples/chrome/93eabf48-6a27-4cb6-b963-7d5fe1e0d3a9.json +++ b/evaluation_examples/examples/chrome/93eabf48-6a27-4cb6-b963-7d5fe1e0d3a9.json @@ -10,10 +10,6 @@ "chrome" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } diff --git a/evaluation_examples/examples/chrome/ae78f875-5b98-4907-bbb5-9c737fc68c03.json b/evaluation_examples/examples/chrome/ae78f875-5b98-4907-bbb5-9c737fc68c03.json index e19fdc5..fae40e0 100644 --- a/evaluation_examples/examples/chrome/ae78f875-5b98-4907-bbb5-9c737fc68c03.json +++ b/evaluation_examples/examples/chrome/ae78f875-5b98-4907-bbb5-9c737fc68c03.json @@ -10,10 +10,6 @@ "chrome" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } diff --git a/evaluation_examples/examples/gimp/38f48d40-764e-4e77-a7cf-51dfce880291.json b/evaluation_examples/examples/gimp/38f48d40-764e-4e77-a7cf-51dfce880291.json index 7bfd43f..a64532a 100644 --- a/evaluation_examples/examples/gimp/38f48d40-764e-4e77-a7cf-51dfce880291.json +++ b/evaluation_examples/examples/gimp/38f48d40-764e-4e77-a7cf-51dfce880291.json @@ -21,10 +21,6 @@ "gimp" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } diff --git a/evaluation_examples/examples/gimp/5ca86c6f-f317-49d8-b6a7-b527541caae8.json b/evaluation_examples/examples/gimp/5ca86c6f-f317-49d8-b6a7-b527541caae8.json index d81e38c..b15416d 100644 --- a/evaluation_examples/examples/gimp/5ca86c6f-f317-49d8-b6a7-b527541caae8.json +++ b/evaluation_examples/examples/gimp/5ca86c6f-f317-49d8-b6a7-b527541caae8.json @@ -10,10 +10,6 @@ "gimp" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } diff --git a/evaluation_examples/examples/gimp/62f7fd55-0687-4a43-b6e1-3eda16fc6252.json b/evaluation_examples/examples/gimp/62f7fd55-0687-4a43-b6e1-3eda16fc6252.json index 767648c..e3f1c08 100644 --- a/evaluation_examples/examples/gimp/62f7fd55-0687-4a43-b6e1-3eda16fc6252.json +++ b/evaluation_examples/examples/gimp/62f7fd55-0687-4a43-b6e1-3eda16fc6252.json @@ -21,10 +21,6 @@ "gimp" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } diff --git a/evaluation_examples/examples/gimp/e19bd559-633b-4b02-940f-d946248f088e.json b/evaluation_examples/examples/gimp/e19bd559-633b-4b02-940f-d946248f088e.json index 92300d9..97b54a4 100644 --- a/evaluation_examples/examples/gimp/e19bd559-633b-4b02-940f-d946248f088e.json +++ b/evaluation_examples/examples/gimp/e19bd559-633b-4b02-940f-d946248f088e.json @@ -10,10 +10,6 @@ "gimp" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } diff --git a/evaluation_examples/examples/gimp/fbb548ca-c2a6-4601-9204-e39a2efc507b.json b/evaluation_examples/examples/gimp/fbb548ca-c2a6-4601-9204-e39a2efc507b.json index b8addf8..01318d5 100644 --- a/evaluation_examples/examples/gimp/fbb548ca-c2a6-4601-9204-e39a2efc507b.json +++ b/evaluation_examples/examples/gimp/fbb548ca-c2a6-4601-9204-e39a2efc507b.json @@ -10,10 +10,6 @@ "gimp" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } diff --git a/evaluation_examples/examples/libreoffice_writer/bb8ccc78-479f-4a2f-a71e-d565e439436b.json b/evaluation_examples/examples/libreoffice_writer/bb8ccc78-479f-4a2f-a71e-d565e439436b.json index 90177e5..52e64d3 100644 --- a/evaluation_examples/examples/libreoffice_writer/bb8ccc78-479f-4a2f-a71e-d565e439436b.json +++ b/evaluation_examples/examples/libreoffice_writer/bb8ccc78-479f-4a2f-a71e-d565e439436b.json @@ -27,8 +27,6 @@ "libreoffice_writer" ], "evaluator": { - "func": "infeasible", - "result": { - } + "func": "infeasible" } } \ No newline at end of file diff --git a/evaluation_examples/examples/os/4783cc41-c03c-4e1b-89b4-50658f642bd5.json b/evaluation_examples/examples/os/4783cc41-c03c-4e1b-89b4-50658f642bd5.json index 4981df0..1382a94 100644 --- a/evaluation_examples/examples/os/4783cc41-c03c-4e1b-89b4-50658f642bd5.json +++ b/evaluation_examples/examples/os/4783cc41-c03c-4e1b-89b4-50658f642bd5.json @@ -27,10 +27,6 @@ "os" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } diff --git a/evaluation_examples/examples/os/a462a795-fdc7-4b23-b689-e8b6df786b78.json b/evaluation_examples/examples/os/a462a795-fdc7-4b23-b689-e8b6df786b78.json index 19dc17f..537e48d 100644 --- a/evaluation_examples/examples/os/a462a795-fdc7-4b23-b689-e8b6df786b78.json +++ b/evaluation_examples/examples/os/a462a795-fdc7-4b23-b689-e8b6df786b78.json @@ -27,10 +27,6 @@ "os" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } diff --git a/evaluation_examples/examples/os/b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa.json b/evaluation_examples/examples/os/b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa.json index d7bb963..dd2a37e 100644 --- a/evaluation_examples/examples/os/b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa.json +++ b/evaluation_examples/examples/os/b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa.json @@ -9,10 +9,6 @@ "os" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } \ No newline at end of file diff --git a/evaluation_examples/examples/os/c288e301-e626-4b98-a1ab-159dcb162af5.json b/evaluation_examples/examples/os/c288e301-e626-4b98-a1ab-159dcb162af5.json index 85cde22..e444d0c 100644 --- a/evaluation_examples/examples/os/c288e301-e626-4b98-a1ab-159dcb162af5.json +++ b/evaluation_examples/examples/os/c288e301-e626-4b98-a1ab-159dcb162af5.json @@ -8,10 +8,6 @@ "os" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } \ No newline at end of file diff --git a/evaluation_examples/examples/os/fe41f596-a71b-4c2f-9b2f-9dcd40b568c3.json b/evaluation_examples/examples/os/fe41f596-a71b-4c2f-9b2f-9dcd40b568c3.json index b20f794..7a0a070 100644 --- a/evaluation_examples/examples/os/fe41f596-a71b-4c2f-9b2f-9dcd40b568c3.json +++ b/evaluation_examples/examples/os/fe41f596-a71b-4c2f-9b2f-9dcd40b568c3.json @@ -10,10 +10,6 @@ "os" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } \ No newline at end of file diff --git a/evaluation_examples/examples/thunderbird/99146c54-4f37-4ab8-9327-5f3291665e1e.json b/evaluation_examples/examples/thunderbird/99146c54-4f37-4ab8-9327-5f3291665e1e.json index b5e42b0..1df08f6 100644 --- a/evaluation_examples/examples/thunderbird/99146c54-4f37-4ab8-9327-5f3291665e1e.json +++ b/evaluation_examples/examples/thunderbird/99146c54-4f37-4ab8-9327-5f3291665e1e.json @@ -43,10 +43,6 @@ "thunderbird" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } diff --git a/evaluation_examples/examples/vlc/5ac2891a-eacd-4954-b339-98abba077adb.json b/evaluation_examples/examples/vlc/5ac2891a-eacd-4954-b339-98abba077adb.json index 5e45906..cc13526 100644 --- a/evaluation_examples/examples/vlc/5ac2891a-eacd-4954-b339-98abba077adb.json +++ b/evaluation_examples/examples/vlc/5ac2891a-eacd-4954-b339-98abba077adb.json @@ -16,10 +16,6 @@ "vlc" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } diff --git a/evaluation_examples/examples/vlc/7882ed6e-bece-4bf0-bada-c32dc1ddae72.json b/evaluation_examples/examples/vlc/7882ed6e-bece-4bf0-bada-c32dc1ddae72.json index 199e6b0..ba7df32 100644 --- a/evaluation_examples/examples/vlc/7882ed6e-bece-4bf0-bada-c32dc1ddae72.json +++ b/evaluation_examples/examples/vlc/7882ed6e-bece-4bf0-bada-c32dc1ddae72.json @@ -16,10 +16,6 @@ "vlc" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } diff --git a/evaluation_examples/examples/vlc/cb130f0d-d36f-4302-9838-b3baf46139b6.json b/evaluation_examples/examples/vlc/cb130f0d-d36f-4302-9838-b3baf46139b6.json index 851f49a..7937817 100644 --- a/evaluation_examples/examples/vlc/cb130f0d-d36f-4302-9838-b3baf46139b6.json +++ b/evaluation_examples/examples/vlc/cb130f0d-d36f-4302-9838-b3baf46139b6.json @@ -16,10 +16,6 @@ "vlc" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } diff --git a/evaluation_examples/examples/vs_code/7aeae0e2-70ee-4705-821d-1bba5d5b2ddd.json b/evaluation_examples/examples/vs_code/7aeae0e2-70ee-4705-821d-1bba5d5b2ddd.json index 8511936..d7e99ec 100644 --- a/evaluation_examples/examples/vs_code/7aeae0e2-70ee-4705-821d-1bba5d5b2ddd.json +++ b/evaluation_examples/examples/vs_code/7aeae0e2-70ee-4705-821d-1bba5d5b2ddd.json @@ -36,10 +36,6 @@ "vscode" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } diff --git a/evaluation_examples/examples/vs_code/7c4cc09e-7a92-40dd-8338-b2286535c4ed.json b/evaluation_examples/examples/vs_code/7c4cc09e-7a92-40dd-8338-b2286535c4ed.json index a86d94c..d218899 100644 --- a/evaluation_examples/examples/vs_code/7c4cc09e-7a92-40dd-8338-b2286535c4ed.json +++ b/evaluation_examples/examples/vs_code/7c4cc09e-7a92-40dd-8338-b2286535c4ed.json @@ -24,10 +24,6 @@ "vscode" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } diff --git a/evaluation_examples/examples/vs_code/847a96b6-df94-4927-97e6-8cc9ea66ced7.json b/evaluation_examples/examples/vs_code/847a96b6-df94-4927-97e6-8cc9ea66ced7.json index 0abf52a..da1ed29 100644 --- a/evaluation_examples/examples/vs_code/847a96b6-df94-4927-97e6-8cc9ea66ced7.json +++ b/evaluation_examples/examples/vs_code/847a96b6-df94-4927-97e6-8cc9ea66ced7.json @@ -59,10 +59,6 @@ "vscode" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } diff --git a/evaluation_examples/examples/vs_code/971cbb5b-3cbf-4ff7-9e24-b5c84fcebfa6.json b/evaluation_examples/examples/vs_code/971cbb5b-3cbf-4ff7-9e24-b5c84fcebfa6.json index 889f8fa..add5087 100644 --- a/evaluation_examples/examples/vs_code/971cbb5b-3cbf-4ff7-9e24-b5c84fcebfa6.json +++ b/evaluation_examples/examples/vs_code/971cbb5b-3cbf-4ff7-9e24-b5c84fcebfa6.json @@ -24,10 +24,6 @@ "vscode" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } } diff --git a/evaluation_examples/examples/vs_code/dcbe20e8-647f-4f1d-8696-f1c5bbb570e3.json b/evaluation_examples/examples/vs_code/dcbe20e8-647f-4f1d-8696-f1c5bbb570e3.json index 63e1f4f..7503b10 100644 --- a/evaluation_examples/examples/vs_code/dcbe20e8-647f-4f1d-8696-f1c5bbb570e3.json +++ b/evaluation_examples/examples/vs_code/dcbe20e8-647f-4f1d-8696-f1c5bbb570e3.json @@ -24,10 +24,6 @@ "vscode" ], "evaluator": { - "func": "infeasible", - "result": { - }, - "expected": { - } + "func": "infeasible" } }