From aca6f968149b2b6d6fa4e5fb368b5e22167014f3 Mon Sep 17 00:00:00 2001
From: Timothyxxx <384084775@qq.com>
Date: Tue, 20 Feb 2024 20:08:59 +0800
Subject: [PATCH 1/5] Initialize the release version for README.md

---
 README.md | 10 ++++++++++
 1 file changed, 10 insertions(+)
diff --git a/README.md b/README.md
index 2578a41..a9fbbf4 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,14 @@
 # DesktopEnv: An Environment towards Human-like Computer Task Mastery
+<p align="center">
+    <b>SLOGAN</b>
+</p>
+
+<p align="center">
+<a href="">Website</a> •
+<a href="">Paper</a>
+</p>
+
+![Overview](media/overview.png)
 
 ## Updates
 - 2024-03-01: 

From 807818c9a234d2732c817d745926a8c3423ccc83 Mon Sep 17 00:00:00 2001
From: David Chang <zdy004007@126.com>
Date: Tue, 20 Feb 2024 23:29:13 +0800
Subject: [PATCH 2/5] ver Feb20thv2

corrected several SheetCopilot tasks
fixed load_charts
---
 desktop_env/evaluators/metrics/table.py       |  5 +++-
 desktop_env/evaluators/metrics/utils.py       | 25 ++++++----------
 .../035f41ba-6653-43ab-aa63-c86d449d62e5.json |  2 +-
 .../26a8440e-c166-4c50-aef4-bfb77314b46b.json |  6 ++--
 .../28047f4a-d877-4bea-95f7-e42b1c919957.json | 14 ++++-----
 .../30e3e107-1cfb-46ee-a755-2cd080d7ba6a.json | 29 +++++++++++++++----
 .../42e0a640-4f19-4b28-973d-729602b5a4a7.json |  6 ++--
 7 files changed, 51 insertions(+), 36 deletions(-)

diff --git a/desktop_env/evaluators/metrics/table.py b/desktop_env/evaluators/metrics/table.py
index e2c105c..82ad0ac 100644
--- a/desktop_env/evaluators/metrics/table.py
+++ b/desktop_env/evaluators/metrics/table.py
@@ -144,7 +144,10 @@ def compare_table(result: str, expected: str = None, **options) -> float:
             metric: bool = sheet1.equals(sheet2)
             logger.debug("Sheet1: \n%s", str(sheet1))
             logger.debug("Sheet2: \n%s", str(sheet2))
-            logger.debug("Sheet1 =v= Sheet2: \n%s", str(sheet1==sheet2))
+            try:
+                logger.debug("Sheet1 =v= Sheet2: \n%s", str(sheet1==sheet2))
+            except Exception:  # debug-only comparison; do not swallow KeyboardInterrupt/SystemExit
+                logger.debug("Sheet1 =/v= Sheet2")
             logger.debug("Assertion: %s =v= %s - %s", r["sheet_idx0"], r["sheet_idx1"], metric)
             #  }}} Compare Sheet Data by Internal Value # 
 
diff --git a/desktop_env/evaluators/metrics/utils.py b/desktop_env/evaluators/metrics/utils.py
index 55a6e8e..0747726 100644
--- a/desktop_env/evaluators/metrics/utils.py
+++ b/desktop_env/evaluators/metrics/utils.py
@@ -119,22 +119,15 @@ def load_charts(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, An
     for ch in charts:
         series: List[str] = []
         for ser in ch.series:
-            value_num = ser.val.numRef.f \
-                if hasattr(ser.val, "numRef") and hasattr(ser.val.numRef, "f") \
-                else ""
-            value_str = ser.val.strRef.f \
-                if hasattr(ser.val, "strRef") and hasattr(ser.val.strRef, "f") \
-                else ""
-            categ_num = ser.cat.numRef.f \
-                if hasattr(ser.cat, "numRef") and hasattr(ser.cat.numRef, "f") \
-                else ""
-            categ_str = ser.cat.strRef.f \
-                if hasattr(ser.cat, "strRef") and hasattr(ser.cat.strRef, "f") \
-                else ""
-            series.append("{:},{:},{:},{:}".format(value_num, value_str
-                                                   , categ_num, categ_str
-                                                   )
-                          )
+            val_ref = getattr(ser.val, "numRef", None)  # prefer the numeric reference
+            if not hasattr(val_ref, "f"):
+                val_ref = getattr(ser.val, "strRef", None)  # fall back to the string reference
+            cat_ref = getattr(ser.cat, "numRef", None)
+            if not hasattr(cat_ref, "f"):
+                cat_ref = getattr(ser.cat, "strRef", None)
+            value_str: str = getattr(val_ref, "f", "")  # "" when the series has neither reference
+            categ_str: str = getattr(cat_ref, "f", "")  # (avoids unbound / stale values across series)
+            series.append("{:},{:}".format(value_str, categ_str))
         series: str = ";".join(series)
 
         # TODO: maybe more aspects, like chart type
diff --git a/evaluation_examples/examples/sheetcopilot/035f41ba-6653-43ab-aa63-c86d449d62e5.json b/evaluation_examples/examples/sheetcopilot/035f41ba-6653-43ab-aa63-c86d449d62e5.json
index a267101..1f309e8 100644
--- a/evaluation_examples/examples/sheetcopilot/035f41ba-6653-43ab-aa63-c86d449d62e5.json
+++ b/evaluation_examples/examples/sheetcopilot/035f41ba-6653-43ab-aa63-c86d449d62e5.json
@@ -1,7 +1,7 @@
 {
 	"id": "035f41ba-6653-43ab-aa63-c86d449d62e5",
 	"snapshot": "libreoffice_calc",
-	"instruction": "Help me fill in the Gross profit column and set the results as accounting type. Then under column A named \"Year_Profit\" in a new sheet, display the Year Column in Sheet 1 as text appended by a \"_\" with the corresponding Gross Profit value.",
+	"instruction": "Help me fill in the Gross profit column. Then under column A named \"Year_Profit\" in a new sheet, display the Year Column in Sheet 1 as text appended by a \"_\" with the corresponding Gross Profit value.",
 	"source": "SheetCopilot@92",
 	"config": [
 		{
diff --git a/evaluation_examples/examples/sheetcopilot/26a8440e-c166-4c50-aef4-bfb77314b46b.json b/evaluation_examples/examples/sheetcopilot/26a8440e-c166-4c50-aef4-bfb77314b46b.json
index edf3dc3..4e96b4d 100644
--- a/evaluation_examples/examples/sheetcopilot/26a8440e-c166-4c50-aef4-bfb77314b46b.json
+++ b/evaluation_examples/examples/sheetcopilot/26a8440e-c166-4c50-aef4-bfb77314b46b.json
@@ -73,10 +73,10 @@
 			"rules": [
 				{
 					"type": "sheet_data",
-					"sheet_idx0": 0,
-					"sheet_idx1": "EI0"
+					"sheet_idx0": "RNSheet2",
+					"sheet_idx1": "ENSheet2"
 				}
 			]
 		}
 	}
-}
\ No newline at end of file
+}
diff --git a/evaluation_examples/examples/sheetcopilot/28047f4a-d877-4bea-95f7-e42b1c919957.json b/evaluation_examples/examples/sheetcopilot/28047f4a-d877-4bea-95f7-e42b1c919957.json
index c1fb986..3ab267e 100644
--- a/evaluation_examples/examples/sheetcopilot/28047f4a-d877-4bea-95f7-e42b1c919957.json
+++ b/evaluation_examples/examples/sheetcopilot/28047f4a-d877-4bea-95f7-e42b1c919957.json
@@ -1,7 +1,7 @@
 {
 	"id": "28047f4a-d877-4bea-95f7-e42b1c919957",
 	"snapshot": "libreoffice_calc",
-	"instruction": "Summarize the sales for each product and illustrate it with a bar chart  in a new sheet.",
+	"instruction": "Summarize the sales for each product and illustrate it with a bar chart  in a new sheet (Sheet2).",
 	"source": "SheetCopilot@108",
 	"config": [
 		{
@@ -54,7 +54,7 @@
 			{
 				"type": "sleep",
 				"parameters": {
-					"seconds": 0.5
+					"seconds": 2.0
 				}
 			}
 		],
@@ -73,16 +73,16 @@
 			"rules": [
 				{
 					"type": "chart",
-					"sheet_idx0": 0,
-					"sheet_idx1": "EI0",
+					"sheet_idx0": "RNSheet2",
+					"sheet_idx1": "ENSheet2",
 					"chart_props": [
 						"type"
 					]
 				},
 				{
 					"type": "pivot_table",
-					"sheet_idx0": 0,
-					"sheet_idx1": "EI0",
+					"sheet_idx0": "RNSheet2",
+					"sheet_idx1": "ENSheet2",
 					"pivot_props": [
 						"col_fields",
 						"filter",
@@ -93,4 +93,4 @@
 			]
 		}
 	}
-}
\ No newline at end of file
+}
diff --git a/evaluation_examples/examples/sheetcopilot/30e3e107-1cfb-46ee-a755-2cd080d7ba6a.json b/evaluation_examples/examples/sheetcopilot/30e3e107-1cfb-46ee-a755-2cd080d7ba6a.json
index dd32682..ad9c17f 100644
--- a/evaluation_examples/examples/sheetcopilot/30e3e107-1cfb-46ee-a755-2cd080d7ba6a.json
+++ b/evaluation_examples/examples/sheetcopilot/30e3e107-1cfb-46ee-a755-2cd080d7ba6a.json
@@ -73,21 +73,40 @@
 			"rules": [
 				{
 					"type": "sheet_data",
-					"sheet_idx0": 0,
-					"sheet_idx1": "EI0"
+					"sheet_idx0": "RNSheet2",
+					"sheet_idx1": "ENSheet2"
 				},
 				{
 					"type": "pivot_table",
-					"sheet_idx0": 0,
-					"sheet_idx1": "EI0",
+					"sheet_idx0": "RNSheet2",
+					"sheet_idx1": "ENSheet2",
 					"pivot_props": [
 						"col_fields",
 						"filter",
 						"row_fields",
 						"data_fields"
 					]
+				},
+				{
+					"type": "check_cell",
+					"sheet_idx": "RNSheet2",
+					"coordinate": "A1",
+					"props": {
+						"bgcolor": {
+							"method": "eq",
+							"ref": "FF0000FF"
+						},
+						"font_color": {
+							"method": "eq",
+							"ref": "FFFFFFFF"
+						},
+						"font_bold": {
+							"method": "eq",
+							"ref": true
+						}
+					}
 				}
 			]
 		}
 	}
-}
\ No newline at end of file
+}
diff --git a/evaluation_examples/examples/sheetcopilot/42e0a640-4f19-4b28-973d-729602b5a4a7.json b/evaluation_examples/examples/sheetcopilot/42e0a640-4f19-4b28-973d-729602b5a4a7.json
index e608701..2f33e42 100644
--- a/evaluation_examples/examples/sheetcopilot/42e0a640-4f19-4b28-973d-729602b5a4a7.json
+++ b/evaluation_examples/examples/sheetcopilot/42e0a640-4f19-4b28-973d-729602b5a4a7.json
@@ -73,10 +73,10 @@
 			"rules": [
 				{
 					"type": "sheet_data",
-					"sheet_idx0": 0,
-					"sheet_idx1": "EI0"
+					"sheet_idx0": "RNSheet2",
+					"sheet_idx1": "ENSheet2"
 				}
 			]
 		}
 	}
-}
\ No newline at end of file
+}

From 5eea8b7daf3ad91c5e3aed689a3a7c71c02f8049 Mon Sep 17 00:00:00 2001
From: David Chang <zdy004007@126.com>
Date: Wed, 21 Feb 2024 14:56:34 +0800
Subject: [PATCH 3/5] ver Feb21st

updated several sheetcopilot tasks
---
 .../447b9505-7a2f-4863-9dd1-69395482eb4b.json    | 11 ++++++-----
 .../4de54231-e4b5-49e3-b2ba-61a0bec721c0.json    |  4 ++--
 .../51719eea-10bc-4246-a428-ac7c433dd4b3.json    | 16 ++++------------
 .../535364ea-05bd-46ea-9937-9f55c68507e8.json    | 16 ++++------------
 4 files changed, 16 insertions(+), 31 deletions(-)

diff --git a/evaluation_examples/examples/sheetcopilot/447b9505-7a2f-4863-9dd1-69395482eb4b.json b/evaluation_examples/examples/sheetcopilot/447b9505-7a2f-4863-9dd1-69395482eb4b.json
index c53bb07..58b3286 100644
--- a/evaluation_examples/examples/sheetcopilot/447b9505-7a2f-4863-9dd1-69395482eb4b.json
+++ b/evaluation_examples/examples/sheetcopilot/447b9505-7a2f-4863-9dd1-69395482eb4b.json
@@ -1,7 +1,7 @@
 {
 	"id": "447b9505-7a2f-4863-9dd1-69395482eb4b",
 	"snapshot": "libreoffice_calc",
-	"instruction": "Divide the values through column C to P by 1 million and put the results along with the year headers in a new sheet. Set the results type as accounting.",
+	"instruction": "Divide the values through column C to P by 1 million and put the results along with the year headers in a new sheet.",
 	"source": "SheetCopilot@28",
 	"config": [
 		{
@@ -54,7 +54,7 @@
 			{
 				"type": "sleep",
 				"parameters": {
-					"seconds": 0.5
+					"seconds": 10
 				}
 			}
 		],
@@ -73,10 +73,11 @@
 			"rules": [
 				{
 					"type": "sheet_data",
-					"sheet_idx0": 0,
-					"sheet_idx1": "EI0"
+					"sheet_idx0": "RNSheet2",
+					"sheet_idx1": "ENSheet2",
+					"precision": 2
 				}
 			]
 		}
 	}
-}
\ No newline at end of file
+}
diff --git a/evaluation_examples/examples/sheetcopilot/4de54231-e4b5-49e3-b2ba-61a0bec721c0.json b/evaluation_examples/examples/sheetcopilot/4de54231-e4b5-49e3-b2ba-61a0bec721c0.json
index bb8ea76..3a29eea 100644
--- a/evaluation_examples/examples/sheetcopilot/4de54231-e4b5-49e3-b2ba-61a0bec721c0.json
+++ b/evaluation_examples/examples/sheetcopilot/4de54231-e4b5-49e3-b2ba-61a0bec721c0.json
@@ -1,7 +1,7 @@
 {
 	"id": "4de54231-e4b5-49e3-b2ba-61a0bec721c0",
 	"snapshot": "libreoffice_calc",
-	"instruction": "I have compute the acceleration in row 2 and I want you to fill out other rows for column B and E. Next concatenate the values from columns A to D, including their headers (the pattern is \"Header: cell value, ..., Header: cell value\"), into a new column named \"Combined Data\" for all rows.",
+	"instruction": "I have compute the acceleration in row 2 and I want you to fill out other rows for column B and D. Next concatenate the values from columns A to D, including their headers (the pattern is \"Header: cell value, ..., Header: cell value\"), into a new column named \"Combined Data\" for all rows. In the new column, only keep 2 decimal digits.",
 	"source": "SheetCopilot@147",
 	"config": [
 		{
@@ -79,4 +79,4 @@
 			]
 		}
 	}
-}
\ No newline at end of file
+}
diff --git a/evaluation_examples/examples/sheetcopilot/51719eea-10bc-4246-a428-ac7c433dd4b3.json b/evaluation_examples/examples/sheetcopilot/51719eea-10bc-4246-a428-ac7c433dd4b3.json
index 4098230..90faf05 100644
--- a/evaluation_examples/examples/sheetcopilot/51719eea-10bc-4246-a428-ac7c433dd4b3.json
+++ b/evaluation_examples/examples/sheetcopilot/51719eea-10bc-4246-a428-ac7c433dd4b3.json
@@ -1,7 +1,7 @@
 {
 	"id": "51719eea-10bc-4246-a428-ac7c433dd4b3",
 	"snapshot": "libreoffice_calc",
-	"instruction": "Calculate revenue and generate a Pivot Table in a new sheet that summarizes the revenue of each product. In the same sheet, create a bar chart to display the table with data labels on top of the bars.",
+	"instruction": "Calculate revenue in a new column and generate a Pivot Table in a new sheet (Sheet2) that summarizes the revenue of each product.",
 	"source": "SheetCopilot@7",
 	"config": [
 		{
@@ -71,18 +71,10 @@
 		},
 		"options": {
 			"rules": [
-				{
-					"type": "chart",
-					"sheet_idx0": 0,
-					"sheet_idx1": "EI0",
-					"chart_props": [
-						"type"
-					]
-				},
 				{
 					"type": "pivot_table",
-					"sheet_idx0": 0,
-					"sheet_idx1": "EI0",
+					"sheet_idx0": "RNSheet2",
+					"sheet_idx1": "ENSheet2",
 					"pivot_props": [
 						"col_fields",
 						"filter",
@@ -93,4 +85,4 @@
 			]
 		}
 	}
-}
\ No newline at end of file
+}
diff --git a/evaluation_examples/examples/sheetcopilot/535364ea-05bd-46ea-9937-9f55c68507e8.json b/evaluation_examples/examples/sheetcopilot/535364ea-05bd-46ea-9937-9f55c68507e8.json
index 34a5c0c..8bf2502 100644
--- a/evaluation_examples/examples/sheetcopilot/535364ea-05bd-46ea-9937-9f55c68507e8.json
+++ b/evaluation_examples/examples/sheetcopilot/535364ea-05bd-46ea-9937-9f55c68507e8.json
@@ -1,7 +1,7 @@
 {
 	"id": "535364ea-05bd-46ea-9937-9f55c68507e8",
 	"snapshot": "libreoffice_calc",
-	"instruction": "Create two tables in a new sheet showing the total revenue for each product and sales channel. Plot a horizontal bar chart for the former and a pie chart for the latter in the new sheet.",
+	"instruction": "Create two tables in a new sheet showing the total revenue for each product and sales channel.",
 	"source": "SheetCopilot@180",
 	"config": [
 		{
@@ -71,18 +71,10 @@
 		},
 		"options": {
 			"rules": [
-				{
-					"type": "chart",
-					"sheet_idx0": 0,
-					"sheet_idx1": "EI0",
-					"chart_props": [
-						"type"
-					]
-				},
 				{
 					"type": "pivot_table",
-					"sheet_idx0": 0,
-					"sheet_idx1": "EI0",
+					"sheet_idx0": "RNSheet2",
+					"sheet_idx1": "ENSheet2",
 					"pivot_props": [
 						"col_fields",
 						"filter",
@@ -93,4 +85,4 @@
 			]
 		}
 	}
-}
\ No newline at end of file
+}

From e1cf8da4e0ea7820466b64063b52b5254d9d1294 Mon Sep 17 00:00:00 2001
From: Timothyxxx <384084775@qq.com>
Date: Wed, 21 Feb 2024 21:22:12 +0800
Subject: [PATCH 4/5] Fix the infeasible examples support

---
 desktop_env/envs/desktop_env.py               | 29 ++++++++++++-------
 desktop_env/evaluators/metrics/__init__.py    |  4 +++
 .../3720f614-37fd-4d04-8a6b-76f54f8c222d.json |  6 +---
 .../93eabf48-6a27-4cb6-b963-7d5fe1e0d3a9.json |  6 +---
 .../ae78f875-5b98-4907-bbb5-9c737fc68c03.json |  6 +---
 .../38f48d40-764e-4e77-a7cf-51dfce880291.json |  6 +---
 .../5ca86c6f-f317-49d8-b6a7-b527541caae8.json |  6 +---
 .../62f7fd55-0687-4a43-b6e1-3eda16fc6252.json |  6 +---
 .../e19bd559-633b-4b02-940f-d946248f088e.json |  6 +---
 .../fbb548ca-c2a6-4601-9204-e39a2efc507b.json |  6 +---
 .../bb8ccc78-479f-4a2f-a71e-d565e439436b.json |  4 +--
 .../4783cc41-c03c-4e1b-89b4-50658f642bd5.json |  6 +---
 .../a462a795-fdc7-4b23-b689-e8b6df786b78.json |  6 +---
 .../b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa.json |  6 +---
 .../c288e301-e626-4b98-a1ab-159dcb162af5.json |  6 +---
 .../fe41f596-a71b-4c2f-9b2f-9dcd40b568c3.json |  6 +---
 .../99146c54-4f37-4ab8-9327-5f3291665e1e.json |  6 +---
 .../5ac2891a-eacd-4954-b339-98abba077adb.json |  6 +---
 .../7882ed6e-bece-4bf0-bada-c32dc1ddae72.json |  6 +---
 .../cb130f0d-d36f-4302-9838-b3baf46139b6.json |  6 +---
 .../7aeae0e2-70ee-4705-821d-1bba5d5b2ddd.json |  6 +---
 .../7c4cc09e-7a92-40dd-8338-b2286535c4ed.json |  6 +---
 .../847a96b6-df94-4927-97e6-8cc9ea66ced7.json |  6 +---
 .../971cbb5b-3cbf-4ff7-9e24-b5c84fcebfa6.json |  6 +---
 .../dcbe20e8-647f-4f1d-8696-f1c5bbb570e3.json |  6 +---
 25 files changed, 46 insertions(+), 123 deletions(-)

diff --git a/desktop_env/envs/desktop_env.py b/desktop_env/envs/desktop_env.py
index 330fe2d..ed2f8c4 100644
--- a/desktop_env/envs/desktop_env.py
+++ b/desktop_env/envs/desktop_env.py
@@ -26,11 +26,19 @@ Getter = Callable[[gym.Env, Dict[str, Any]], Any]
 
 
 def _execute_command(command: List[str]) -> None:
-    if command[:4] == ["vmrun", "-T", "ws", "start"]:
+    def _is_contained_in(a, b):  # True if every item of `a` occurs in `b` at least as many times
+        for v in set(a):
+            if a.count(v) > b.count(v):
+                return False
+        return True
+
+    # Special case for Windows: a `vmrun -T ws start` command (tokens matched at any position) is run via Popen and waited on
+    if _is_contained_in(["vmrun", "-T", "ws", "start"], command):
         p = subprocess.Popen(command)
         p.wait()
     else:
-        result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=60, text=True, encoding="utf-8")
+        result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=60, text=True,
+                                encoding="utf-8")
         if result.returncode != 0:
             raise Exception("\033[91m" + result.stdout + result.stderr + "\033[0m")
         return result.stdout
@@ -132,7 +140,8 @@ class DesktopEnv(gym.Env):
                 output = _execute_command(["vmrun", "-T", "ws", "getGuestIPAddress", self.path_to_vm]).strip()
                 logger.info(f"IP address: {output}")
                 return output
-            except:
+            except Exception as e:
+                logger.warning("Failed to get VM IP address: %s", e)  # use the logger like the surrounding lines, not print
                 time.sleep(5)
                 logger.info("Retrying...")
         raise Exception("Failed to get VM IP address!")
@@ -175,14 +184,14 @@ class DesktopEnv(gym.Env):
         # even if one of the metrics does not need expected or options field, it should be included in the list with None
         self.evaluator = task_config["evaluator"]
         self.metric: Metric = [getattr(metrics, func) for func in self.evaluator["func"]] \
-        if isinstance(self.evaluator["func"], list) \
-        else getattr(metrics, self.evaluator["func"])
+            if isinstance(self.evaluator["func"], list) \
+            else getattr(metrics, self.evaluator["func"])
         self.metric_conj: str = self.evaluator.get("conj", "and")  # take conjunction of multiple metrics
         if "result" in self.evaluator:
             self.result_getter: Getter = [getattr(getters, "get_{:}".format(res["type"])) for res in
-                                      self.evaluator["result"]] \
-            if isinstance(self.evaluator["result"], list) \
-            else getattr(getters, "get_{:}".format(self.evaluator["result"]["type"]))
+                                          self.evaluator["result"]] \
+                if isinstance(self.evaluator["result"], list) \
+                else getattr(getters, "get_{:}".format(self.evaluator["result"]["type"]))
         else:
             self.result_getter = [None] * len(self.metric) \
                 if isinstance(self.metric, list) \
@@ -302,8 +311,8 @@ class DesktopEnv(gym.Env):
 
         self.setup_controller.setup(self.evaluator.get("postconfig", []))
 
-        if self.metric == "infeasible":
-            if self.action_history[-1] == "FAIL":
+        if self.evaluator['func'] == "infeasible":
+            if len(self.action_history) > 0 and self.action_history[-1] == "FAIL":
                 return 1
             else:
                 return 0
diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py
index ccdda25..c3e80b3 100644
--- a/desktop_env/evaluators/metrics/__init__.py
+++ b/desktop_env/evaluators/metrics/__init__.py
@@ -121,3 +121,7 @@ from .vscode import (
     check_json_settings,
     check_json_keybindings
 )
+
+
+def infeasible():
+    """Placeholder so that the metric name "infeasible" resolves in this module; it does nothing and returns None."""
diff --git a/evaluation_examples/examples/chrome/3720f614-37fd-4d04-8a6b-76f54f8c222d.json b/evaluation_examples/examples/chrome/3720f614-37fd-4d04-8a6b-76f54f8c222d.json
index ee0b3c5..3b380d7 100644
--- a/evaluation_examples/examples/chrome/3720f614-37fd-4d04-8a6b-76f54f8c222d.json
+++ b/evaluation_examples/examples/chrome/3720f614-37fd-4d04-8a6b-76f54f8c222d.json
@@ -10,10 +10,6 @@
     "chrome"
   ],
   "evaluator": {
-    "func": "infeasible",
-    "result": {
-    },
-    "expected": {
-    }
+    "func": "infeasible"
   }
 }
diff --git a/evaluation_examples/examples/chrome/93eabf48-6a27-4cb6-b963-7d5fe1e0d3a9.json b/evaluation_examples/examples/chrome/93eabf48-6a27-4cb6-b963-7d5fe1e0d3a9.json
index 331a46e..4bac6b3 100644
--- a/evaluation_examples/examples/chrome/93eabf48-6a27-4cb6-b963-7d5fe1e0d3a9.json
+++ b/evaluation_examples/examples/chrome/93eabf48-6a27-4cb6-b963-7d5fe1e0d3a9.json
@@ -10,10 +10,6 @@
     "chrome"
   ],
   "evaluator": {
-    "func": "infeasible",
-    "result": {
-    },
-    "expected": {
-    }
+    "func": "infeasible"
   }
 }
diff --git a/evaluation_examples/examples/chrome/ae78f875-5b98-4907-bbb5-9c737fc68c03.json b/evaluation_examples/examples/chrome/ae78f875-5b98-4907-bbb5-9c737fc68c03.json
index e19fdc5..fae40e0 100644
--- a/evaluation_examples/examples/chrome/ae78f875-5b98-4907-bbb5-9c737fc68c03.json
+++ b/evaluation_examples/examples/chrome/ae78f875-5b98-4907-bbb5-9c737fc68c03.json
@@ -10,10 +10,6 @@
     "chrome"
   ],
   "evaluator": {
-    "func": "infeasible",
-    "result": {
-    },
-    "expected": {
-    }
+    "func": "infeasible"
   }
 }
diff --git a/evaluation_examples/examples/gimp/38f48d40-764e-4e77-a7cf-51dfce880291.json b/evaluation_examples/examples/gimp/38f48d40-764e-4e77-a7cf-51dfce880291.json
index 7bfd43f..a64532a 100644
--- a/evaluation_examples/examples/gimp/38f48d40-764e-4e77-a7cf-51dfce880291.json
+++ b/evaluation_examples/examples/gimp/38f48d40-764e-4e77-a7cf-51dfce880291.json
@@ -21,10 +21,6 @@
     "gimp"
   ],
   "evaluator": {
-    "func": "infeasible",
-    "result": {
-    },
-    "expected": {
-    }
+    "func": "infeasible"
   }
 }
diff --git a/evaluation_examples/examples/gimp/5ca86c6f-f317-49d8-b6a7-b527541caae8.json b/evaluation_examples/examples/gimp/5ca86c6f-f317-49d8-b6a7-b527541caae8.json
index d81e38c..b15416d 100644
--- a/evaluation_examples/examples/gimp/5ca86c6f-f317-49d8-b6a7-b527541caae8.json
+++ b/evaluation_examples/examples/gimp/5ca86c6f-f317-49d8-b6a7-b527541caae8.json
@@ -10,10 +10,6 @@
     "gimp"
   ],
   "evaluator": {
-    "func": "infeasible",
-    "result": {
-    },
-    "expected": {
-    }
+    "func": "infeasible"
   }
 }
diff --git a/evaluation_examples/examples/gimp/62f7fd55-0687-4a43-b6e1-3eda16fc6252.json b/evaluation_examples/examples/gimp/62f7fd55-0687-4a43-b6e1-3eda16fc6252.json
index 767648c..e3f1c08 100644
--- a/evaluation_examples/examples/gimp/62f7fd55-0687-4a43-b6e1-3eda16fc6252.json
+++ b/evaluation_examples/examples/gimp/62f7fd55-0687-4a43-b6e1-3eda16fc6252.json
@@ -21,10 +21,6 @@
     "gimp"
   ],
   "evaluator": {
-    "func": "infeasible",
-    "result": {
-    },
-    "expected": {
-    }
+    "func": "infeasible"
   }
 }
diff --git a/evaluation_examples/examples/gimp/e19bd559-633b-4b02-940f-d946248f088e.json b/evaluation_examples/examples/gimp/e19bd559-633b-4b02-940f-d946248f088e.json
index 92300d9..97b54a4 100644
--- a/evaluation_examples/examples/gimp/e19bd559-633b-4b02-940f-d946248f088e.json
+++ b/evaluation_examples/examples/gimp/e19bd559-633b-4b02-940f-d946248f088e.json
@@ -10,10 +10,6 @@
     "gimp"
   ],
   "evaluator": {
-    "func": "infeasible",
-    "result": {
-    },
-    "expected": {
-    }
+    "func": "infeasible"
   }
 }
diff --git a/evaluation_examples/examples/gimp/fbb548ca-c2a6-4601-9204-e39a2efc507b.json b/evaluation_examples/examples/gimp/fbb548ca-c2a6-4601-9204-e39a2efc507b.json
index b8addf8..01318d5 100644
--- a/evaluation_examples/examples/gimp/fbb548ca-c2a6-4601-9204-e39a2efc507b.json
+++ b/evaluation_examples/examples/gimp/fbb548ca-c2a6-4601-9204-e39a2efc507b.json
@@ -10,10 +10,6 @@
     "gimp"
   ],
   "evaluator": {
-    "func": "infeasible",
-    "result": {
-    },
-    "expected": {
-    }
+    "func": "infeasible"
   }
 }
diff --git a/evaluation_examples/examples/libreoffice_writer/bb8ccc78-479f-4a2f-a71e-d565e439436b.json b/evaluation_examples/examples/libreoffice_writer/bb8ccc78-479f-4a2f-a71e-d565e439436b.json
index 90177e5..52e64d3 100644
--- a/evaluation_examples/examples/libreoffice_writer/bb8ccc78-479f-4a2f-a71e-d565e439436b.json
+++ b/evaluation_examples/examples/libreoffice_writer/bb8ccc78-479f-4a2f-a71e-d565e439436b.json
@@ -27,8 +27,6 @@
     "libreoffice_writer"
   ],
   "evaluator": {
-    "func": "infeasible",
-    "result": {
-    }
+    "func": "infeasible"
   }
 }
\ No newline at end of file
diff --git a/evaluation_examples/examples/os/4783cc41-c03c-4e1b-89b4-50658f642bd5.json b/evaluation_examples/examples/os/4783cc41-c03c-4e1b-89b4-50658f642bd5.json
index 4981df0..1382a94 100644
--- a/evaluation_examples/examples/os/4783cc41-c03c-4e1b-89b4-50658f642bd5.json
+++ b/evaluation_examples/examples/os/4783cc41-c03c-4e1b-89b4-50658f642bd5.json
@@ -27,10 +27,6 @@
     "os"
   ],
   "evaluator": {
-    "func": "infeasible",
-    "result": {
-    },
-    "expected": {
-    }
+    "func": "infeasible"
   }
 }
diff --git a/evaluation_examples/examples/os/a462a795-fdc7-4b23-b689-e8b6df786b78.json b/evaluation_examples/examples/os/a462a795-fdc7-4b23-b689-e8b6df786b78.json
index 19dc17f..537e48d 100644
--- a/evaluation_examples/examples/os/a462a795-fdc7-4b23-b689-e8b6df786b78.json
+++ b/evaluation_examples/examples/os/a462a795-fdc7-4b23-b689-e8b6df786b78.json
@@ -27,10 +27,6 @@
     "os"
   ],
   "evaluator": {
-    "func": "infeasible",
-    "result": {
-    },
-    "expected": {
-    }
+    "func": "infeasible"
   }
 }
diff --git a/evaluation_examples/examples/os/b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa.json b/evaluation_examples/examples/os/b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa.json
index d7bb963..dd2a37e 100644
--- a/evaluation_examples/examples/os/b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa.json
+++ b/evaluation_examples/examples/os/b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa.json
@@ -9,10 +9,6 @@
     "os"
   ],
   "evaluator": {
-    "func": "infeasible",
-    "result": {
-    },
-    "expected": {
-    }
+    "func": "infeasible"
   }
 }
\ No newline at end of file
diff --git a/evaluation_examples/examples/os/c288e301-e626-4b98-a1ab-159dcb162af5.json b/evaluation_examples/examples/os/c288e301-e626-4b98-a1ab-159dcb162af5.json
index 85cde22..e444d0c 100644
--- a/evaluation_examples/examples/os/c288e301-e626-4b98-a1ab-159dcb162af5.json
+++ b/evaluation_examples/examples/os/c288e301-e626-4b98-a1ab-159dcb162af5.json
@@ -8,10 +8,6 @@
       "os"
     ],
     "evaluator": {
-      "func": "infeasible",
-      "result": {
-      },
-      "expected": {
-      }
+      "func": "infeasible"
     }
   }
\ No newline at end of file
diff --git a/evaluation_examples/examples/os/fe41f596-a71b-4c2f-9b2f-9dcd40b568c3.json b/evaluation_examples/examples/os/fe41f596-a71b-4c2f-9b2f-9dcd40b568c3.json
index b20f794..7a0a070 100644
--- a/evaluation_examples/examples/os/fe41f596-a71b-4c2f-9b2f-9dcd40b568c3.json
+++ b/evaluation_examples/examples/os/fe41f596-a71b-4c2f-9b2f-9dcd40b568c3.json
@@ -10,10 +10,6 @@
     "os"
   ],
   "evaluator": {
-    "func": "infeasible",
-    "result": {
-    },
-    "expected": {
-    }
+    "func": "infeasible"
   }
 }
\ No newline at end of file
diff --git a/evaluation_examples/examples/thunderbird/99146c54-4f37-4ab8-9327-5f3291665e1e.json b/evaluation_examples/examples/thunderbird/99146c54-4f37-4ab8-9327-5f3291665e1e.json
index b5e42b0..1df08f6 100644
--- a/evaluation_examples/examples/thunderbird/99146c54-4f37-4ab8-9327-5f3291665e1e.json
+++ b/evaluation_examples/examples/thunderbird/99146c54-4f37-4ab8-9327-5f3291665e1e.json
@@ -43,10 +43,6 @@
     "thunderbird"
   ],
   "evaluator": {
-    "func": "infeasible",
-    "result": {
-    },
-    "expected": {
-    }
+    "func": "infeasible"
   }
 }
diff --git a/evaluation_examples/examples/vlc/5ac2891a-eacd-4954-b339-98abba077adb.json b/evaluation_examples/examples/vlc/5ac2891a-eacd-4954-b339-98abba077adb.json
index 5e45906..cc13526 100644
--- a/evaluation_examples/examples/vlc/5ac2891a-eacd-4954-b339-98abba077adb.json
+++ b/evaluation_examples/examples/vlc/5ac2891a-eacd-4954-b339-98abba077adb.json
@@ -16,10 +16,6 @@
     "vlc"
   ],
   "evaluator": {
-    "func": "infeasible",
-    "result": {
-    },
-    "expected": {
-    }
+    "func": "infeasible"
   }
 }
diff --git a/evaluation_examples/examples/vlc/7882ed6e-bece-4bf0-bada-c32dc1ddae72.json b/evaluation_examples/examples/vlc/7882ed6e-bece-4bf0-bada-c32dc1ddae72.json
index 199e6b0..ba7df32 100644
--- a/evaluation_examples/examples/vlc/7882ed6e-bece-4bf0-bada-c32dc1ddae72.json
+++ b/evaluation_examples/examples/vlc/7882ed6e-bece-4bf0-bada-c32dc1ddae72.json
@@ -16,10 +16,6 @@
     "vlc"
   ],
   "evaluator": {
-    "func": "infeasible",
-    "result": {
-    },
-    "expected": {
-    }
+    "func": "infeasible"
   }
 }
diff --git a/evaluation_examples/examples/vlc/cb130f0d-d36f-4302-9838-b3baf46139b6.json b/evaluation_examples/examples/vlc/cb130f0d-d36f-4302-9838-b3baf46139b6.json
index 851f49a..7937817 100644
--- a/evaluation_examples/examples/vlc/cb130f0d-d36f-4302-9838-b3baf46139b6.json
+++ b/evaluation_examples/examples/vlc/cb130f0d-d36f-4302-9838-b3baf46139b6.json
@@ -16,10 +16,6 @@
     "vlc"
   ],
   "evaluator": {
-    "func": "infeasible",
-    "result": {
-    },
-    "expected": {
-    }
+    "func": "infeasible"
   }
 }
diff --git a/evaluation_examples/examples/vs_code/7aeae0e2-70ee-4705-821d-1bba5d5b2ddd.json b/evaluation_examples/examples/vs_code/7aeae0e2-70ee-4705-821d-1bba5d5b2ddd.json
index 8511936..d7e99ec 100644
--- a/evaluation_examples/examples/vs_code/7aeae0e2-70ee-4705-821d-1bba5d5b2ddd.json
+++ b/evaluation_examples/examples/vs_code/7aeae0e2-70ee-4705-821d-1bba5d5b2ddd.json
@@ -36,10 +36,6 @@
     "vscode"
   ],
   "evaluator": {
-    "func": "infeasible",
-    "result": {
-    },
-    "expected": {
-    }
+    "func": "infeasible"
   }
 }
diff --git a/evaluation_examples/examples/vs_code/7c4cc09e-7a92-40dd-8338-b2286535c4ed.json b/evaluation_examples/examples/vs_code/7c4cc09e-7a92-40dd-8338-b2286535c4ed.json
index a86d94c..d218899 100644
--- a/evaluation_examples/examples/vs_code/7c4cc09e-7a92-40dd-8338-b2286535c4ed.json
+++ b/evaluation_examples/examples/vs_code/7c4cc09e-7a92-40dd-8338-b2286535c4ed.json
@@ -24,10 +24,6 @@
     "vscode"
   ],
   "evaluator": {
-    "func": "infeasible",
-    "result": {
-    },
-    "expected": {
-    }
+    "func": "infeasible"
   }
 }
diff --git a/evaluation_examples/examples/vs_code/847a96b6-df94-4927-97e6-8cc9ea66ced7.json b/evaluation_examples/examples/vs_code/847a96b6-df94-4927-97e6-8cc9ea66ced7.json
index 0abf52a..da1ed29 100644
--- a/evaluation_examples/examples/vs_code/847a96b6-df94-4927-97e6-8cc9ea66ced7.json
+++ b/evaluation_examples/examples/vs_code/847a96b6-df94-4927-97e6-8cc9ea66ced7.json
@@ -59,10 +59,6 @@
     "vscode"
   ],
   "evaluator": {
-    "func": "infeasible",
-    "result": {
-    },
-    "expected": {
-    }
+    "func": "infeasible"
   }
 }
diff --git a/evaluation_examples/examples/vs_code/971cbb5b-3cbf-4ff7-9e24-b5c84fcebfa6.json b/evaluation_examples/examples/vs_code/971cbb5b-3cbf-4ff7-9e24-b5c84fcebfa6.json
index 889f8fa..add5087 100644
--- a/evaluation_examples/examples/vs_code/971cbb5b-3cbf-4ff7-9e24-b5c84fcebfa6.json
+++ b/evaluation_examples/examples/vs_code/971cbb5b-3cbf-4ff7-9e24-b5c84fcebfa6.json
@@ -24,10 +24,6 @@
     "vscode"
   ],
   "evaluator": {
-    "func": "infeasible",
-    "result": {
-    },
-    "expected": {
-    }
+    "func": "infeasible"
   }
 }
diff --git a/evaluation_examples/examples/vs_code/dcbe20e8-647f-4f1d-8696-f1c5bbb570e3.json b/evaluation_examples/examples/vs_code/dcbe20e8-647f-4f1d-8696-f1c5bbb570e3.json
index 63e1f4f..7503b10 100644
--- a/evaluation_examples/examples/vs_code/dcbe20e8-647f-4f1d-8696-f1c5bbb570e3.json
+++ b/evaluation_examples/examples/vs_code/dcbe20e8-647f-4f1d-8696-f1c5bbb570e3.json
@@ -24,10 +24,6 @@
     "vscode"
   ],
   "evaluator": {
-    "func": "infeasible",
-    "result": {
-    },
-    "expected": {
-    }
+    "func": "infeasible"
   }
 }

From 91bc795de182d21504d3ce0a691ee12e234fd118 Mon Sep 17 00:00:00 2001
From: Timothyxxx <384084775@qq.com>
Date: Thu, 22 Feb 2024 00:04:02 +0800
Subject: [PATCH 5/5] Examine and load new batch of OS examples from NL2Bash

---
 .../37887e8c-da15-4192-923c-08fa390a176d.json |  2 +-
 .../4127319a-8b79-4410-b58a-7a151e15f3d7.json |  9 ++++++-
 .../4d117223-a354-47fb-8b45-62ab1390a95f.json | 10 ++++++--
 .../5c1075ca-bb34-46a3-a7a0-029bd7463e79.json | 25 ++++++++++++++++---
 .../5ced85fc-fa1a-4217-95fd-0fb530545ce2.json | 15 ++++++++---
 .../6f56bf42-85b8-4fbb-8e06-6c44960184ba.json | 20 +++++++++++++--
 6 files changed, 69 insertions(+), 12 deletions(-)

diff --git a/evaluation_examples/examples/os/37887e8c-da15-4192-923c-08fa390a176d.json b/evaluation_examples/examples/os/37887e8c-da15-4192-923c-08fa390a176d.json
index 76e5ec6..9f4be8c 100644
--- a/evaluation_examples/examples/os/37887e8c-da15-4192-923c-08fa390a176d.json
+++ b/evaluation_examples/examples/os/37887e8c-da15-4192-923c-08fa390a176d.json
@@ -41,7 +41,7 @@
         "parameters": {
           "files": [
             {
-              "url": "https://drive.usercontent.google.com/download?id=1DakvqJfSokEPuH8_LYfSCBbM7Fws1F0o&export=download&authuser=0&confirm=t&uuid=4950eb71-7881-4b52-a94c-a3eed9d2213f&at=APZUnTUy4o4r1ScCnTgwPwNyqhPr:1707992479376",
+              "url": "https://drive.usercontent.google.com/download?id=1DakvqJfSokEPuH8_LYfSCBbM7Fws1F0o&export=download&authuser=0&confirm=t&uuid=42428619-921a-4405-abbf-e5fa7ea0e1b7&at=APZUnTX6nAbw51hosLfUD5CftRg6:1708528738860",
               "path": "eval.sh"
             }
           ]
diff --git a/evaluation_examples/examples/os/4127319a-8b79-4410-b58a-7a151e15f3d7.json b/evaluation_examples/examples/os/4127319a-8b79-4410-b58a-7a151e15f3d7.json
index c1ed512..31d573f 100644
--- a/evaluation_examples/examples/os/4127319a-8b79-4410-b58a-7a151e15f3d7.json
+++ b/evaluation_examples/examples/os/4127319a-8b79-4410-b58a-7a151e15f3d7.json
@@ -25,7 +25,14 @@
     {
       "type": "execute",
       "parameters": {
-        "command": "./setup.sh",
+        "command": "bash ./setup.sh",
+        "shell": true
+      }
+    },
+    {
+      "type": "execute",
+      "parameters": {
+        "command": "xdg-open /home/user/project",
         "shell": true
       }
     }
diff --git a/evaluation_examples/examples/os/4d117223-a354-47fb-8b45-62ab1390a95f.json b/evaluation_examples/examples/os/4d117223-a354-47fb-8b45-62ab1390a95f.json
index e1af8b6..9e69eba 100644
--- a/evaluation_examples/examples/os/4d117223-a354-47fb-8b45-62ab1390a95f.json
+++ b/evaluation_examples/examples/os/4d117223-a354-47fb-8b45-62ab1390a95f.json
@@ -36,17 +36,23 @@
         "command": [
           "python",
           "-c",
-          "import pyautogui; import time; pyautogui.click(960, 540); time.sleep(0.5);"
+          "import pyautogui; import time; pyautogui.click(960, 540); time.sleep(0.5); pyautogui.hotkey('ctrl', 'alt', 't'); time.sleep(0.5)"
         ]
       }
     },
+    {
+      "type": "activate_window",
+      "parameters": {
+        "window_name": "Terminal"
+      }
+    },
     {
       "type": "execute",
       "parameters": {
         "command": [
           "python",
           "-c",
-          "import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"alt\", \"t\"); time.sleep(0.5); pyautogui.type(\"cd testDir\"); time.sleep(0.5); pyautogui.press(\"enter\")"
+          "import pyautogui; import time; time.sleep(0.5); pyautogui.write('cd testDir'); time.sleep(0.5); pyautogui.press('enter'); time.sleep(0.5); pyautogui.write('clear'); time.sleep(0.5); pyautogui.press('enter')"
         ]
       }
     }
diff --git a/evaluation_examples/examples/os/5c1075ca-bb34-46a3-a7a0-029bd7463e79.json b/evaluation_examples/examples/os/5c1075ca-bb34-46a3-a7a0-029bd7463e79.json
index 7c69d7c..163b49c 100644
--- a/evaluation_examples/examples/os/5c1075ca-bb34-46a3-a7a0-029bd7463e79.json
+++ b/evaluation_examples/examples/os/5c1075ca-bb34-46a3-a7a0-029bd7463e79.json
@@ -27,7 +27,7 @@
       "parameters": {
         "files": [
           {
-            "url": "https://drive.usercontent.google.com/download?id=1GeXD_pWlqZ7HCco9RorjzJ_f3DeeP91V&export=download&authuser=0&confirm=t&uuid=f4054888-3228-440b-8833-55c50961ea90&at=APZUnTVCiBJw-lRosK673DlvhHyx:1707985591894",
+            "url": "https://drive.usercontent.google.com/download?id=1GeXD_pWlqZ7HCco9RorjzJ_f3DeeP91V&export=download&authuser=0&confirm=t&uuid=9d69de2c-9908-4f6e-ad21-e8329440ee89&at=APZUnTU0uxOOMOi0HuEo2mRLcyjW:1708524614643",
             "path": "eval.sh"
           }
         ]
@@ -50,8 +50,27 @@
     {
       "type": "execute",
       "parameters": {
-        "command": "cd test_environment",
-        "shell": true
+        "command": [
+          "python",
+          "-c",
+          "import pyautogui; import time; pyautogui.click(960, 540); time.sleep(0.5); pyautogui.hotkey('ctrl', 'alt', 't'); time.sleep(0.5)"
+        ]
+      }
+    },
+    {
+      "type": "activate_window",
+      "parameters": {
+        "window_name": "Terminal"
+      }
+    },
+    {
+      "type": "execute",
+      "parameters": {
+        "command": [
+          "python",
+          "-c",
+          "import pyautogui; import time; time.sleep(0.5); pyautogui.write('cd test_environment'); time.sleep(0.5); pyautogui.press('enter')"
+        ]
       }
     }
   ],
diff --git a/evaluation_examples/examples/os/5ced85fc-fa1a-4217-95fd-0fb530545ce2.json b/evaluation_examples/examples/os/5ced85fc-fa1a-4217-95fd-0fb530545ce2.json
index 48a7e72..770eb56 100644
--- a/evaluation_examples/examples/os/5ced85fc-fa1a-4217-95fd-0fb530545ce2.json
+++ b/evaluation_examples/examples/os/5ced85fc-fa1a-4217-95fd-0fb530545ce2.json
@@ -7,8 +7,17 @@
     {
       "type": "execute",
       "parameters": {
-        "command": "echo -e \"1\\n2\\n3\" > input.txt",
-        "shell": true
+        "command": [
+          "python",
+          "-c",
+          "import pyautogui; import time; pyautogui.click(960, 540); time.sleep(0.5); pyautogui.hotkey('ctrl', 'alt', 't'); time.sleep(0.5)"
+        ]
+      }
+    },
+    {
+      "type": "activate_window",
+      "parameters": {
+        "window_name": "Terminal"
       }
     }
   ],
@@ -23,7 +32,7 @@
         "parameters": {
           "files": [
             {
-              "url": "https://drive.usercontent.google.com/download?id=174Bk_JLDwuTTgL0hslzxRG4xB76JxXIR&export=download&authuser=0&confirm=t&uuid=b086a8a1-00b9-4d73-ae05-c34a7fe9693d&at=APZUnTV-T0wfR_ovBue_HnO-tcai:1707989145225",
+              "url": "https://drive.usercontent.google.com/download?id=174Bk_JLDwuTTgL0hslzxRG4xB76JxXIR&export=download&authuser=0&confirm=t&uuid=dc3f716f-e21b-4e46-bfa3-6f78b4c11ed4&at=APZUnTVgdfpiWjfBcp32gAuK57Bl:1708526599813",
               "path": "eval.sh"
             }
           ]
diff --git a/evaluation_examples/examples/os/6f56bf42-85b8-4fbb-8e06-6c44960184ba.json b/evaluation_examples/examples/os/6f56bf42-85b8-4fbb-8e06-6c44960184ba.json
index 7f010a7..c3051ad 100644
--- a/evaluation_examples/examples/os/6f56bf42-85b8-4fbb-8e06-6c44960184ba.json
+++ b/evaluation_examples/examples/os/6f56bf42-85b8-4fbb-8e06-6c44960184ba.json
@@ -10,7 +10,7 @@
       "parameters": {
         "files": [
           {
-            "url": "https://drive.usercontent.google.com/download?id=1BpFAuuH0IHxpWLDF3jaFNxa9QDe_F12D&export=download&authuser=0&confirm=t&uuid=4f18ddd0-81be-414b-b701-289544cdb321&at=APZUnTXlaYyRw5kPCIK4HezQJVrO:1708051633417",
+            "url": "https://drive.usercontent.google.com/download?id=1fQGBoTE40BLoNV_d4KYxbYjKRJdIPDio&export=download&authuser=0&confirm=t&uuid=50f5fff6-4c6b-4ce0-b36e-73f0d4bce8ca&at=APZUnTUA_3uAt14q25NN6dCY5X7e:1708531075515",
             "path": "setup.sh"
           }
         ]
@@ -29,6 +29,22 @@
         "command": "./setup.sh",
         "shell": true
       }
+    },
+    {
+      "type": "execute",
+      "parameters": {
+        "command": [
+          "python",
+          "-c",
+          "import pyautogui; import time; time.sleep(0.5); pyautogui.click(960, 540); time.sleep(0.5); pyautogui.hotkey('ctrl', 'alt', 't'); time.sleep(0.5)"
+        ]
+      }
+    },
+    {
+      "type": "activate_window",
+      "parameters": {
+        "window_name": "Terminal"
+      }
     }
   ],
   "related_apps": [
@@ -41,7 +57,7 @@
         "parameters": {
           "files": [
             {
-              "url": "https://drive.usercontent.google.com/download?id=1fQGBoTE40BLoNV_d4KYxbYjKRJdIPDio&export=download&authuser=0&confirm=t&uuid=b380dc07-a88b-4a65-affe-f3a77108d310&at=APZUnTVe08Pyr4RSR94AmQK7S8t_:1708051631877",
+              "url": "https://drive.usercontent.google.com/download?id=1BpFAuuH0IHxpWLDF3jaFNxa9QDe_F12D&export=download&authuser=0&confirm=t&uuid=695e8a3c-f354-4e9d-a4c3-e01f149be7be&at=APZUnTVHmmnZVIYLPCbuiQduSZZX:1708531243191",
               "path": "eval.sh"
             }
           ]