diff --git a/desktop_env/controllers/setup.py b/desktop_env/controllers/setup.py
index 6728370..3a1d11a 100644
--- a/desktop_env/controllers/setup.py
+++ b/desktop_env/controllers/setup.py
@@ -305,6 +305,57 @@ class SetupController:
             if not terminates:
                 time.sleep(0.3)
 
+    def _execute_with_verification_setup(
+            self,
+            command: List[str],
+            verification: Dict[str, Any] = None,
+            max_wait_time: int = 10,
+            check_interval: float = 1.0,
+            shell: bool = False
+    ):
+        """Execute command with verification of results
+        
+        Args:
+            command: Command to execute
+            verification: Dict with verification criteria:
+                - window_exists: Check if window with this name exists
+                - command_success: Execute this command and check if it succeeds
+            max_wait_time: Maximum time to wait for verification
+            check_interval: Time between verification checks
+            shell: Whether to use shell
+        """
+        if not command:
+            raise Exception("Empty command to launch.")
+
+        verification = verification or {}
+        
+        payload = json.dumps({
+            "command": command, 
+            "shell": shell,
+            "verification": verification,
+            "max_wait_time": max_wait_time,
+            "check_interval": check_interval
+        })
+        headers = {"Content-Type": "application/json"}
+
+        try:
+            response = requests.post(self.http_server + "/setup" + "/execute_with_verification", 
+                                   headers=headers, data=payload, timeout=max_wait_time + 10)
+            if response.status_code == 200:
+                result = response.json()
+                logger.info("Command executed and verified successfully: %s -> %s"
+                            , " ".join(command) if isinstance(command, list) else command
+                            , response.text
+                            )
+                return result
+            else:
+                logger.error("Failed to execute with verification. Status code: %s", response.text)
+                raise Exception(f"Command verification failed: {response.text}")
+        except requests.exceptions.RequestException as e:
+            logger.error("An error occurred while trying to send the request: %s", e)
+            traceback.print_exc()
+            raise Exception(f"Request failed: {e}")
+
     def _command_setup(self, command: List[str], **kwargs):
         self._execute_setup(command, **kwargs)
 
diff --git a/desktop_env/desktop_env.py b/desktop_env/desktop_env.py
index 72bea71..4c8cc48 100644
--- a/desktop_env/desktop_env.py
+++ b/desktop_env/desktop_env.py
@@ -18,7 +18,7 @@ logger = logging.getLogger("desktopenv.env")
 Metric = Callable[[Any, Any], float]
 Getter = Callable[[gym.Env, Dict[str, Any]], Any]
 
-MAX_RETRIES = 5
+MAX_RETRIES = 5 # Maximum retries for environment setup
 
 class DesktopEnv(gym.Env):
     """
@@ -72,6 +72,16 @@ class DesktopEnv(gym.Env):
 
         self.os_type = os_type
 
+        # Track whether environment has been used (step/setup) to optimize snapshot revert
+        # docker, aws, gcp, azure are always unused as the emulator starts from a clean state
+        # vmware, virtualbox are always used as the emulator starts from a dirty state
+        if self.provider_name in {"docker", "aws", "gcp", "azure"}:
+            self.is_environment_used = False
+        elif self.provider_name in {"vmware", "virtualbox"}:
+            self.is_environment_used = True
+        else:
+            raise ValueError(f"Invalid provider name: {self.provider_name}")
+
         # Initialize environment variables
         if path_to_vm:
             self.path_to_vm = os.path.abspath(os.path.expandvars(os.path.expanduser(path_to_vm))) \
@@ -190,11 +200,19 @@ class DesktopEnv(gym.Env):
                         logger.info("Using regular AWS provider.")
 
         
-            logger.info("Reverting to snapshot to {}...".format(self.snapshot_name))
-            self._revert_to_snapshot()
-            logger.info("Starting emulator...")
-            self._start_emulator()
-            logger.info("Emulator started.")
+            # Only revert to snapshot if environment has been used (step/setup)
+            # This optimization is especially important for cloud providers like AWS
+            # where unnecessary snapshot operations are costly and time-consuming
+            if self.is_environment_used:
+                logger.info("Environment has been used, reverting to snapshot {}...".format(self.snapshot_name))
+                self._revert_to_snapshot()
+                logger.info("Starting emulator...")
+                self._start_emulator()
+                logger.info("Emulator started.")
+                # Reset the usage flag after reverting
+                self.is_environment_used = False
+            else:
+                logger.info("Environment is clean, skipping snapshot revert (provider: {}).".format(self.provider_name))
 
             if task_config is not None:
                 self._set_task_info(task_config)
@@ -202,6 +220,9 @@ class DesktopEnv(gym.Env):
                 logger.info("Setting up environment...")
                 success = self.setup_controller.setup(self.config)
                 if success:
+                    # Mark environment as used when setup is successfully executed
+                    if self.config:  # Only mark as used if there were actual setup operations
+                        self.is_environment_used = True
                     break
                 else:
                     logger.error(
@@ -300,6 +321,9 @@ class DesktopEnv(gym.Env):
     def step(self, action, pause=2):
         self._step_no += 1
         self.action_history.append(action)
+        
+        # Mark environment as used when step is called
+        self.is_environment_used = True
 
         reward = 0  # todo: Define reward calculation for each example
         done = False  # todo: Define episode termination condition for each example
@@ -336,7 +360,11 @@ class DesktopEnv(gym.Env):
         Evaluate whether the task is successfully completed.
         """
 
-        self.setup_controller.setup(self.evaluator.get("postconfig", []))
+        postconfig = self.evaluator.get("postconfig", [])
+        self.setup_controller.setup(postconfig)
+        # Mark environment as used if there were postconfig setup operations
+        if postconfig:
+            self.is_environment_used = True
 
         if self.evaluator['func'] == "infeasible":
             if len(self.action_history) > 0 and self.action_history[-1] == "FAIL":
diff --git a/desktop_env/providers/aws/manager.py b/desktop_env/providers/aws/manager.py
index bcc89c5..076c182 100644
--- a/desktop_env/providers/aws/manager.py
+++ b/desktop_env/providers/aws/manager.py
@@ -6,7 +6,8 @@ import logging
 import dotenv
 import signal
 
-INSTANCE_TYPE = "t3.medium" 
+# Using m5.large for better storage I/O performance to match io2 capabilities
+INSTANCE_TYPE = "m5.large"
 
 # Load environment variables from .env file
 dotenv.load_dotenv()
diff --git a/desktop_env/server/main.py b/desktop_env/server/main.py
index de8009d..c39943e 100644
--- a/desktop_env/server/main.py
+++ b/desktop_env/server/main.py
@@ -117,6 +117,113 @@ def execute_command():
         }), 500
 
 
+@app.route('/setup/execute_with_verification', methods=['POST'])
+@app.route('/execute_with_verification', methods=['POST'])
+def execute_command_with_verification():
+    """Execute command and verify the result based on provided verification criteria"""
+    data = request.json
+    shell = data.get('shell', False)
+    command = data.get('command', "" if shell else [])
+    verification = data.get('verification', {})
+    max_wait_time = data.get('max_wait_time', 10)  # Maximum wait time in seconds
+    check_interval = data.get('check_interval', 1)  # Check interval in seconds
+
+    if isinstance(command, str) and not shell:
+        command = shlex.split(command)
+
+    # Expand user directory
+    for i, arg in enumerate(command):
+        if arg.startswith("~/"):
+            command[i] = os.path.expanduser(arg)
+
+    # Execute the main command
+    try:
+        if platform_name == "Windows":
+            flags = subprocess.CREATE_NO_WINDOW
+        else:
+            flags = 0
+        result = subprocess.run(
+            command,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            shell=shell,
+            text=True,
+            timeout=120,
+            creationflags=flags,
+        )
+        
+        # If no verification is needed, return immediately
+        if not verification:
+            return jsonify({
+                'status': 'success',
+                'output': result.stdout,
+                'error': result.stderr,
+                'returncode': result.returncode
+            })
+        
+        # Wait and verify the result
+        import time
+        start_time = time.time()
+        while time.time() - start_time < max_wait_time:
+            verification_passed = True
+            
+            # Check window existence if specified
+            if 'window_exists' in verification:
+                window_name = verification['window_exists']
+                try:
+                    if platform_name == 'Linux':
+                        wmctrl_result = subprocess.run(['wmctrl', '-l'], 
+                                                     capture_output=True, text=True, check=True)
+                        if window_name.lower() not in wmctrl_result.stdout.lower():
+                            verification_passed = False
+                    elif platform_name in ['Windows', 'Darwin']:
+                        import pygetwindow as gw
+                        windows = gw.getWindowsWithTitle(window_name)
+                        if not windows:
+                            verification_passed = False
+                except Exception:
+                    verification_passed = False
+            
+            # Check command execution if specified
+            if 'command_success' in verification:
+                verify_cmd = verification['command_success']
+                try:
+                    verify_result = subprocess.run(verify_cmd, shell=True, 
+                                                 capture_output=True, text=True, timeout=5)
+                    if verify_result.returncode != 0:
+                        verification_passed = False
+                except Exception:
+                    verification_passed = False
+            
+            if verification_passed:
+                return jsonify({
+                    'status': 'success',
+                    'output': result.stdout,
+                    'error': result.stderr,
+                    'returncode': result.returncode,
+                    'verification': 'passed',
+                    'wait_time': time.time() - start_time
+                })
+            
+            time.sleep(check_interval)
+        
+        # Verification failed
+        return jsonify({
+            'status': 'verification_failed',
+            'output': result.stdout,
+            'error': result.stderr,
+            'returncode': result.returncode,
+            'verification': 'failed',
+            'wait_time': max_wait_time
+        }), 500
+        
+    except Exception as e:
+        return jsonify({
+            'status': 'error',
+            'message': str(e)
+        }), 500
+
+
 def _get_machine_architecture() -> str:
     """ Get the machine architecture, e.g., x86_64, arm64, aarch64, i386, etc.
     """
@@ -1129,20 +1236,36 @@ def open_file():
 
     path_obj = Path(os.path.expandvars(os.path.expanduser(path)))
 
-    if not path_obj.exists():
-        return f"File not found: {path_obj}", 404
+    # Check if it's a file path that exists
+    is_file_path = path_obj.exists()
+    
+    # If it's not a file path, treat it as an application name/command
+    if not is_file_path:
+        # Check if it's a valid command by trying to find it in PATH
+        import shutil
+        if not shutil.which(path):
+            return f"Application/file not found: {path}", 404
 
     try:
-        if platform.system() == "Windows":
-            os.startfile(path_obj)
+        if is_file_path:
+            # Handle file opening
+            if platform.system() == "Windows":
+                os.startfile(path_obj)
+            else:
+                open_cmd: str = "open" if platform.system() == "Darwin" else "xdg-open"
+                subprocess.Popen([open_cmd, str(path_obj)])
+            file_name = path_obj.name
+            file_name_without_ext, _ = os.path.splitext(file_name)
         else:
-            open_cmd: str = "open" if platform.system() == "Darwin" else "xdg-open"
-            subprocess.Popen([open_cmd, str(path_obj)])
+            # Handle application launching
+            if platform.system() == "Windows":
+                subprocess.Popen([path])
+            else:
+                subprocess.Popen([path])
+            file_name = path
+            file_name_without_ext = path
 
-        # Wait for the file to open
-        file_name = path_obj.name
-        # Some apps don't include the extension in the title
-        file_name_without_ext, _ = os.path.splitext(file_name)
+        # Wait for the file/application to open
 
         start_time = time.time()
         window_found = False
@@ -1365,7 +1488,7 @@ def end_recording():
     error_output = ""
     try:
         # Send SIGINT for a graceful shutdown, allowing ffmpeg to finalize the file.
-    recording_process.send_signal(signal.SIGINT)
+        recording_process.send_signal(signal.SIGINT)
         # Wait for ffmpeg to terminate. communicate() gets output and waits.
         _, error_output = recording_process.communicate(timeout=15)
     except subprocess.TimeoutExpired:
@@ -1373,7 +1496,7 @@ def end_recording():
         recording_process.kill()
         # After killing, communicate to get any remaining output.
         _, error_output = recording_process.communicate()
-    recording_process = None
+        recording_process = None
         return jsonify({
             'status': 'error',
             'message': f'Recording process was unresponsive and had to be killed. Stderr: {error_output}'
diff --git a/evaluation_examples/examples/os/4127319a-8b79-4410-b58a-7a151e15f3d7.json b/evaluation_examples/examples/os/4127319a-8b79-4410-b58a-7a151e15f3d7.json
index 36be485..eb73a32 100644
--- a/evaluation_examples/examples/os/4127319a-8b79-4410-b58a-7a151e15f3d7.json
+++ b/evaluation_examples/examples/os/4127319a-8b79-4410-b58a-7a151e15f3d7.json
@@ -50,7 +50,7 @@
       "type": "rule",
       "rules": {
         "include": [
-          "54\n"
+          "54"
         ],
         "exclude": []
       }