Environment is_used flag; OS domain fix (#219)

* Refactor evaluator structure in LibreOffice Writer example JSON to support multiple expected and result files, enhancing evaluation flexibility.
* Update instance type to t3.large and add VNC access URL logging for allocated VMs, enhancing remote access capabilities.
* Update instance type to t3.large and add VNC access URL logging for allocated VMs, enhancing remote access capabilities.
* Update time format in get_vm_file function to include hours, minutes, and seconds for more precise file naming with time suffix.
* Add more delay for 936321ce-5236-426a-9a20-e0e3c5dc536f; support one more potential solution.
* Enhance SetupController with a configurable retry limit and improved error handling for file opening requests. Introduce a new function to compare unique training records, and update logging for better debugging. Adjust JSON examples for evaluation to support multiple expected and result files.
* Clean up debug code.
* Enhance DesktopEnv to track environment usage for optimized snapshot management. Introduce an is_environment_used flag to determine if a snapshot revert is necessary based on provider type. Update setup and step methods to mark environment usage appropriately. Add a new execute_with_verification method in SetupController for command execution with result verification, improving reliability. Change AWS instance type to m5.large for better performance and update AMI ID for compatibility. Update file opening logic in main.py to handle both file paths and application commands more effectively.

---------

Co-authored-by: yuanmengqi <yuanmengqi@mail.ustc.edu.cn>
2025-06-28 00:45:53 +08:00
parent 48ac57697a
commit 0cc93543a8
5 changed files with 224 additions and 21 deletions
--- a/desktop_env/desktop_env.py
+++ b/desktop_env/desktop_env.py
@@ -18,7 +18,7 @@ logger = logging.getLogger("desktopenv.env")
 Metric = Callable[[Any, Any], float]
 Getter = Callable[[gym.Env, Dict[str, Any]], Any]

-MAX_RETRIES = 5
+MAX_RETRIES = 5 # Maximum retries for environment setup

 class DesktopEnv(gym.Env):
    """
@@ -72,6 +72,16 @@ class DesktopEnv(gym.Env):

        self.os_type = os_type

+        # Track whether environment has been used (step/setup) to optimize snapshot revert
+        # docker, aws, gcp, azure start out as unused because the emulator starts from a clean state
+        # vmware, virtualbox start out as used because the emulator starts from a dirty state
+        if self.provider_name in {"docker", "aws", "gcp", "azure"}:
+            self.is_environment_used = False
+        elif self.provider_name in {"vmware", "virtualbox"}:
+            self.is_environment_used = True
+        else:
+            raise ValueError(f"Invalid provider name: {self.provider_name}")
+
        # Initialize environment variables
        if path_to_vm:
            self.path_to_vm = os.path.abspath(os.path.expandvars(os.path.expanduser(path_to_vm))) \
@@ -190,11 +200,19 @@ class DesktopEnv(gym.Env):
                        logger.info("Using regular AWS provider.")

        
-            logger.info("Reverting to snapshot to {}...".format(self.snapshot_name))
-            self._revert_to_snapshot()
-            logger.info("Starting emulator...")
-            self._start_emulator()
-            logger.info("Emulator started.")
+            # Only revert to snapshot if environment has been used (step/setup)
+            # This optimization is especially important for cloud providers like AWS
+            # where unnecessary snapshot operations are costly and time-consuming
+            if self.is_environment_used:
+                logger.info("Environment has been used, reverting to snapshot {}...".format(self.snapshot_name))
+                self._revert_to_snapshot()
+                logger.info("Starting emulator...")
+                self._start_emulator()
+                logger.info("Emulator started.")
+                # Reset the usage flag after reverting
+                self.is_environment_used = False
+            else:
+                logger.info("Environment is clean, skipping snapshot revert (provider: {}).".format(self.provider_name))

            if task_config is not None:
                self._set_task_info(task_config)
@@ -202,6 +220,9 @@ class DesktopEnv(gym.Env):
                logger.info("Setting up environment...")
                success = self.setup_controller.setup(self.config)
                if success:
+                    # Mark environment as used when setup is successfully executed
+                    if self.config:  # Only mark as used if there were actual setup operations
+                        self.is_environment_used = True
                    break
                else:
                    logger.error(
@@ -300,6 +321,9 @@ class DesktopEnv(gym.Env):
    def step(self, action, pause=2):
        self._step_no += 1
        self.action_history.append(action)
+        
+        # Mark environment as used when step is called
+        self.is_environment_used = True

        reward = 0  # todo: Define reward calculation for each example
        done = False  # todo: Define episode termination condition for each example
@@ -336,7 +360,11 @@ class DesktopEnv(gym.Env):
        Evaluate whether the task is successfully completed.
        """

-        self.setup_controller.setup(self.evaluator.get("postconfig", []))
+        postconfig = self.evaluator.get("postconfig", [])
+        self.setup_controller.setup(postconfig)
+        # Mark environment as used if there were postconfig setup operations
+        if postconfig:
+            self.is_environment_used = True

        if self.evaluator['func'] == "infeasible":
            if len(self.action_history) > 0 and self.action_history[-1] == "FAIL":