Harden file upload and emulator start-up in desktop_env.

Summary of what this patch changes (text before the first "diff --git"
header is ignored by patch tools):

* desktop_env/controllers/setup.py (upload step of SetupController)
  - A missing local file now raises instead of logging and returning.
  - The file is opened inside a `with` block on every attempt, so each
    retry streams from the start and the handle is always closed.
  - The POST uses a (10 s connect, 600 s read) timeout.
  - Up to 3 attempts, sleeping 1 s then 2 s between them; a non-200
    response counts as a failed attempt. The last error is re-raised
    when every attempt fails.
* desktop_env/desktop_env.py
  - DesktopEnv.__init__ no longer wraps initialisation in a try/except
    that closed the environment and deleted the VM on failure.
  - DesktopEnv._start_emulator stops the emulator when start-up raises,
    logs a warning if that stop itself fails, and re-raises the original
    error.
* requirements.txt / setup.py
  - `requests` is relaxed from `~=2.31.0` to `>=2.31.0`.

diff --git a/desktop_env/controllers/setup.py b/desktop_env/controllers/setup.py
index c637ec4..2635cfc 100644
--- a/desktop_env/controllers/setup.py
+++ b/desktop_env/controllers/setup.py
@@ -199,26 +199,62 @@ class SetupController:
             path: str = f["path"]
 
             if not os.path.exists(local_path):
-                logger.error(f"Setup Upload - Invalid local path ({local_path}).")
-                return
+                raise FileNotFoundError(f"Setup Upload - Invalid local path ({local_path}).")
 
-            form = MultipartEncoder({
-                "file_path": path,
-                "file_data": (os.path.basename(path), open(local_path, "rb"))
-            })
-            headers = {"Content-Type": form.content_type}
-            logger.debug(form.content_type)
-
-            # send request to server to upload file
+            file_size = None
             try:
-                logger.debug("REQUEST ADDRESS: %s", self.http_server + "/setup" + "/upload")
-                response = requests.post(self.http_server + "/setup" + "/upload", headers=headers, data=form)
-                if response.status_code == 200:
-                    logger.info("Command executed successfully: %s", response.text)
-                else:
-                    logger.error("Failed to upload file. Status code: %s", response.text)
-            except requests.exceptions.RequestException as e:
-                logger.error("An error occurred while trying to send the request: %s", e)
+                file_size = os.path.getsize(local_path)
+            except OSError:
+                pass  # best effort: the size is only used in the log line below
+
+            max_retries = 3
+            last_error: Optional[Exception] = None
+
+            for attempt in range(max_retries):
+                try:
+                    logger.info(
+                        f"Uploading {os.path.basename(local_path)}{f' ({file_size} bytes)' if file_size is not None else ''} "
+                        f"to VM at {path} (attempt {attempt + 1}/{max_retries})"
+                    )
+                    logger.debug("REQUEST ADDRESS: %s", self.http_server + "/setup" + "/upload")
+
+                    # Open the file inside each attempt to ensure fresh stream position
+                    with open(local_path, "rb") as fp:
+                        form = MultipartEncoder({
+                            "file_path": path,
+                            "file_data": (os.path.basename(path), fp)
+                        })
+                        headers = {"Content-Type": form.content_type}
+                        logger.debug(form.content_type)
+
+                        # Explicit connect/read timeout to avoid hanging forever
+                        response = requests.post(
+                            self.http_server + "/setup" + "/upload",
+                            headers=headers,
+                            data=form,
+                            timeout=(10, 600)
+                        )
+
+                    if response.status_code == 200:
+                        logger.info(f"File uploaded successfully: {path}")
+                        logger.debug("Upload response: %s", response.text)
+                        last_error = None
+                        break
+                    else:
+                        msg = f"Failed to upload file {path}. Status code: {response.status_code}, Response: {response.text}"
+                        logger.error(msg)
+                        last_error = requests.RequestException(msg)
+
+                except requests.exceptions.RequestException as e:
+                    last_error = e
+                    logger.error(f"Upload attempt {attempt + 1} failed for {path}: {e}")
+
+                # Exponential backoff between retries
+                if attempt < max_retries - 1:
+                    time.sleep(2 ** attempt)
+
+            if last_error is not None:
+                raise last_error
 
     def _change_wallpaper_setup(self, path: str):
         if not path:
diff --git a/desktop_env/desktop_env.py b/desktop_env/desktop_env.py
index 9f621fc..4f5836d 100644
--- a/desktop_env/desktop_env.py
+++ b/desktop_env/desktop_env.py
@@ -172,54 +172,52 @@ class DesktopEnv(gym.Env):
         else:
             self.path_to_vm = self.manager.get_vm_path(os_type=self.os_type, region=region, screen_size=(self.screen_width, self.screen_height))
 
-        try:
-            self.snapshot_name = snapshot_name
-            self.cache_dir_base: str = cache_dir
-            # todo: add the logic to get the screen size from the VM
-            self.headless = headless
-            self.require_a11y_tree = require_a11y_tree
-            self.require_terminal = require_terminal
+        self.snapshot_name = snapshot_name
+        self.cache_dir_base: str = cache_dir
+        # todo: add the logic to get the screen size from the VM
+        self.headless = headless
+        self.require_a11y_tree = require_a11y_tree
+        self.require_terminal = require_terminal
 
-            # Initialize emulator and controller
-            logger.info("Initializing...")
-            self._start_emulator()
+        # Initialize emulator and controller
+        logger.info("Initializing...")
+        self._start_emulator()
 
-            # mode: human or machine
-            self.instruction = None
-            assert action_space in ["computer_13", "pyautogui", "claude_computer_use", "autoglm_computer_use"]
-            self.action_space = action_space  # todo: refactor it to the ActType
+        # mode: human or machine
+        self.instruction = None
+        assert action_space in ["computer_13", "pyautogui", "claude_computer_use", "autoglm_computer_use"]
+        self.action_space = action_space  # todo: refactor it to the ActType
+
+        # episodic stuffs, like counters, will be updated or reset
+        # when calling self.reset()
+        self._traj_no: int = -1
+        self._step_no: int = 0
+        self.action_history: List[Dict[str, any]] = []
 
-            # episodic stuffs, like counters, will be updated or reset
-            # when calling self.reset()
-            self._traj_no: int = -1
-            self._step_no: int = 0
-            self.action_history: List[Dict[str, any]] = []
-        except Exception as e:
-            logger.error(f"Failed to initialize DesktopEnv: {e}")
-            # If initialization fails, we should clean up the VM
-            try:
-                self.close()
-                self.manager.delete_vm(self.path_to_vm, self.region)
-                logger.info(f"Cleaned up VM {self.path_to_vm}.")
-            except Exception as cleanup_error:
-                logger.error(f"Failed to clean up VM {self.path_to_vm}: {cleanup_error}")
-            raise
 
     def _start_emulator(self):
-        # Power on the virtual machine
-        self.provider.start_emulator(self.path_to_vm, self.headless, self.os_type)
+        try:
+            # Power on the virtual machine
+            self.provider.start_emulator(self.path_to_vm, self.headless, self.os_type)
 
-        # Get the ip from the virtual machine, and setup the controller
-        vm_ip_ports = self.provider.get_ip_address(self.path_to_vm).split(':')
-        self.vm_ip = vm_ip_ports[0]
-        # Get the ports from the virtual machine (for Docker provider only)
-        if len(vm_ip_ports) > 1:
-            self.server_port = int(vm_ip_ports[1])
-            self.chromium_port = int(vm_ip_ports[2])
-            self.vnc_port = int(vm_ip_ports[3])
-            self.vlc_port = int(vm_ip_ports[4])
-        self.controller = PythonController(vm_ip=self.vm_ip, server_port=self.server_port)
-        self.setup_controller = SetupController(vm_ip=self.vm_ip, server_port=self.server_port, chromium_port=self.chromium_port, vlc_port=self.vlc_port, cache_dir=self.cache_dir_base, client_password=self.client_password, screen_width=self.screen_width, screen_height=self.screen_height)
+            # Get the ip from the virtual machine, and setup the controller
+            vm_ip_ports = self.provider.get_ip_address(self.path_to_vm).split(':')
+            self.vm_ip = vm_ip_ports[0]
+            # Get the ports from the virtual machine (for Docker provider only)
+            if len(vm_ip_ports) > 1:
+                self.server_port = int(vm_ip_ports[1])
+                self.chromium_port = int(vm_ip_ports[2])
+                self.vnc_port = int(vm_ip_ports[3])
+                self.vlc_port = int(vm_ip_ports[4])
+            self.controller = PythonController(vm_ip=self.vm_ip, server_port=self.server_port)
+            self.setup_controller = SetupController(vm_ip=self.vm_ip, server_port=self.server_port, chromium_port=self.chromium_port, vlc_port=self.vlc_port, cache_dir=self.cache_dir_base, client_password=self.client_password, screen_width=self.screen_width, screen_height=self.screen_height)
+
+        except Exception:
+            try:
+                self.provider.stop_emulator(self.path_to_vm)
+            except Exception as stop_err:
+                logger.warning(f"Cleanup after interrupt failed: {stop_err}")
+            raise
 
     def _revert_to_snapshot(self):
         # Revert to certain snapshot of the virtual machine, and refresh the path to vm and ip of vm
diff --git a/requirements.txt b/requirements.txt
index ab146f1..363d4ce 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,7 +2,7 @@ numpy~=1.24.4
 Pillow~=11.0.0
 fabric
 gymnasium~=0.28.1
-requests~=2.31.0
+requests>=2.31.0
 pytz~=2024.1
 transformers~=4.35.2
 torch~=2.5.0
diff --git a/setup.py b/setup.py
index 92f697a..0be81b9 100644
--- a/setup.py
+++ b/setup.py
@@ -42,7 +42,7 @@ setup(
         "Pillow~=11.0.0",
         "fabric",
         "gymnasium~=0.28.1",
-        "requests~=2.31.0",
+        "requests>=2.31.0",
         "pytz~=2024.1",
         "transformers~=4.35.2",
         "torch~=2.5.0",