diff --git a/desktop_env/controllers/setup.py b/desktop_env/controllers/setup.py index 4373600..65b4a8a 100644 --- a/desktop_env/controllers/setup.py +++ b/desktop_env/controllers/setup.py @@ -36,6 +36,8 @@ FILE_PATH = os.path.dirname(os.path.abspath(__file__)) init_proxy_pool(PROXY_CONFIG_FILE) # initialize the global proxy pool +MAX_RETRIES = 20 + class SetupController: def __init__(self, vm_ip: str, server_port: int = 5000, chromium_port: int = 9222, vlc_port: int = 8080, cache_dir: str = "cache"): self.vm_ip: str = vm_ip @@ -64,20 +66,20 @@ class SetupController: # make sure connection can be established logger.info(f"try to connect {self.http_server}") retry = 0 - while retry < 50: + while retry < MAX_RETRIES: try: _ = requests.get(self.http_server + "/terminal") break except: time.sleep(5) retry += 1 - logger.info(f"retry: {retry}/50") + logger.info(f"retry: {retry}/{MAX_RETRIES}") - if retry == 50: + if retry == MAX_RETRIES: return False - for cfg in config: + for i, cfg in enumerate(config): config_type: str = cfg["type"] parameters: Dict[str, Any] = cfg["parameters"] @@ -85,10 +87,17 @@ class SetupController: # protocol setup_function: str = "_{:}_setup".format(config_type) assert hasattr(self, setup_function), f'Setup controller cannot find init function {setup_function}' - logger.info(f"call function {setup_function}") - getattr(self, setup_function)(**parameters) - - logger.info("SETUP: %s(%s)", setup_function, str(parameters)) + + try: + logger.info(f"Executing setup step {i+1}/{len(config)}: {setup_function}") + logger.debug(f"Setup parameters: {parameters}") + getattr(self, setup_function)(**parameters) + logger.info(f"SETUP COMPLETED: {setup_function}({str(parameters)})") + except Exception as e: + logger.error(f"SETUP FAILED at step {i+1}/{len(config)}: {setup_function}({str(parameters)})") + logger.error(f"Error details: {e}") + logger.error(f"Traceback: {traceback.format_exc()}") + raise Exception(f"Setup step {i+1} failed: {setup_function} - {e}") from e return True @@ -111,25 +120,41 @@ class SetupController: raise Exception(f"Setup Download - Invalid URL ({url}) or path ({path}).") if not os.path.exists(cache_path): + logger.info(f"Cache file not found, downloading from {url} to {cache_path}") max_retries = 3 downloaded = False e = None for i in range(max_retries): try: - response = requests.get(url, stream=True) + logger.info(f"Download attempt {i+1}/{max_retries} for {url}") + response = requests.get(url, stream=True, timeout=300) # Add 5 minute timeout response.raise_for_status() + + # Get file size if available + total_size = int(response.headers.get('content-length', 0)) + if total_size > 0: + logger.info(f"File size: {total_size / (1024*1024):.2f} MB") + downloaded_size = 0 with open(cache_path, 'wb') as f: for chunk in response.iter_content(chunk_size=8192): if chunk: f.write(chunk) - logger.info("File downloaded successfully") + downloaded_size += len(chunk) + if total_size > 0 and downloaded_size % (1024*1024) == 0: # Log every MB + progress = (downloaded_size / total_size) * 100 + logger.info(f"Download progress: {progress:.1f}%") + + logger.info(f"File downloaded successfully to {cache_path} ({downloaded_size / (1024*1024):.2f} MB)") downloaded = True break except requests.RequestException as e: logger.error( f"Failed to download {url} caused by {e}. Retrying... 
({max_retries - i - 1} attempts left)") + # Clean up partial download + if os.path.exists(cache_path): + os.remove(cache_path) if not downloaded: raise requests.RequestException(f"Failed to download {url}. No retries left.") @@ -142,14 +167,18 @@ class SetupController: # send request to server to upload file try: + logger.info(f"Uploading {os.path.basename(path)} to VM at {path}") logger.debug("REQUEST ADDRESS: %s", self.http_server + "/setup" + "/upload") - response = requests.post(self.http_server + "/setup" + "/upload", headers=headers, data=form) + response = requests.post(self.http_server + "/setup" + "/upload", headers=headers, data=form, timeout=600) # 10 minute timeout for upload if response.status_code == 200: - logger.info("Command executed successfully: %s", response.text) + logger.info(f"File uploaded successfully: {path}") + logger.debug("Upload response: %s", response.text) else: - logger.error("Failed to upload file. Status code: %s", response.text) + logger.error(f"Failed to upload file {path}. Status code: {response.status_code}, Response: {response.text}") + raise requests.RequestException(f"Upload failed with status {response.status_code}") except requests.exceptions.RequestException as e: - logger.error("An error occurred while trying to send the request: %s", e) + logger.error(f"An error occurred while trying to upload {path}: {e}") + raise def _upload_file_setup(self, files: List[Dict[str, str]]): """ @@ -219,13 +248,14 @@ class SetupController: # send request to server to open file try: - response = requests.post(self.http_server + "/setup" + "/open_file", headers=headers, data=payload) - if response.status_code == 200: - logger.info("Command executed successfully: %s", response.text) - else: - logger.error("Failed to open file. Status code: %s", response.text) + # The server-side call is now blocking and can take time. + # We set a timeout that is slightly longer than the server's timeout (1800s). + response = requests.post(self.http_server + "/setup" + "/open_file", headers=headers, data=payload, timeout=1810) + response.raise_for_status() # This will raise an exception for 4xx and 5xx status codes + logger.info("Command executed successfully: %s", response.text) except requests.exceptions.RequestException as e: - logger.error("An error occurred while trying to send the request: %s", e) + logger.error(f"Failed to open file '{path}'. An error occurred while trying to send the request or the server responded with an error: {e}") + raise Exception(f"Failed to open file '{path}'. 
An error occurred while trying to send the request or the server responded with an error: {e}") from e def _launch_setup(self, command: Union[str, List[str]], shell: bool = False): if not command: @@ -302,6 +332,57 @@ class SetupController: if not terminates: time.sleep(0.3) + def _execute_with_verification_setup( + self, + command: List[str], + verification: Dict[str, Any] = None, + max_wait_time: int = 10, + check_interval: float = 1.0, + shell: bool = False + ): + """Execute command with verification of results + + Args: + command: Command to execute + verification: Dict with verification criteria: + - window_exists: Check if window with this name exists + - command_success: Execute this command and check if it succeeds + max_wait_time: Maximum time to wait for verification + check_interval: Time between verification checks + shell: Whether to use shell + """ + if not command: + raise Exception("Empty command to launch.") + + verification = verification or {} + + payload = json.dumps({ + "command": command, + "shell": shell, + "verification": verification, + "max_wait_time": max_wait_time, + "check_interval": check_interval + }) + headers = {"Content-Type": "application/json"} + + try: + response = requests.post(self.http_server + "/setup" + "/execute_with_verification", + headers=headers, data=payload, timeout=max_wait_time + 10) + if response.status_code == 200: + result = response.json() + logger.info("Command executed and verified successfully: %s -> %s" + , " ".join(command) if isinstance(command, list) else command + , response.text + ) + return result + else: + logger.error("Failed to execute with verification. Status code: %s", response.text) + raise Exception(f"Command verification failed: {response.text}") + except requests.exceptions.RequestException as e: + logger.error("An error occurred while trying to send the request: %s", e) + traceback.print_exc() + raise Exception(f"Request failed: {e}") + def _command_setup(self, command: List[str], **kwargs): self._execute_setup(command, **kwargs) diff --git a/desktop_env/desktop_env.py b/desktop_env/desktop_env.py index 72bea71..4c8cc48 100644 --- a/desktop_env/desktop_env.py +++ b/desktop_env/desktop_env.py @@ -18,7 +18,7 @@ logger = logging.getLogger("desktopenv.env") Metric = Callable[[Any, Any], float] Getter = Callable[[gym.Env, Dict[str, Any]], Any] -MAX_RETRIES = 5 +MAX_RETRIES = 5 # Maximum retries for environment setup class DesktopEnv(gym.Env): """ @@ -72,6 +72,16 @@ class DesktopEnv(gym.Env): self.os_type = os_type + # Track whether environment has been used (step/setup) to optimize snapshot revert + # docker, aws, gcp, azure are always unused as the emulator starts from a clean state + # vmware, virtualbox are always used as the emulator starts from a dirty state + if self.provider_name in {"docker", "aws", "gcp", "azure"}: + self.is_environment_used = False + elif self.provider_name in {"vmware", "virtualbox"}: + self.is_environment_used = True + else: + raise ValueError(f"Invalid provider name: {self.provider_name}") + # Initialize environment variables if path_to_vm: self.path_to_vm = os.path.abspath(os.path.expandvars(os.path.expanduser(path_to_vm))) \ @@ -190,11 +200,19 @@ class DesktopEnv(gym.Env): logger.info("Using regular AWS provider.") - logger.info("Reverting to snapshot to {}...".format(self.snapshot_name)) - self._revert_to_snapshot() - logger.info("Starting emulator...") - self._start_emulator() - logger.info("Emulator started.") + # Only revert to snapshot if environment has been used (step/setup) + # 
This optimization is especially important for cloud providers like AWS + # where unnecessary snapshot operations are costly and time-consuming + if self.is_environment_used: + logger.info("Environment has been used, reverting to snapshot {}...".format(self.snapshot_name)) + self._revert_to_snapshot() + logger.info("Starting emulator...") + self._start_emulator() + logger.info("Emulator started.") + # Reset the usage flag after reverting + self.is_environment_used = False + else: + logger.info("Environment is clean, skipping snapshot revert (provider: {}).".format(self.provider_name)) if task_config is not None: self._set_task_info(task_config) @@ -202,6 +220,9 @@ class DesktopEnv(gym.Env): logger.info("Setting up environment...") success = self.setup_controller.setup(self.config) if success: + # Mark environment as used when setup is successfully executed + if self.config: # Only mark as used if there were actual setup operations + self.is_environment_used = True break else: logger.error( @@ -300,6 +321,9 @@ class DesktopEnv(gym.Env): def step(self, action, pause=2): self._step_no += 1 self.action_history.append(action) + + # Mark environment as used when step is called + self.is_environment_used = True reward = 0 # todo: Define reward calculation for each example done = False # todo: Define episode termination condition for each example @@ -336,7 +360,11 @@ class DesktopEnv(gym.Env): Evaluate whether the task is successfully completed. """ - self.setup_controller.setup(self.evaluator.get("postconfig", [])) + postconfig = self.evaluator.get("postconfig", []) + self.setup_controller.setup(postconfig) + # Mark environment as used if there were postconfig setup operations + if postconfig: + self.is_environment_used = True if self.evaluator['func'] == "infeasible": if len(self.action_history) > 0 and self.action_history[-1] == "FAIL": diff --git a/desktop_env/evaluators/getters/file.py b/desktop_env/evaluators/getters/file.py index 9171e7d..f4ab03a 100644 --- a/desktop_env/evaluators/getters/file.py +++ b/desktop_env/evaluators/getters/file.py @@ -80,18 +80,16 @@ def get_vm_file(env, config: Dict[str, Any]) -> Union[Optional[str], List[Option returned. only support for single file now: time_suffix(bool): optional. defaults to False. if True, append the current time in required format. - time_format(str): optional. defaults to "%Y_%m_%d". format of the time suffix. + time_format(str): optional. defaults to "%Y%m%d_%H%M%S". format of the time suffix. """ - time_format = "%Y_%m_%d" + time_format = "%Y%m%d_%H%M%S" if not config.get("multi", False): paths: List[str] = [config["path"]] dests: List[str] = [config["dest"]] - if "time_suffix" in config.keys() and config["time_suffix"]: - if "time_format" in config.keys(): - time_format = config["time_format"] - # Insert time before . in file type suffix - paths = [p.split(".")[0] + datetime.now().strftime(time_format) + "." + p.split(".")[1] if "." in p else p for p in paths] - dests = [d.split(".")[0] + datetime.now().strftime(time_format) + "." + d.split(".")[1] if "." in d else d for d in dests] + if config.get("time_suffix", False): + time_format = config.get("time_format", time_format) + # Insert time before file extension. 
+ dests = [f"{os.path.splitext(d)[0]}_{datetime.now().strftime(time_format)}{os.path.splitext(d)[1]}" for d in dests] else: paths: List[str] = config["path"] dests: List[str] = config["dest"] diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py index c628206..40b860e 100644 --- a/desktop_env/evaluators/metrics/__init__.py +++ b/desktop_env/evaluators/metrics/__init__.py @@ -53,7 +53,8 @@ from .docs import ( compare_docx_files_and_ignore_new_lines, compare_docx_images, compare_image_text, - compare_references + compare_references, + compare_unique_train_records ) from .general import ( check_csv, diff --git a/desktop_env/evaluators/metrics/docs.py b/desktop_env/evaluators/metrics/docs.py index 81b3dc0..908a387 100644 --- a/desktop_env/evaluators/metrics/docs.py +++ b/desktop_env/evaluators/metrics/docs.py @@ -167,8 +167,12 @@ def compare_docx_files(file1, file2, **options): if ignore_case: p1, p2 = p1.lower(), p2.lower() if p1 != p2: - print(p1) - print(p2) + # show the difference + print("=== First Paragraph ===") + print(f"\033[92m{repr(p1)}\033[0m") # Green color for p1, repr() shows hidden chars + print("=== Second Paragraph ===") + print(f"\033[91m{repr(p2)}\033[0m") # Red color for p2, repr() shows hidden chars + print("=" * 50) # Clear boundary return 0 return 1 @@ -886,3 +890,72 @@ def compare_references(file1, file2, **options): return (result - reference_base_result) / (1 - reference_base_result) else: return 0 + + +def compare_unique_train_records(processed_file, expected_files, **kwargs): + """ + Compares the processed file with a list of expected files containing the + gold standard and the initial document. + expected_files[0] should be the gold standard file. + expected_files[1] should be the initial file. + """ + # Debug logging to understand what we're actually receiving + logger.info(f"DEBUG: processed_file type: {type(processed_file)}, value: {processed_file}") + logger.info(f"DEBUG: expected_files type: {type(expected_files)}, value: {expected_files}") + logger.info(f"DEBUG: kwargs: {kwargs}") + + if not processed_file or not isinstance(expected_files, list) or len(expected_files) < 2: + logger.error("Invalid arguments: processed_file and a list of 2 expected_files are required.") + return 0 + + gold_file = expected_files[0] + initial_file = expected_files[1] + + if not gold_file or not initial_file: + logger.error("Gold file or initial file path is missing from expected_files list.") + return 0 + + # Helper function to get lines and IDs from a file + def get_lines_and_ids_from_file(file_path): + try: + doc = Document(file_path) + lines = [p.text.strip() for p in doc.paragraphs if p.text.strip()] + train_ids = [line.split(',')[1].strip() for line in lines if len(line.split(',')) == 4] + return lines, train_ids + except Exception as e: + logger.error(f"Error opening or parsing file {file_path}: {e}") + return None, None + + # Get data from all three files + processed_lines, processed_train_ids = get_lines_and_ids_from_file(processed_file) + if processed_lines is None: return 0 + + gold_lines, gold_train_ids = get_lines_and_ids_from_file(gold_file) + if gold_lines is None: return 0 + + initial_lines, _ = get_lines_and_ids_from_file(initial_file) + if initial_lines is None: return 0 + initial_lines_set = set(initial_lines) + + # 1. 
Subset Check: Ensure every processed line was in the initial file + if not set(processed_lines).issubset(initial_lines_set): + logger.error("Processed file contains lines not present in the initial file.") + logger.error(f"Extra lines: {set(processed_lines) - initial_lines_set}") + return 0 + + # 2. Uniqueness Check: Check for duplicates within the processed file + if len(processed_train_ids) != len(set(processed_train_ids)): + logger.error("Duplicate train_ids found in the processed file.") + return 0 + + # 3. Correctness Check: Compare the set of train_ids + if set(processed_train_ids) != set(gold_train_ids): + logger.error("Set of train_ids does not match between processed file and gold file.") + return 0 + + # 4. Line count check + if len(processed_lines) != len(gold_lines): + logger.error("Number of lines does not match between processed file and gold file.") + return 0 + + return 1 diff --git a/desktop_env/evaluators/metrics/vlc.py b/desktop_env/evaluators/metrics/vlc.py index 5af7b50..bb8e5de 100644 --- a/desktop_env/evaluators/metrics/vlc.py +++ b/desktop_env/evaluators/metrics/vlc.py @@ -3,6 +3,7 @@ import os import subprocess from typing import Dict from xml.etree import ElementTree +from urllib.parse import urlparse import acoustid import cv2 @@ -26,15 +27,108 @@ def is_vlc_playing(actual_status_path: str, rule: Dict[str, str]) -> float: tree = ElementTree.fromstring(actual_status) status = tree.find('state').text + logger.info(f"VLC Status: {status}") if status == 'playing': if rule['type'] == 'file_name': - file_info = tree.find('information/category[@name="meta"]/info[@name="filename"]').text + # Try multiple possible paths for file information in VLC XML + file_paths = [ + 'information/category[@name="meta"]/info[@name="filename"]', + 'information/category[@name="meta"]/info[@name="title"]', + 'information/category[@name="meta"]/info[@name="uri"]', + 'information/category[@name="meta"]/info[@name="location"]', + 'information/category[@name="meta"]/info[@name="name"]' + ] + + file_info = None + for path in file_paths: + element = tree.find(path) + if element is not None and element.text: + file_info = element.text + break + if file_info: - return 1 if file_info.endswith(rule['file_name']) else 0 + expected_filename = rule['file_name'] + + # Method 1: Direct filename match (most precise) + actual_basename = os.path.basename(file_info) + if actual_basename == expected_filename: + return 1 + + # Method 2: Endswith match (for backward compatibility) + if file_info.endswith(expected_filename): + return 1 + + # Method 3: For paths, check if expected filename is in the path + if expected_filename in file_info: + # Additional check to avoid false positives + # Make sure it's actually the filename, not just part of a path + if file_info.endswith('/' + expected_filename) or file_info.endswith('\\' + expected_filename): + return 1 + + logger.warning(f"File name mismatch - Expected: {expected_filename}, Found: {file_info}") + return 0 + else: + logger.warning(f"Could not find file information in VLC status XML for rule: {rule}") + return 0 elif rule['type'] == 'url': - file_info = tree.find('information/category[@name="meta"]/info[@name="url"]').text + # Try multiple possible paths for URL information in VLC XML + url_paths = [ + 'information/category[@name="meta"]/info[@name="url"]', + 'information/category[@name="meta"]/info[@name="URI"]', + 'information/category[@name="meta"]/info[@name="location"]', + 'information/category[@name="meta"]/info[@name="title"]', # Sometimes URL is in 
title for streams + 'information/category[@name="meta"]/info[@name="filename"]', # Sometimes URL is in filename for streams + 'information/category[@name="Stream 0"]/info[@name="Codec"]', # Try stream info + 'information/category[@name="Stream 0"]/info[@name="Type"]', + 'information/category[@name="Stream 0"]/info[@name="Language"]' + ] + + file_info = None + logger.debug(f"Looking for URL: {rule['url']}") + + for path in url_paths: + element = tree.find(path) + if element is not None and element.text: + file_info = element.text + logger.debug(f"Found URL info at '{path}': {file_info}") + break + if file_info: - return 1 if file_info.endswith(rule['url']) else 0 + # For URL comparison, check if the rule URL is contained in the file_info + # This handles cases where VLC might show a longer or modified URL + expected_url = rule['url'] + + # Method 1: Direct URL match + if expected_url in file_info or file_info.endswith(expected_url): + return 1 + + # Method 2: For HLS streams, VLC often shows just the filename instead of full URL + # Check if the file_info matches the filename part of the expected URL + try: + expected_parsed = urlparse(expected_url) + expected_filename = os.path.basename(expected_parsed.path) + + # If VLC shows just the filename (common for HLS streams) + if file_info == expected_filename: + logger.info(f"URL filename match - Expected URL: {expected_url}, VLC shows filename: {file_info}") + return 1 + + # Method 3: Check if both are URLs from the same domain and similar path + if '://' in file_info: # file_info is also a URL + actual_parsed = urlparse(file_info) + # Same domain and similar path structure + if (expected_parsed.netloc == actual_parsed.netloc and + expected_parsed.path in actual_parsed.path): + return 1 + except Exception as e: + logger.debug(f"URL parsing error: {e}") + pass + + logger.warning(f"URL mismatch - Expected: {expected_url}, Found: {file_info}") + return 0 + else: + logger.warning(f"Could not find URL information in VLC status XML for rule: {rule}") + return 0 else: logger.error(f"Unknown type: {rule['type']}") return 0 @@ -130,33 +224,67 @@ def compare_audios(audio_path_1, audio_path_2): Compare two audio files and return a similarity score in the range [0, 1]. audio_path_1, audio_path_2: paths to the audio files to compare """ - # Example Usage: # similarity = compare_audios_simple('path_to_audio1.mp3', 'path_to_audio2.mp3') # print(f'Similarity Score: {similarity}') - # Convert to common format if necessary and load audio if not audio_path_1 or not audio_path_2: return 0 - # Load the audio files and extract MFCC features - y1, sr1 = librosa.load(audio_path_1) - mfcc1 = librosa.feature.mfcc(y=y1, sr=sr1) + y1, y2 = None, None + try: + y1, sr1 = librosa.load(audio_path_1) + except Exception: + logger.warning(f"Could not load audio from {os.path.basename(audio_path_1)}. It might be empty or corrupt.") - y2, sr2 = librosa.load(audio_path_2) - mfcc2 = librosa.feature.mfcc(y=y2, sr=sr2) + try: + y2, sr2 = librosa.load(audio_path_2) + except Exception: + logger.warning(f"Could not load audio from {os.path.basename(audio_path_2)}. It might be empty or corrupt.") + + # Handle cases where one or both audio files are empty or corrupt. + is_y1_bad = (y1 is None) or (y1.shape[0] == 0) + is_y2_bad = (y2 is None) or (y2.shape[0] == 0) + + if is_y1_bad and is_y2_bad: + logger.info("Both audio files are empty or corrupt. 
Considering them perfectly similar.") + return 1.0 + + if is_y1_bad or is_y2_bad: + logger.warning(f"One audio file is empty/corrupt, the other is not. Similarity is 0.") + return 0.0 + + try: + logger.info(f"Audio 1 ({os.path.basename(audio_path_1)}): sr={sr1}, len={len(y1)}") + logger.info(f"Audio 2 ({os.path.basename(audio_path_2)}): sr={sr2}, len={len(y2)}") + + # Extract MFCC features + mfcc1 = librosa.feature.mfcc(y=y1, sr=sr1) + mfcc2 = librosa.feature.mfcc(y=y2, sr=sr2) + except Exception as e: + logger.error(f"Error during MFCC extraction: {e}") + return 0.0 # Normalize the MFCC features mfcc1 = librosa.util.normalize(mfcc1, axis=1) mfcc2 = librosa.util.normalize(mfcc2, axis=1) + logger.info(f"MFCCs normalized.") # Define a lambda function to compute cosine distance dist_func = lambda x, y: cosine(x, y) # Use the DTW algorithm to find the best alignment path distance, path = fastdtw(mfcc1.T, mfcc2.T, dist=dist_func) + logger.info(f"DTW distance: {distance:.4f}, Path length: {len(path)}") - # Calculate the similarity score, here we use 1/(1+distance) to convert distance to a similarity score - similarity = 1 / (1 + distance) + # Normalize the DTW distance by the length of the alignment path. + if len(path) == 0: + normalized_distance = np.inf + else: + normalized_distance = distance / len(path) + logger.info(f"Normalized DTW distance: {normalized_distance:.4f}") + + # Convert the normalized distance to a similarity score using an exponential decay function. + similarity = np.exp(-normalized_distance) return similarity @@ -204,32 +332,6 @@ def compare_videos(video_path1, video_path2, max_frames_to_check=100, threshold= return 1. -def are_audio_files_similar(mp3_file_path, mp4_file_path): - # Extract audio fingerprint from MP3 file - mp3_fingerprint, mp3_duration = acoustid.fingerprint_file(mp3_file_path) - - # Extract the audio stream from the MP4 file - mp4_audio_path = os.path.splitext(mp4_file_path)[0] + '_extracted.mp3' - try: - subprocess.run(["ffmpeg", "-i", mp4_file_path, "-vn", "-ar", "44100", "-ac", "2", "-ab", "192k", "-f", "mp3", - mp4_audio_path], check=True) - except subprocess.CalledProcessError as e: - print(f"An error occurred during audio extraction from MP4: {e}") - return 0. - - # Extract audio fingerprint from the extracted audio - mp4_fingerprint, mp4_duration = acoustid.fingerprint_file(mp4_audio_path) - - # Clean up temporary extracted audio file - os.remove(mp4_audio_path) - - # Compare fingerprints (rudimentary comparison) - if mp3_duration >= mp4_duration and mp3_fingerprint == mp4_fingerprint: - return 1. - - return 0. - - def check_qt_bgcone(actual_config_path, rule): with open(actual_config_path, 'rb') as file: config_file = file.read().decode('utf-8') diff --git a/desktop_env/evaluators/metrics/vscode.py b/desktop_env/evaluators/metrics/vscode.py index d207aae..82129f6 100644 --- a/desktop_env/evaluators/metrics/vscode.py +++ b/desktop_env/evaluators/metrics/vscode.py @@ -58,16 +58,20 @@ def check_json_settings(actual: str, expected: str, **options) -> float: if not actual: return 0. 
- with open(actual, 'r') as f: - data = json.load(f) + try: + with open(actual, 'r') as f: + data = json.load(f) + except Exception as e: + return 0.0 expect = expected['expected'] - data_copy = copy.deepcopy(data) - data_copy.update(expect) - if data == data_copy: - return 1.0 - else: - return 0.0 + + # Check if all expected key-value pairs are in the actual data + for key, value in expect.items(): + if key not in data or data[key] != value: + return 0.0 + + return 1.0 def compare_text_file(actual: str, expected: str, **options) -> float: diff --git a/desktop_env/providers/aws/manager.py b/desktop_env/providers/aws/manager.py index 70e78ed..15875d1 100644 --- a/desktop_env/providers/aws/manager.py +++ b/desktop_env/providers/aws/manager.py @@ -5,6 +5,10 @@ import psutil import logging import dotenv import signal + + +INSTANCE_TYPE = "t3.xlarge" + # Load environment variables from .env file dotenv.load_dotenv() @@ -31,37 +35,17 @@ logger.setLevel(logging.INFO) DEFAULT_REGION = "us-east-1" # todo: Add doc for the configuration of image, security group and network interface # todo: public the AMI images -# ami-05e7d7bd279ea4f14 IMAGE_ID_MAP = { - "us-east-1": "ami-03a22c6e501415fb1", + "us-east-1": "ami-0cae20d2680c939d4", "ap-east-1": "ami-0c092a5b8be4116f5", } -INSTANCE_TYPE = "t3.medium" def _allocate_vm(region=DEFAULT_REGION): if region not in IMAGE_ID_MAP: raise ValueError(f"Region {region} is not supported. Supported regions are: {list(IMAGE_ID_MAP.keys())}") - run_instances_params = { - "MaxCount": 1, - "MinCount": 1, - "ImageId": IMAGE_ID_MAP[region], - "InstanceType": INSTANCE_TYPE, - "EbsOptimized": True, - "NetworkInterfaces": [ - { - "SubnetId": os.getenv('AWS_SUBNET_ID'), - "AssociatePublicIpAddress": True, - "DeviceIndex": 0, - "Groups": [ - os.getenv('AWS_SECURITY_GROUP_ID') - ] - } - ] - } - ec2_client = boto3.client('ec2', region_name=region) instance_id = None original_sigint_handler = signal.getsignal(signal.SIGINT) @@ -94,26 +78,76 @@ def _allocate_vm(region=DEFAULT_REGION): signal.signal(signal.SIGINT, signal_handler) signal.signal(signal.SIGTERM, signal_handler) + if not os.getenv('AWS_SECURITY_GROUP_ID'): + raise ValueError("AWS_SECURITY_GROUP_ID is not set in the environment variables.") + if not os.getenv('AWS_SUBNET_ID'): + raise ValueError("AWS_SUBNET_ID is not set in the environment variables.") + + run_instances_params = { + "MaxCount": 1, + "MinCount": 1, + "ImageId": IMAGE_ID_MAP[region], + "InstanceType": INSTANCE_TYPE, + "EbsOptimized": True, + "NetworkInterfaces": [ + { + "SubnetId": os.getenv('AWS_SUBNET_ID'), + "AssociatePublicIpAddress": True, + "DeviceIndex": 0, + "Groups": [ + os.getenv('AWS_SECURITY_GROUP_ID') + ] + } + ], + "BlockDeviceMappings": [ + { + "DeviceName": "/dev/sda1", + "Ebs": { + # "VolumeInitializationRate": 300 + "VolumeSize": 30, # Size in GB + "VolumeType": "gp3", # General Purpose SSD + "Throughput": 1000, + "Iops": 4000 # Adjust IOPS as needed + } + } + ] + } + response = ec2_client.run_instances(**run_instances_params) instance_id = response['Instances'][0]['InstanceId'] + + waiter = ec2_client.get_waiter('instance_running') logger.info(f"Waiting for instance {instance_id} to be running...") - ec2_client.get_waiter('instance_running').wait(InstanceIds=[instance_id]) + waiter.wait(InstanceIds=[instance_id]) logger.info(f"Instance {instance_id} is ready.") + + # Get and display the VNC web access URL + try: + instance_details = ec2_client.describe_instances(InstanceIds=[instance_id]) + instance = instance_details['Reservations'][0]['Instances'][0] + 
public_ip = instance.get('PublicIpAddress', '') + if public_ip: + vnc_url = f"http://{public_ip}:5910/vnc.html" + logger.info("="*80) + logger.info(f"🖥️ VNC Web Access URL: {vnc_url}") + logger.info(f"📡 Public IP: {public_ip}") + logger.info(f"🆔 Instance ID: {instance_id}") + logger.info("="*80) + print(f"\n🌐 VNC Web Access URL: {vnc_url}") + print(f"📍 Please open the above address in the browser for remote desktop access\n") + except Exception as e: + logger.warning(f"Failed to get VNC address for instance {instance_id}: {e}") except KeyboardInterrupt: logger.warning("VM allocation interrupted by user (SIGINT).") - raise - except SystemExit: - logger.warning("VM allocation terminated by parent process (SIGTERM).") + if instance_id: + logger.info(f"Terminating instance {instance_id} due to interruption.") + ec2_client.terminate_instances(InstanceIds=[instance_id]) raise except Exception as e: - logger.error(f"Failed to allocate VM in region {region}: {str(e)}") - # try to clean up any resources that were created - try: - if instance_id: - ec2_client.terminate_instances(InstanceIds=[instance_id]) - logger.info(f"Terminated instance {instance_id} due to allocation failure.") - except Exception as cleanup_error: - logger.error(f"May fail to clean up instance {instance_id}: {str(cleanup_error)}") + logger.error(f"Failed to allocate VM: {e}", exc_info=True) + if instance_id: + logger.info(f"Terminating instance {instance_id} due to an error.") + ec2_client.terminate_instances(InstanceIds=[instance_id]) raise finally: # Restore original signal handlers @@ -153,6 +187,27 @@ def _allocate_vm_with_proxy(region=DEFAULT_REGION, proxy_config_file=None): subnet_id=os.getenv('AWS_SUBNET_ID') ) + try: + ec2_client = boto3.client('ec2', region_name=region) + instance_details = ec2_client.describe_instances(InstanceIds=[instance_id]) + instance = instance_details['Reservations'][0]['Instances'][0] + public_ip = instance.get('PublicIpAddress', '') + if public_ip: + vnc_url = f"http://{public_ip}:5910/vnc.html" + logger.info("="*80) + logger.info(f"🖥️ VNC Web Access URL: {vnc_url}") + logger.info(f"📡 Public IP: {public_ip}") + logger.info(f"🆔 Instance ID: {instance_id}") + if current_proxy: + logger.info(f"🌐 Proxy: {current_proxy.host}:{current_proxy.port}") + logger.info("="*80) + print(f"\n🌐 VNC Web Access URL: {vnc_url}") + if current_proxy: + print(f"🔄 Current Proxy: {current_proxy.host}:{current_proxy.port}") + print(f"📍 Please open the above address in the browser for remote desktop access\n") + except Exception as e: + logger.warning(f"Failed to get VNC address for proxy instance {instance_id}: {e}") + return instance_id @@ -213,4 +268,4 @@ class AWSVMManager(VMManager): else: logger.info("Allocating a new VM in region: {}".format(region)) new_vm_path = _allocate_vm(region) - return new_vm_path + return new_vm_path \ No newline at end of file diff --git a/desktop_env/providers/aws/provider.py b/desktop_env/providers/aws/provider.py index 882710e..d2c034e 100644 --- a/desktop_env/providers/aws/provider.py +++ b/desktop_env/providers/aws/provider.py @@ -63,10 +63,24 @@ class AWSProvider(Provider): for reservation in response['Reservations']: for instance in reservation['Instances']: private_ip_address = instance.get('PrivateIpAddress', '') + public_ip_address = instance.get('PublicIpAddress', '') + + if public_ip_address: + vnc_url = f"http://{public_ip_address}:5910/vnc.html" + logger.info("="*80) + logger.info(f"🖥️ VNC Web Access URL: {vnc_url}") + logger.info(f"📡 Public IP: {public_ip_address}") + logger.info(f"🏠 Private IP: {private_ip_address}") + 
logger.info("="*80) + print(f"\n🌐 VNC Web Access URL: {vnc_url}") + print(f"📍 Please open the above address in the browser for remote desktop access\n") + else: + logger.warning("No public IP address available for VNC access") + return private_ip_address return '' # Return an empty string if no IP address is found except ClientError as e: - logger.error(f"Failed to retrieve private IP address for the instance {path_to_vm}: {str(e)}") + logger.error(f"Failed to retrieve IP address for the instance {path_to_vm}: {str(e)}") raise def save_state(self, path_to_vm: str, snapshot_name: str): @@ -74,7 +88,7 @@ class AWSProvider(Provider): ec2_client = boto3.client('ec2', region_name=self.region) try: - image_response = ec2_client.create_image(InstanceId=path_to_vm, ImageId=snapshot_name) + image_response = ec2_client.create_image(InstanceId=path_to_vm, Name=snapshot_name) image_id = image_response['ImageId'] logger.info(f"AMI {image_id} created successfully from instance {path_to_vm}.") return image_id @@ -83,7 +97,7 @@ class AWSProvider(Provider): raise def revert_to_snapshot(self, path_to_vm: str, snapshot_name: str): - logger.info(f"Reverting AWS VM to snapshot: {snapshot_name}...") + logger.info(f"Reverting AWS VM to snapshot AMI: {snapshot_name}...") ec2_client = boto3.client('ec2', region_name=self.region) try: @@ -93,37 +107,65 @@ class AWSProvider(Provider): security_groups = [sg['GroupId'] for sg in instance['SecurityGroups']] subnet_id = instance['SubnetId'] instance_type = instance['InstanceType'] - instance_snapshot = instance_details['Reservations'][0]['Instances'][0]['ImageId'] - + # Step 2: Terminate the old instance ec2_client.terminate_instances(InstanceIds=[path_to_vm]) logger.info(f"Old instance {path_to_vm} has been terminated.") - # Step 3: Launch a new instance from the snapshot - logger.info(f"Launching a new instance from snapshot {instance_snapshot}...") - - - new_instance = ec2_client.run_instances( - MaxCount = 1, - MinCount = 1, - ImageId = instance_snapshot, - InstanceType = instance_type, - EbsOptimized = True, - NetworkInterfaces = [ + # Step 3: Launch a new instance from the snapshot(AMI) with performance optimization + logger.info(f"Launching a new instance from AMI {snapshot_name}...") + + run_instances_params = { + "MaxCount": 1, + "MinCount": 1, + "ImageId": snapshot_name, + "InstanceType": instance_type, + "EbsOptimized": True, + "NetworkInterfaces": [ { "SubnetId": subnet_id, "AssociatePublicIpAddress": True, "DeviceIndex": 0, "Groups": security_groups } + ], + "BlockDeviceMappings": [ + { + "DeviceName": "/dev/sda1", + "Ebs": { + # "VolumeInitializationRate": 300 + "VolumeSize": 30, # Size in GB + "VolumeType": "gp3", # General Purpose SSD + "Throughput": 1000, + "Iops": 4000 # Adjust IOPS as needed + } + } ] - ) + } + + new_instance = ec2_client.run_instances(**run_instances_params) new_instance_id = new_instance['Instances'][0]['InstanceId'] - logger.info(f"New instance {new_instance_id} launched from snapshot {snapshot_name}.") + logger.info(f"New instance {new_instance_id} launched from AMI {snapshot_name}.") logger.info(f"Waiting for instance {new_instance_id} to be running...") ec2_client.get_waiter('instance_running').wait(InstanceIds=[new_instance_id]) logger.info(f"Instance {new_instance_id} is ready.") + + try: + instance_details = ec2_client.describe_instances(InstanceIds=[new_instance_id]) + instance = instance_details['Reservations'][0]['Instances'][0] + public_ip = instance.get('PublicIpAddress', '') + if public_ip: + vnc_url = 
f"http://{public_ip}:5910/vnc.html" + logger.info("="*80) + logger.info(f"🖥️ New Instance VNC Web Access URL: {vnc_url}") + logger.info(f"📡 Public IP: {public_ip}") + logger.info(f"🆔 New Instance ID: {new_instance_id}") + logger.info("="*80) + print(f"\n🌐 New Instance VNC Web Access URL: {vnc_url}") + print(f"📍 Please open the above address in the browser for remote desktop access\n") + except Exception as e: + logger.warning(f"Failed to get VNC address for new instance {new_instance_id}: {e}") return new_instance_id diff --git a/desktop_env/providers/aws/provider_with_proxy.py b/desktop_env/providers/aws/provider_with_proxy.py index d7cfa0e..2ffb7c0 100644 --- a/desktop_env/providers/aws/provider_with_proxy.py +++ b/desktop_env/providers/aws/provider_with_proxy.py @@ -163,16 +163,34 @@ echo "$(date): Configured proxy {self.current_proxy.host}:{self.current_proxy.po logger.info(f"Created new instance {instance_id} with proxy configuration") - # 等待实例运行 logger.info(f"Waiting for instance {instance_id} to be running...") ec2_client.get_waiter('instance_running').wait(InstanceIds=[instance_id]) logger.info(f"Instance {instance_id} is ready.") + + try: + instance_details = ec2_client.describe_instances(InstanceIds=[instance_id]) + instance = instance_details['Reservations'][0]['Instances'][0] + public_ip = instance.get('PublicIpAddress', '') + if public_ip: + vnc_url = f"http://{public_ip}:5910/vnc.html" + logger.info("="*80) + logger.info(f"🖥️ VNC Web Access URL: {vnc_url}") + logger.info(f"📡 Public IP: {public_ip}") + logger.info(f"🆔 Instance ID: {instance_id}") + if self.current_proxy: + logger.info(f"🌐 Proxy: {self.current_proxy.host}:{self.current_proxy.port}") + logger.info("="*80) + print(f"\n🌐 VNC Web Access URL: {vnc_url}") + if self.current_proxy: + print(f"🔄 Current Proxy: {self.current_proxy.host}:{self.current_proxy.port}") + print(f"📍 Please open the above address in the browser for remote desktop access\n") + except Exception as e: + logger.warning(f"Failed to get VNC address for instance {instance_id}: {e}") return instance_id except ClientError as e: logger.error(f"Failed to create instance with proxy: {str(e)}") - # 如果当前代理失败,尝试轮换代理 if self.current_proxy: proxy_pool = get_global_proxy_pool() proxy_pool.mark_proxy_failed(self.current_proxy) @@ -188,10 +206,28 @@ echo "$(date): Configured proxy {self.current_proxy.host}:{self.current_proxy.po for reservation in response['Reservations']: for instance in reservation['Instances']: private_ip_address = instance.get('PrivateIpAddress', '') + public_ip_address = instance.get('PublicIpAddress', '') + + if public_ip_address: + vnc_url = f"http://{public_ip_address}:5910/vnc.html" + logger.info("="*80) + logger.info(f"🖥️ VNC Web Access URL: {vnc_url}") + logger.info(f"📡 Public IP: {public_ip_address}") + logger.info(f"🏠 Private IP: {private_ip_address}") + if self.current_proxy: + logger.info(f"🌐 Proxy: {self.current_proxy.host}:{self.current_proxy.port}") + logger.info("="*80) + print(f"\n🌐 VNC Web Access URL: {vnc_url}") + if self.current_proxy: + print(f"🔄 Current Proxy: {self.current_proxy.host}:{self.current_proxy.port}") + print(f"📍 Please open the above address in the browser for remote desktop access\n") + else: + logger.warning("No public IP address available for VNC access") + return private_ip_address return '' except ClientError as e: - logger.error(f"Failed to retrieve private IP address for the instance {path_to_vm}: {str(e)}") + logger.error(f"Failed to retrieve IP address for the instance {path_to_vm}: {str(e)}") raise def 
save_state(self, path_to_vm: str, snapshot_name: str): @@ -212,24 +248,28 @@ echo "$(date): Configured proxy {self.current_proxy.host}:{self.current_proxy.po ec2_client = boto3.client('ec2', region_name=self.region) try: - # 获取原实例详情 + # Get original instance details for config. instance_details = ec2_client.describe_instances(InstanceIds=[path_to_vm]) instance = instance_details['Reservations'][0]['Instances'][0] security_groups = [sg['GroupId'] for sg in instance['SecurityGroups']] subnet_id = instance['SubnetId'] instance_type = instance['InstanceType'] - # 终止旧实例 + # Terminate the old instance. This is a non-blocking call. + logger.info(f"Initiating termination for old instance {path_to_vm}...") ec2_client.terminate_instances(InstanceIds=[path_to_vm]) - logger.info(f"Old instance {path_to_vm} has been terminated.") + logger.info(f"Old instance {path_to_vm} termination initiated.") - # 轮换到新的代理 + # Rotate to a new proxy self._rotate_proxy() - # 创建新实例 + # Create a new instance new_instance_id = self.create_instance_with_proxy( snapshot_name, instance_type, security_groups, subnet_id ) + + # Note: VNC address is displayed within create_instance_with_proxy + logger.info(f"Successfully launched new instance {new_instance_id} for revert.") return new_instance_id diff --git a/desktop_env/providers/vmware/manager.py b/desktop_env/providers/vmware/manager.py index 78e11d3..f3384fd 100644 --- a/desktop_env/providers/vmware/manager.py +++ b/desktop_env/providers/vmware/manager.py @@ -256,6 +256,8 @@ def _install_vm(vm_name, vms_dir, downloaded_file_name, os_type, original_vm_nam # Try downloading the screenshot until successful while not download_screenshot(vm_ip): + # Try to get the IP again in case it has changed + vm_ip = get_vm_ip(vm_path) logger.info("Check whether the virtual machine is ready...") logger.info("Virtual machine is ready. Start to make a snapshot on the virtual machine. 
It would take a while...") diff --git a/desktop_env/server/main.py b/desktop_env/server/main.py index 77ed4d4..606d0ac 100644 --- a/desktop_env/server/main.py +++ b/desktop_env/server/main.py @@ -4,6 +4,7 @@ import platform import shlex import json import subprocess, signal +import time from pathlib import Path from typing import Any, Optional, Sequence from typing import List, Dict, Tuple, Literal @@ -65,6 +66,8 @@ app = Flask(__name__) pyautogui.PAUSE = 0 pyautogui.DARWIN_CATCH_UP_TIME = 0 +TIMEOUT = 1800 # seconds + logger = app.logger recording_process = None # fixme: this is a temporary solution for recording, need to be changed to support multiple-process recording_path = "/tmp/recording.mp4" @@ -114,6 +117,113 @@ def execute_command(): }), 500 +@app.route('/setup/execute_with_verification', methods=['POST']) +@app.route('/execute_with_verification', methods=['POST']) +def execute_command_with_verification(): + """Execute command and verify the result based on provided verification criteria""" + data = request.json + shell = data.get('shell', False) + command = data.get('command', "" if shell else []) + verification = data.get('verification', {}) + max_wait_time = data.get('max_wait_time', 10) # Maximum wait time in seconds + check_interval = data.get('check_interval', 1) # Check interval in seconds + + if isinstance(command, str) and not shell: + command = shlex.split(command) + + # Expand user directory + for i, arg in enumerate(command): + if arg.startswith("~/"): + command[i] = os.path.expanduser(arg) + + # Execute the main command + try: + if platform_name == "Windows": + flags = subprocess.CREATE_NO_WINDOW + else: + flags = 0 + result = subprocess.run( + command, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + shell=shell, + text=True, + timeout=120, + creationflags=flags, + ) + + # If no verification is needed, return immediately + if not verification: + return jsonify({ + 'status': 'success', + 'output': result.stdout, + 'error': result.stderr, + 'returncode': result.returncode + }) + + # Wait and verify the result + import time + start_time = time.time() + while time.time() - start_time < max_wait_time: + verification_passed = True + + # Check window existence if specified + if 'window_exists' in verification: + window_name = verification['window_exists'] + try: + if platform_name == 'Linux': + wmctrl_result = subprocess.run(['wmctrl', '-l'], + capture_output=True, text=True, check=True) + if window_name.lower() not in wmctrl_result.stdout.lower(): + verification_passed = False + elif platform_name in ['Windows', 'Darwin']: + import pygetwindow as gw + windows = gw.getWindowsWithTitle(window_name) + if not windows: + verification_passed = False + except Exception: + verification_passed = False + + # Check command execution if specified + if 'command_success' in verification: + verify_cmd = verification['command_success'] + try: + verify_result = subprocess.run(verify_cmd, shell=True, + capture_output=True, text=True, timeout=5) + if verify_result.returncode != 0: + verification_passed = False + except Exception: + verification_passed = False + + if verification_passed: + return jsonify({ + 'status': 'success', + 'output': result.stdout, + 'error': result.stderr, + 'returncode': result.returncode, + 'verification': 'passed', + 'wait_time': time.time() - start_time + }) + + time.sleep(check_interval) + + # Verification failed + return jsonify({ + 'status': 'verification_failed', + 'output': result.stdout, + 'error': result.stderr, + 'returncode': result.returncode, + 
'verification': 'failed', + 'wait_time': max_wait_time + }), 500 + + except Exception as e: + return jsonify({ + 'status': 'error', + 'message': str(e) + }), 500 + + def _get_machine_architecture() -> str: """ Get the machine architecture, e.g., x86_64, arm64, aarch64, i386, etc. """ @@ -202,8 +312,8 @@ def capture_screen_with_cursor(): pos = (round(pos_win[0]*ratio - hotspotx), round(pos_win[1]*ratio - hotspoty)) img.paste(cursor, pos, cursor) - except: - pass + except Exception as e: + logger.warning(f"Failed to capture cursor on Windows, screenshot will not have a cursor. Error: {e}") img.save(file_path) elif user_platform == "Linux": @@ -1025,11 +1135,21 @@ def get_file(): return jsonify({"error": "file_path is required"}), 400 try: + # Check if the file exists and get its size + if not os.path.exists(file_path): + return jsonify({"error": "File not found"}), 404 + + file_size = os.path.getsize(file_path) + logger.info(f"Serving file: {file_path} ({file_size} bytes)") + # Check if the file exists and send it to the user return send_file(file_path, as_attachment=True) except FileNotFoundError: # If the file is not found, return a 404 error return jsonify({"error": "File not found"}), 404 + except Exception as e: + logger.error(f"Error serving file {file_path}: {e}") + return jsonify({"error": f"Failed to serve file: {str(e)}"}), 500 @app.route("/setup/upload", methods=["POST"]) @@ -1038,8 +1158,27 @@ def upload_file(): if 'file_path' in request.form and 'file_data' in request.files: file_path = os.path.expandvars(os.path.expanduser(request.form['file_path'])) file = request.files["file_data"] - file.save(file_path) - return "File Uploaded" + + try: + # Ensure target directory exists + os.makedirs(os.path.dirname(file_path), exist_ok=True) + + # Save file and get size for verification + file.save(file_path) + uploaded_size = os.path.getsize(file_path) + + logger.info(f"File uploaded successfully: {file_path} ({uploaded_size} bytes)") + return f"File Uploaded: {uploaded_size} bytes" + + except Exception as e: + logger.error(f"Error uploading file to {file_path}: {e}") + # Clean up partial file if it exists + if os.path.exists(file_path): + try: + os.remove(file_path) + except: + pass + return jsonify({"error": f"Failed to upload file: {str(e)}"}), 500 else: return jsonify({"error": "file_path and file_data are required"}), 400 @@ -1098,20 +1237,45 @@ def download_file(): max_retries = 3 error: Optional[Exception] = None + for i in range(max_retries): try: - response = requests.get(url, stream=True) + logger.info(f"Download attempt {i+1}/{max_retries} for {url}") + response = requests.get(url, stream=True, timeout=300) response.raise_for_status() + + # Get expected file size if available + total_size = int(response.headers.get('content-length', 0)) + if total_size > 0: + logger.info(f"Expected file size: {total_size / (1024*1024):.2f} MB") + downloaded_size = 0 with open(path, 'wb') as f: for chunk in response.iter_content(chunk_size=8192): if chunk: f.write(chunk) - return "File downloaded successfully" + downloaded_size += len(chunk) + if total_size > 0 and downloaded_size % (1024*1024) == 0: # Log every MB + progress = (downloaded_size / total_size) * 100 + logger.info(f"Download progress: {progress:.1f}%") + + # Verify download completeness + actual_size = os.path.getsize(path) + if total_size > 0 and actual_size != total_size: + raise Exception(f"Download incomplete. 
Expected {total_size} bytes, got {actual_size} bytes") + + logger.info(f"File downloaded successfully: {path} ({actual_size} bytes)") + return f"File downloaded successfully: {actual_size} bytes" - except requests.RequestException as e: + except (requests.RequestException, Exception) as e: error = e - logger.error(f"Failed to download {url}. Retrying... ({max_retries - i - 1} attempts left)") + logger.error(f"Failed to download {url}: {e}. Retrying... ({max_retries - i - 1} attempts left)") + # Clean up partial download + if path.exists(): + try: + path.unlink() + except: + pass return f"Failed to download {url}. No retries left. Error: {error}", 500 @@ -1124,18 +1288,88 @@ def open_file(): if not path: return "Path not supplied!", 400 - path = Path(os.path.expandvars(os.path.expanduser(path))) + path_obj = Path(os.path.expandvars(os.path.expanduser(path))) - if not path.exists(): - return f"File not found: {path}", 404 + # Check if it's a file path that exists + is_file_path = path_obj.exists() + + # If it's not a file path, treat it as an application name/command + if not is_file_path: + # Check if it's a valid command by trying to find it in PATH + import shutil + if not shutil.which(path): + return f"Application/file not found: {path}", 404 try: - if platform.system() == "Windows": - os.startfile(path) + if is_file_path: + # Handle file opening + if platform.system() == "Windows": + os.startfile(path_obj) + else: + open_cmd: str = "open" if platform.system() == "Darwin" else "xdg-open" + subprocess.Popen([open_cmd, str(path_obj)]) + file_name = path_obj.name + file_name_without_ext, _ = os.path.splitext(file_name) else: - open_cmd: str = "open" if platform.system() == "Darwin" else "xdg-open" - subprocess.Popen([open_cmd, str(path)]) - return "File opened successfully" + # Handle application launching + if platform.system() == "Windows": + subprocess.Popen([path]) + else: + subprocess.Popen([path]) + file_name = path + file_name_without_ext = path + + # Wait for the file/application to open + + start_time = time.time() + window_found = False + + while time.time() - start_time < TIMEOUT: + os_name = platform.system() + if os_name in ['Windows', 'Darwin']: + import pygetwindow as gw + # Check for window title containing file name or file name without extension + windows = gw.getWindowsWithTitle(file_name) + if not windows: + windows = gw.getWindowsWithTitle(file_name_without_ext) + + if windows: + # To be more specific, we can try to activate it + windows[0].activate() + window_found = True + break + elif os_name == 'Linux': + try: + # Using wmctrl to list windows and check if any window title contains the filename + result = subprocess.run(['wmctrl', '-l'], capture_output=True, text=True, check=True) + window_list = result.stdout.strip().split('\n') + if not result.stdout.strip(): + pass # No windows, just continue waiting + else: + for window in window_list: + if file_name in window or file_name_without_ext in window: + # a window is found, now activate it + window_id = window.split()[0] + subprocess.run(['wmctrl', '-i', '-a', window_id], check=True) + window_found = True + break + if window_found: + break + except (subprocess.CalledProcessError, FileNotFoundError): + # wmctrl might not be installed or the window manager isn't ready. + # We just log it once and let the main loop retry. 
+ if 'wmctrl_failed_once' not in locals(): + logger.warning("wmctrl command is not ready, will keep retrying...") + wmctrl_failed_once = True + pass # Let the outer loop retry + + time.sleep(1) + + if window_found: + return "File opened and window activated successfully" + else: + return f"Failed to find window for {file_name} within {TIMEOUT} seconds.", 500 + except Exception as e: return f"Failed to open {path}. Error: {e}", 500 @@ -1258,37 +1492,78 @@ def close_window(): @app.route('/start_recording', methods=['POST']) def start_recording(): global recording_process - if recording_process: + if recording_process and recording_process.poll() is None: return jsonify({'status': 'error', 'message': 'Recording is already in progress.'}), 400 + # Clean up previous recording if it exists + if os.path.exists(recording_path): + try: + os.remove(recording_path) + except OSError as e: + logger.error(f"Error removing old recording file: {e}") + return jsonify({'status': 'error', 'message': f'Failed to remove old recording file: {e}'}), 500 + d = display.Display() screen_width = d.screen().width_in_pixels screen_height = d.screen().height_in_pixels start_command = f"ffmpeg -y -f x11grab -draw_mouse 1 -s {screen_width}x{screen_height} -i :0.0 -c:v libx264 -r 30 {recording_path}" - recording_process = subprocess.Popen(shlex.split(start_command), stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL) + # Use stderr=PIPE to capture potential errors from ffmpeg + recording_process = subprocess.Popen(shlex.split(start_command), + stdout=subprocess.DEVNULL, + stderr=subprocess.PIPE, + text=True # To get stderr as string + ) - return jsonify({'status': 'success', 'message': 'Started recording.'}) + # Wait a couple of seconds to see if ffmpeg starts successfully + try: + # Wait for 2 seconds. If ffmpeg exits within this time, it's an error. + recording_process.wait(timeout=2) + # If wait() returns, it means the process has terminated. + error_output = recording_process.stderr.read() + return jsonify({ + 'status': 'error', + 'message': f'Failed to start recording. ffmpeg terminated unexpectedly. Error: {error_output}' + }), 500 + except subprocess.TimeoutExpired: + # This is the expected outcome: the process is still running after 2 seconds. + return jsonify({'status': 'success', 'message': 'Started recording successfully.'}) @app.route('/end_recording', methods=['POST']) def end_recording(): global recording_process - if not recording_process: + if not recording_process or recording_process.poll() is not None: + recording_process = None # Clean up stale process object return jsonify({'status': 'error', 'message': 'No recording in progress to stop.'}), 400 - recording_process.send_signal(signal.SIGINT) - recording_process.wait() - recording_process = None + error_output = "" + try: + # Send SIGINT for a graceful shutdown, allowing ffmpeg to finalize the file. + recording_process.send_signal(signal.SIGINT) + # Wait for ffmpeg to terminate. communicate() gets output and waits. + _, error_output = recording_process.communicate(timeout=15) + except subprocess.TimeoutExpired: + logger.error("ffmpeg did not respond to SIGINT, killing the process.") + recording_process.kill() + # After killing, communicate to get any remaining output. + _, error_output = recording_process.communicate() + recording_process = None + return jsonify({ + 'status': 'error', + 'message': f'Recording process was unresponsive and had to be killed. 
Stderr: {error_output}' + }), 500 - # return recording video file - if os.path.exists(recording_path): + recording_process = None # Clear the process from global state + + # Check if the recording file was created and is not empty. + if os.path.exists(recording_path) and os.path.getsize(recording_path) > 0: return send_file(recording_path, as_attachment=True) else: - return abort(404, description="Recording failed") + logger.error(f"Recording failed. The output file is missing or empty. ffmpeg stderr: {error_output}") + return abort(500, description=f"Recording failed. The output file is missing or empty. ffmpeg stderr: {error_output}") if __name__ == '__main__': diff --git a/evaluation_examples/examples/libreoffice_writer/4bcb1253-a636-4df4-8cb0-a35c04dfef31.json b/evaluation_examples/examples/libreoffice_writer/4bcb1253-a636-4df4-8cb0-a35c04dfef31.json index d192413..dd20e95 100644 --- a/evaluation_examples/examples/libreoffice_writer/4bcb1253-a636-4df4-8cb0-a35c04dfef31.json +++ b/evaluation_examples/examples/libreoffice_writer/4bcb1253-a636-4df4-8cb0-a35c04dfef31.json @@ -27,17 +27,57 @@ "libreoffice_writer" ], "evaluator": { - "func": "compare_pdfs", - "expected": { + "func": [ + "compare_pdfs", + "compare_pdfs", + "compare_pdfs", + "compare_pdfs" + ], + "conj": "or", + "expected": [ + { + "type": "cloud_file", + "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/4bcb1253-a636-4df4-8cb0-a35c04dfef31/View_Person_Organizational_Summary.pdf", + "dest": "Constitution_Template_With_Guidelines_Gold_1.pdf" + }, + { + "type": "cloud_file", + "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/4bcb1253-a636-4df4-8cb0-a35c04dfef31/View_Person_Organizational_Summary.pdf", + "dest": "Constitution_Template_With_Guidelines_Gold_2.pdf" + }, + { "type": "cloud_file", "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/4bcb1253-a636-4df4-8cb0-a35c04dfef31/View_Person_Organizational_Summary.pdf", - "dest": "Constitution_Template_With_Guidelines_Gold.pdf" + "dest": "Constitution_Template_With_Guidelines_Gold_3.pdf" }, - "result": { + { + "type": "cloud_file", + "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/4bcb1253-a636-4df4-8cb0-a35c04dfef31/View_Person_Organizational_Summary.pdf", + "dest": "Constitution_Template_With_Guidelines_Gold_4.pdf" + } + ], + "result": [ + { "type": "vm_file", "path": "/home/user/Desktop/View_Person_Organizational_Summary.pdf", - "dest": "Constitution_Template_With_Guidelines.pdf" - } + "dest": "Constitution_Template_With_Guidelines_1.pdf" + }, + { + "type": "vm_file", + "path": "/home/user/Documents/View_Person_Organizational_Summary.pdf", + "dest": "Constitution_Template_With_Guidelines_2.pdf" + }, + { + "type": "vm_file", + "path": "/home/user/Downloads/View_Person_Organizational_Summary.pdf", + "dest": "Constitution_Template_With_Guidelines_3.pdf" + }, + { + "type": "vm_file", + "path": "/home/user/View_Person_Organizational_Summary.pdf", + "dest": "Constitution_Template_With_Guidelines_4.pdf" + } + ] }, "proxy": false } \ No newline at end of file diff --git a/evaluation_examples/examples/libreoffice_writer/6ada715d-3aae-4a32-a6a7-429b2e43fb93.json b/evaluation_examples/examples/libreoffice_writer/6ada715d-3aae-4a32-a6a7-429b2e43fb93.json index eac54d7..49a817a 100644 --- 
diff --git a/evaluation_examples/examples/libreoffice_writer/6ada715d-3aae-4a32-a6a7-429b2e43fb93.json b/evaluation_examples/examples/libreoffice_writer/6ada715d-3aae-4a32-a6a7-429b2e43fb93.json
index eac54d7..49a817a 100644
--- a/evaluation_examples/examples/libreoffice_writer/6ada715d-3aae-4a32-a6a7-429b2e43fb93.json
+++ b/evaluation_examples/examples/libreoffice_writer/6ada715d-3aae-4a32-a6a7-429b2e43fb93.json
@@ -38,7 +38,7 @@
                 "command": [
                     "python",
                     "-c",
-                    "import pyautogui; import time; time.sleep(5); pyautogui.press(\"down\", presses=8, interval=0.01); time.sleep(1); pyautogui.scroll(-2)"
+                    "import pyautogui; import time; time.sleep(15); pyautogui.press(\"down\", presses=8, interval=0.01); time.sleep(1); pyautogui.scroll(-2)"
                 ]
             }
         }
@@ -68,12 +68,12 @@
                     "command": [
                         "python",
                         "-c",
-                        "import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5); pyautogui.press('down'); time.sleep(0.5); pyautogui.press('enter');"
+                        "import pyautogui; import time; time.sleep(1); pyautogui.hotkey('ctrl', 's'); time.sleep(3);"
                     ]
                 }
             }
         ],
-        "func": "compare_contains_image",
+        "func": "compare_docx_images",
         "result": {
             "type": "vm_file",
             "path": "/home/user/Desktop/Viewing_Your_Class_Schedule_and_Textbooks.docx",
diff --git a/evaluation_examples/examples/libreoffice_writer/6f81754e-285d-4ce0-b59e-af7edb02d108.json b/evaluation_examples/examples/libreoffice_writer/6f81754e-285d-4ce0-b59e-af7edb02d108.json
index bb6bbb0..cb632a4 100644
--- a/evaluation_examples/examples/libreoffice_writer/6f81754e-285d-4ce0-b59e-af7edb02d108.json
+++ b/evaluation_examples/examples/libreoffice_writer/6f81754e-285d-4ce0-b59e-af7edb02d108.json
@@ -52,7 +52,7 @@
                 }
             }
         ],
-        "func": "compare_docx_lines",
+        "func": "compare_unique_train_records",
         "result": {
             "type": "vm_file",
             "path": "/home/user/Desktop/HK_train_record.docx",
@@ -60,8 +60,16 @@
         },
         "expected": {
             "type": "cloud_file",
-            "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/6f81754e-285d-4ce0-b59e-af7edb02d108/HK_train_record_Gold.docx",
-            "dest": "HK_train_record_Gold.docx"
+            "path": [
+                "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/6f81754e-285d-4ce0-b59e-af7edb02d108/HK_train_record_Gold.docx",
+                "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/6f81754e-285d-4ce0-b59e-af7edb02d108/HK_train_record.docx"
+            ],
+            "dest": [
+                "HK_train_record_Gold.docx",
+                "HK_train_record_Original.docx"
+            ],
+            "multi": true,
+            "gives": [0, 1]
         }
     },
     "proxy": false
diff --git a/evaluation_examples/examples/libreoffice_writer/88fe4b2d-3040-4c70-9a70-546a47764b48.json b/evaluation_examples/examples/libreoffice_writer/88fe4b2d-3040-4c70-9a70-546a47764b48.json
index 546d957..2bcf614 100644
--- a/evaluation_examples/examples/libreoffice_writer/88fe4b2d-3040-4c70-9a70-546a47764b48.json
+++ b/evaluation_examples/examples/libreoffice_writer/88fe4b2d-3040-4c70-9a70-546a47764b48.json
@@ -52,20 +52,57 @@
                 }
             }
         ],
-        "func": "compare_docx_files",
-        "expected": {
-            "type": "cloud_file",
-            "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/88fe4b2d-3040-4c70-9a70-546a47764b48/CCCH9003_Tutorial_guidelines_Gold.docx",
-            "dest": "CCCH9003_Tutorial_guidelines_Gold.docx"
-        },
-        "result": {
-            "type": "vm_file",
-            "path": "/home/user/Desktop/CCCH9003_Tutorial_guidelines.docx",
-            "dest": "CCCH9003_Tutorial_guidelines.docx"
-        },
-        "options": {
-            "ignore_blanks": false
-        }
+        "func": [
+            "compare_docx_files",
+            "compare_docx_files",
+            "compare_docx_files"
+        ],
+        "conj": "or",
+        "expected": [
+            {
+                "type": "cloud_file",
+                "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/88fe4b2d-3040-4c70-9a70-546a47764b48/CCCH9003_Tutorial_guidelines_Gold_1.docx",
+                "dest": "CCCH9003_Tutorial_guidelines_Gold_1.docx"
+            },
+            {
+                "type": "cloud_file",
+                "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/88fe4b2d-3040-4c70-9a70-546a47764b48/CCCH9003_Tutorial_guidelines_Gold_2.docx",
+                "dest": "CCCH9003_Tutorial_guidelines_Gold_2.docx"
+            },
+            {
+                "type": "cloud_file",
+                "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/88fe4b2d-3040-4c70-9a70-546a47764b48/CCCH9003_Tutorial_guidelines_Gold_3.docx",
+                "dest": "CCCH9003_Tutorial_guidelines_Gold_3.docx"
+            }
+        ],
+        "result": [
+            {
+                "type": "vm_file",
+                "path": "/home/user/Desktop/CCCH9003_Tutorial_guidelines.docx",
+                "dest": "CCCH9003_Tutorial_guidelines.docx"
+            },
+            {
+                "type": "vm_file",
+                "path": "/home/user/Desktop/CCCH9003_Tutorial_guidelines.docx",
+                "dest": "CCCH9003_Tutorial_guidelines.docx"
+            },
+            {
+                "type": "vm_file",
+                "path": "/home/user/Desktop/CCCH9003_Tutorial_guidelines.docx",
+                "dest": "CCCH9003_Tutorial_guidelines.docx"
+            }
+        ],
+        "options": [
+            {
+                "ignore_blanks": false
+            },
+            {
+                "ignore_blanks": false
+            },
+            {
+                "ignore_blanks": false
+            }
+        ]
     },
     "proxy": false
 }
\ No newline at end of file
diff --git a/evaluation_examples/examples/libreoffice_writer/936321ce-5236-426a-9a20-e0e3c5dc536f.json b/evaluation_examples/examples/libreoffice_writer/936321ce-5236-426a-9a20-e0e3c5dc536f.json
index 9abfd48..e7143c5 100644
--- a/evaluation_examples/examples/libreoffice_writer/936321ce-5236-426a-9a20-e0e3c5dc536f.json
+++ b/evaluation_examples/examples/libreoffice_writer/936321ce-5236-426a-9a20-e0e3c5dc536f.json
@@ -47,22 +47,40 @@
                     "command": [
                         "python",
                         "-c",
-                        "import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5); "
+                        "import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(2); "
                     ]
                 }
             }
         ],
-        "func": "compare_docx_tables",
-        "expected": {
-            "type": "cloud_file",
-            "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/936321ce-5236-426a-9a20-e0e3c5dc536f/Graphemes_Sound_Letter_Patterns_Gold.docx",
-            "dest": "Graphemes_Sound_Letter_Patterns_Gold.docx"
-        },
-        "result": {
-            "type": "vm_file",
-            "path": "/home/user/Desktop/Graphemes_Sound_Letter_Patterns.docx",
-            "dest": "Graphemes_Sound_Letter_Patterns.docx"
-        }
+        "func": [
+            "compare_docx_tables",
+            "compare_docx_tables"
+        ],
+        "conj": "or",
+        "expected": [
+            {
+                "type": "cloud_file",
+                "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/936321ce-5236-426a-9a20-e0e3c5dc536f/Graphemes_Sound_Letter_Patterns_Gold.docx",
+                "dest": "Graphemes_Sound_Letter_Patterns_Gold.docx"
+            },
+            {
+                "type": "cloud_file",
+                "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/936321ce-5236-426a-9a20-e0e3c5dc536f/Graphemes_Sound_Letter_Patterns_Gold_2.docx",
+                "dest": "Graphemes_Sound_Letter_Patterns_Gold_2.docx"
+            }
+        ],
+        "result": [
+            {
+                "type": "vm_file",
+                "path": "/home/user/Desktop/Graphemes_Sound_Letter_Patterns.docx",
+                "dest": "Graphemes_Sound_Letter_Patterns.docx"
+            },
+            {
+                "type": "vm_file",
+                "path": "/home/user/Desktop/Graphemes_Sound_Letter_Patterns.docx",
+                "dest": "Graphemes_Sound_Letter_Patterns.docx"
+            }
+        ]
     },
     "proxy": false
 }
\ No newline at end of file
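
After changes like the ones above, an example's evaluator fields (func, expected, result, options) may be either a single entry or parallel lists with an optional "conj". The sketch below shows one way both shapes could be normalized before evaluation; this normalization step is an assumption for illustration, and normalize_evaluator is a hypothetical helper rather than the harness's actual loader.

from typing import Any, Dict, List, Tuple

def normalize_evaluator(ev: Dict[str, Any]) -> Tuple[List[Any], List[Any], List[Any], List[Any], str]:
    # Wrap the scalar form in lists so single-check and multi-check examples
    # can be handled by the same loop, then combine scores per "conj".
    funcs = ev["func"] if isinstance(ev["func"], list) else [ev["func"]]
    n = len(funcs)

    def as_list(key: str) -> List[Any]:
        value = ev.get(key)
        if value is None:
            return [None] * n
        return value if isinstance(value, list) else [value] * n

    return funcs, as_list("expected"), as_list("result"), as_list("options"), ev.get("conj", "and")
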
diff --git a/evaluation_examples/examples/os/4127319a-8b79-4410-b58a-7a151e15f3d7.json b/evaluation_examples/examples/os/4127319a-8b79-4410-b58a-7a151e15f3d7.json
index 36be485..eb73a32 100644
--- a/evaluation_examples/examples/os/4127319a-8b79-4410-b58a-7a151e15f3d7.json
+++ b/evaluation_examples/examples/os/4127319a-8b79-4410-b58a-7a151e15f3d7.json
@@ -50,7 +50,7 @@
             "type": "rule",
             "rules": {
                 "include": [
-                    "54\n"
+                    "54"
                 ],
                 "exclude": []
             }
diff --git a/evaluation_examples/examples/vlc/bba3381f-b5eb-4439-bd9e-80c22218d5a7.json b/evaluation_examples/examples/vlc/bba3381f-b5eb-4439-bd9e-80c22218d5a7.json
index 90440f6..f4b85b6 100644
--- a/evaluation_examples/examples/vlc/bba3381f-b5eb-4439-bd9e-80c22218d5a7.json
+++ b/evaluation_examples/examples/vlc/bba3381f-b5eb-4439-bd9e-80c22218d5a7.json
@@ -1,8 +1,8 @@
 {
     "id": "bba3381f-b5eb-4439-bd9e-80c22218d5a7",
     "snapshot": "base_setup",
-    "instruction": "Can you start streaming the video from this link for me? https://www.youtube.com/watch?v=pgBsyTKAwLw",
-    "source": "https://www.quora.com/How-do-I-play-online-videos-using-the-VLC-media-player",
+    "instruction": "Can you start streaming the video from this link for me? https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8",
+    "source": "https://developer.apple.com/streaming/examples/ - Apple HLS test streams for developers",
     "config": [
         {
             "type": "launch",
@@ -32,7 +32,7 @@
             "type": "rule",
             "rules": {
                 "type": "url",
-                "url": "https://www.youtube.com/watch?v=pgBsyTKAwLw"
+                "url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8"
             }
         },
         "result": {
diff --git a/evaluation_examples/examples/vs_code/276cc624-87ea-4f08-ab93-f770e3790175.json b/evaluation_examples/examples/vs_code/276cc624-87ea-4f08-ab93-f770e3790175.json
index d35ba53..bc421a9 100644
--- a/evaluation_examples/examples/vs_code/276cc624-87ea-4f08-ab93-f770e3790175.json
+++ b/evaluation_examples/examples/vs_code/276cc624-87ea-4f08-ab93-f770e3790175.json
@@ -1,7 +1,7 @@
 {
     "id": "276cc624-87ea-4f08-ab93-f770e3790175",
     "snapshot": "vscode",
-    "instruction": "Please help me set the current user's line length to 50 characters in VS Code.",
+    "instruction": "Please help me set the current user's line length for code wrapping to 50 characters in VS Code.",
     "source": "https://www.quora.com/unanswered/How-do-you-set-the-line-length-in-Visual-Studio-Code",
     "config": [
         {
diff --git a/evaluation_examples/examples/vs_code/7c4cc09e-7a92-40dd-8338-b2286535c4ed.json b/evaluation_examples/examples/vs_code/7c4cc09e-7a92-40dd-8338-b2286535c4ed.json
index f25886a..8345b1b 100644
--- a/evaluation_examples/examples/vs_code/7c4cc09e-7a92-40dd-8338-b2286535c4ed.json
+++ b/evaluation_examples/examples/vs_code/7c4cc09e-7a92-40dd-8338-b2286535c4ed.json
@@ -1,7 +1,7 @@
 {
     "id": "7c4cc09e-7a92-40dd-8338-b2286535c4ed",
     "snapshot": "vscode",
-    "instruction": "Please help me change the display language of VS Code to \"Arabic\".",
+    "instruction": "Please help me change the display language of VS Code to \"Arabic\" without using any extensions.",
     "source": "",
     "config": [
         {