diff --git a/desktop_env/controllers/setup.py b/desktop_env/controllers/setup.py index 4373600..6728370 100644 --- a/desktop_env/controllers/setup.py +++ b/desktop_env/controllers/setup.py @@ -36,6 +36,8 @@ FILE_PATH = os.path.dirname(os.path.abspath(__file__)) init_proxy_pool(PROXY_CONFIG_FILE) # initialize the global proxy pool +MAX_RETRIES = 20 + class SetupController: def __init__(self, vm_ip: str, server_port: int = 5000, chromium_port: int = 9222, vlc_port: int = 8080, cache_dir: str = "cache"): self.vm_ip: str = vm_ip @@ -64,16 +66,16 @@ class SetupController: # make sure connection can be established logger.info(f"try to connect {self.http_server}") retry = 0 - while retry < 50: + while retry < MAX_RETRIES: try: _ = requests.get(self.http_server + "/terminal") break except: time.sleep(5) retry += 1 - logger.info(f"retry: {retry}/50") + logger.info(f"retry: {retry}/{MAX_RETRIES}") - if retry == 50: + if retry == MAX_RETRIES: return False @@ -219,13 +221,14 @@ class SetupController: # send request to server to open file try: - response = requests.post(self.http_server + "/setup" + "/open_file", headers=headers, data=payload) - if response.status_code == 200: - logger.info("Command executed successfully: %s", response.text) - else: - logger.error("Failed to open file. Status code: %s", response.text) + # The server-side call is now blocking and can take time. + # We set a timeout that is slightly longer than the server's timeout (1800s). + response = requests.post(self.http_server + "/setup" + "/open_file", headers=headers, data=payload, timeout=1810) + response.raise_for_status() # This will raise an exception for 4xx and 5xx status codes + logger.info("Command executed successfully: %s", response.text) except requests.exceptions.RequestException as e: - logger.error("An error occurred while trying to send the request: %s", e) + logger.error(f"Failed to open file '{path}'. 
An error occurred while trying to send the request or the server responded with an error: {e}") + raise Exception(f"Failed to open file '{path}'. An error occurred while trying to send the request or the server responded with an error: {e}") from e def _launch_setup(self, command: Union[str, List[str]], shell: bool = False): if not command: diff --git a/desktop_env/evaluators/getters/file.py b/desktop_env/evaluators/getters/file.py index 9171e7d..f4ab03a 100644 --- a/desktop_env/evaluators/getters/file.py +++ b/desktop_env/evaluators/getters/file.py @@ -80,18 +80,16 @@ def get_vm_file(env, config: Dict[str, Any]) -> Union[Optional[str], List[Option returned. only support for single file now: time_suffix(bool): optional. defaults to False. if True, append the current time in required format. - time_format(str): optional. defaults to "%Y_%m_%d". format of the time suffix. + time_format(str): optional. defaults to "%Y%m%d_%H%M%S". format of the time suffix. """ - time_format = "%Y_%m_%d" + time_format = "%Y%m%d_%H%M%S" if not config.get("multi", False): paths: List[str] = [config["path"]] dests: List[str] = [config["dest"]] - if "time_suffix" in config.keys() and config["time_suffix"]: - if "time_format" in config.keys(): - time_format = config["time_format"] - # Insert time before . in file type suffix - paths = [p.split(".")[0] + datetime.now().strftime(time_format) + "." + p.split(".")[1] if "." in p else p for p in paths] - dests = [d.split(".")[0] + datetime.now().strftime(time_format) + "." + d.split(".")[1] if "." in d else d for d in dests] + if config.get("time_suffix", False): + time_format = config.get("time_format", time_format) + # Insert time before file extension. 
+ dests = [f"{os.path.splitext(d)[0]}_{datetime.now().strftime(time_format)}{os.path.splitext(d)[1]}" for d in dests] else: paths: List[str] = config["path"] dests: List[str] = config["dest"] diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py index 19a450d..79cd248 100644 --- a/desktop_env/evaluators/metrics/__init__.py +++ b/desktop_env/evaluators/metrics/__init__.py @@ -52,7 +52,8 @@ from .docs import ( compare_docx_files_and_ignore_new_lines, compare_docx_images, compare_image_text, - compare_references + compare_references, + compare_unique_train_records ) from .general import ( check_csv, diff --git a/desktop_env/evaluators/metrics/docs.py b/desktop_env/evaluators/metrics/docs.py index 81b3dc0..908a387 100644 --- a/desktop_env/evaluators/metrics/docs.py +++ b/desktop_env/evaluators/metrics/docs.py @@ -167,8 +167,12 @@ def compare_docx_files(file1, file2, **options): if ignore_case: p1, p2 = p1.lower(), p2.lower() if p1 != p2: - print(p1) - print(p2) + # show the difference + print("=== First Paragraph ===") + print(f"\033[92m{repr(p1)}\033[0m") # Green color for p1, repr() shows hidden chars + print("=== Second Paragraph ===") + print(f"\033[91m{repr(p2)}\033[0m") # Red color for p2, repr() shows hidden chars + print("=" * 50) # Clear boundary return 0 return 1 @@ -886,3 +890,72 @@ def compare_references(file1, file2, **options): return (result - reference_base_result) / (1 - reference_base_result) else: return 0 + + +def compare_unique_train_records(processed_file, expected_files, **kwargs): + """ + Compares the processed file with a list of expected files containing the + gold standard and the initial document. + expected_files[0] should be the gold standard file. + expected_files[1] should be the initial file. 
+ """ + # Debug logging to understand what we're actually receiving + logger.info(f"DEBUG: processed_file type: {type(processed_file)}, value: {processed_file}") + logger.info(f"DEBUG: expected_files type: {type(expected_files)}, value: {expected_files}") + logger.info(f"DEBUG: kwargs: {kwargs}") + + if not processed_file or not isinstance(expected_files, list) or len(expected_files) < 2: + logger.error("Invalid arguments: processed_file and a list of 2 expected_files are required.") + return 0 + + gold_file = expected_files[0] + initial_file = expected_files[1] + + if not gold_file or not initial_file: + logger.error("Gold file or initial file path is missing from expected_files list.") + return 0 + + # Helper function to get lines and IDs from a file + def get_lines_and_ids_from_file(file_path): + try: + doc = Document(file_path) + lines = [p.text.strip() for p in doc.paragraphs if p.text.strip()] + train_ids = [line.split(',')[1].strip() for line in lines if len(line.split(',')) == 4] + return lines, train_ids + except Exception as e: + logger.error(f"Error opening or parsing file {file_path}: {e}") + return None, None + + # Get data from all three files + processed_lines, processed_train_ids = get_lines_and_ids_from_file(processed_file) + if processed_lines is None: return 0 + + gold_lines, gold_train_ids = get_lines_and_ids_from_file(gold_file) + if gold_lines is None: return 0 + + initial_lines, _ = get_lines_and_ids_from_file(initial_file) + if initial_lines is None: return 0 + initial_lines_set = set(initial_lines) + + # 1. Subset Check: Ensure every processed line was in the initial file + if not set(processed_lines).issubset(initial_lines_set): + logger.error("Processed file contains lines not present in the initial file.") + logger.error(f"Extra lines: {set(processed_lines) - initial_lines_set}") + return 0 + + # 2. 
Uniqueness Check: Check for duplicates within the processed file + if len(processed_train_ids) != len(set(processed_train_ids)): + logger.error("Duplicate train_ids found in the processed file.") + return 0 + + # 3. Correctness Check: Compare the set of train_ids + if set(processed_train_ids) != set(gold_train_ids): + logger.error("Set of train_ids does not match between processed file and gold file.") + return 0 + + # 4. Line count check + if len(processed_lines) != len(gold_lines): + logger.error("Number of lines does not match between processed file and gold file.") + return 0 + + return 1 diff --git a/desktop_env/providers/aws/manager.py b/desktop_env/providers/aws/manager.py index 6e6dafb..1502083 100644 --- a/desktop_env/providers/aws/manager.py +++ b/desktop_env/providers/aws/manager.py @@ -5,6 +5,9 @@ import psutil import logging import dotenv import signal + +INSTANCE_TYPE = "t3.large" + # Load environment variables from .env file dotenv.load_dotenv() @@ -31,37 +34,17 @@ logger.setLevel(logging.INFO) DEFAULT_REGION = "us-east-1" # todo: Add doc for the configuration of image, security group and network interface # todo: public the AMI images -# ami-05e7d7bd279ea4f14 IMAGE_ID_MAP = { - "us-east-1": "ami-00674d875de9addc1", + "us-east-1": "ami-03a22c6e501415fb1", "ap-east-1": "ami-0c092a5b8be4116f5", } -INSTANCE_TYPE = "t3.medium" def _allocate_vm(region=DEFAULT_REGION): if region not in IMAGE_ID_MAP: raise ValueError(f"Region {region} is not supported. 
Supported regions are: {list(IMAGE_ID_MAP.keys())}") - run_instances_params = { - "MaxCount": 1, - "MinCount": 1, - "ImageId": IMAGE_ID_MAP[region], - "InstanceType": INSTANCE_TYPE, - "EbsOptimized": True, - "NetworkInterfaces": [ - { - "SubnetId": os.getenv('AWS_SUBNET_ID'), - "AssociatePublicIpAddress": True, - "DeviceIndex": 0, - "Groups": [ - os.getenv('AWS_SECURITY_GROUP_ID') - ] - } - ] - } - ec2_client = boto3.client('ec2', region_name=region) instance_id = None original_sigint_handler = signal.getsignal(signal.SIGINT) @@ -94,26 +77,64 @@ def _allocate_vm(region=DEFAULT_REGION): signal.signal(signal.SIGINT, signal_handler) signal.signal(signal.SIGTERM, signal_handler) + if not os.getenv('AWS_SECURITY_GROUP_ID'): + raise ValueError("AWS_SECURITY_GROUP_ID is not set in the environment variables.") + if not os.getenv('AWS_SUBNET_ID'): + raise ValueError("AWS_SUBNET_ID is not set in the environment variables.") + + run_instances_params = { + "MaxCount": 1, + "MinCount": 1, + "ImageId": IMAGE_ID_MAP[region], + "InstanceType": INSTANCE_TYPE, + "EbsOptimized": True, + "NetworkInterfaces": [ + { + "SubnetId": os.getenv('AWS_SUBNET_ID'), + "AssociatePublicIpAddress": True, + "DeviceIndex": 0, + "Groups": [ + os.getenv('AWS_SECURITY_GROUP_ID') + ] + } + ] + } + response = ec2_client.run_instances(**run_instances_params) instance_id = response['Instances'][0]['InstanceId'] + + waiter = ec2_client.get_waiter('instance_running') logger.info(f"Waiting for instance {instance_id} to be running...") - ec2_client.get_waiter('instance_running').wait(InstanceIds=[instance_id]) + waiter.wait(InstanceIds=[instance_id]) logger.info(f"Instance {instance_id} is ready.") + + # Fetch and display the VNC access URL + try: + instance_details = ec2_client.describe_instances(InstanceIds=[instance_id]) + instance = instance_details['Reservations'][0]['Instances'][0] + public_ip = instance.get('PublicIpAddress', '') + if public_ip: + vnc_url = f"http://{public_ip}:5910/vnc.html" + logger.info("="*80) + 
logger.info(f"🖥️ VNC Web Access URL: {vnc_url}") + logger.info(f"📡 Public IP: {public_ip}") + logger.info(f"🆔 Instance ID: {instance_id}") + logger.info("="*80) + print(f"\n🌐 VNC访问地址: {vnc_url}") + print(f"📍 请在浏览器中打开上述地址进行远程桌面访问\n") + except Exception as e: + logger.warning(f"Failed to get VNC address for instance {instance_id}: {e}") except KeyboardInterrupt: logger.warning("VM allocation interrupted by user (SIGINT).") - raise - except SystemExit: - logger.warning("VM allocation terminated by parent process (SIGTERM).") + if instance_id: + logger.info(f"Terminating instance {instance_id} due to interruption.") + ec2_client.terminate_instances(InstanceIds=[instance_id]) raise except Exception as e: - logger.error(f"Failed to allocate VM in region {region}: {str(e)}") - # try to clean up any resources that were created - try: - if instance_id: - ec2_client.terminate_instances(InstanceIds=[instance_id]) - logger.info(f"Terminated instance {instance_id} due to allocation failure.") - except Exception as cleanup_error: - logger.error(f"May fail to clean up instance {instance_id}: {str(cleanup_error)}") + logger.error(f"Failed to allocate VM: {e}", exc_info=True) + if instance_id: + logger.info(f"Terminating instance {instance_id} due to an error.") + ec2_client.terminate_instances(InstanceIds=[instance_id]) raise finally: # Restore original signal handlers @@ -153,6 +174,27 @@ def _allocate_vm_with_proxy(region=DEFAULT_REGION, proxy_config_file=None): subnet_id=os.getenv('AWS_SUBNET_ID') ) + try: + ec2_client = boto3.client('ec2', region_name=region) + instance_details = ec2_client.describe_instances(InstanceIds=[instance_id]) + instance = instance_details['Reservations'][0]['Instances'][0] + public_ip = instance.get('PublicIpAddress', '') + if public_ip: + vnc_url = f"http://{public_ip}:5910/vnc.html" + logger.info("="*80) + logger.info(f"🖥️ VNC Web Access URL: {vnc_url}") + logger.info(f"📡 Public IP: {public_ip}") + logger.info(f"🆔 Instance ID: {instance_id}") + if 
current_proxy: + logger.info(f"🌐 Proxy: {current_proxy.host}:{current_proxy.port}") + logger.info("="*80) + print(f"\n🌐 VNC Web Access URL: {vnc_url}") + if current_proxy: + print(f"🔄 Current Proxy: {current_proxy.host}:{current_proxy.port}") + print(f"📍 Please open the above address in the browser for remote desktop access\n") + except Exception as e: + logger.warning(f"Failed to get VNC address for proxy instance {instance_id}: {e}") + return instance_id @@ -213,4 +255,4 @@ class AWSVMManager(VMManager): else: logger.info("Allocating a new VM in region: {}".format(region)) new_vm_path = _allocate_vm(region) - return new_vm_path + return new_vm_path \ No newline at end of file diff --git a/desktop_env/providers/aws/provider.py b/desktop_env/providers/aws/provider.py index 882710e..d2a87c8 100644 --- a/desktop_env/providers/aws/provider.py +++ b/desktop_env/providers/aws/provider.py @@ -63,10 +63,24 @@ class AWSProvider(Provider): for reservation in response['Reservations']: for instance in reservation['Instances']: private_ip_address = instance.get('PrivateIpAddress', '') + public_ip_address = instance.get('PublicIpAddress', '') + + if public_ip_address: + vnc_url = f"http://{public_ip_address}:5910/vnc.html" + logger.info("="*80) + logger.info(f"🖥️ VNC Web Access URL: {vnc_url}") + logger.info(f"📡 Public IP: {public_ip_address}") + logger.info(f"🏠 Private IP: {private_ip_address}") + logger.info("="*80) + print(f"\n🌐 VNC Web Access URL: {vnc_url}") + print(f"📍 Please open the above address in the browser for remote desktop access\n") + else: + logger.warning("No public IP address available for VNC access") + return private_ip_address return '' # Return an empty string if no IP address is found except ClientError as e: - logger.error(f"Failed to retrieve private IP address for the instance {path_to_vm}: {str(e)}") + logger.error(f"Failed to retrieve IP address for the instance {path_to_vm}: {str(e)}") raise def save_state(self, path_to_vm: str, snapshot_name: 
str): @@ -74,7 +88,7 @@ class AWSProvider(Provider): ec2_client = boto3.client('ec2', region_name=self.region) try: - image_response = ec2_client.create_image(InstanceId=path_to_vm, ImageId=snapshot_name) + image_response = ec2_client.create_image(InstanceId=path_to_vm, Name=snapshot_name) image_id = image_response['ImageId'] logger.info(f"AMI {image_id} created successfully from instance {path_to_vm}.") return image_id @@ -83,7 +97,7 @@ class AWSProvider(Provider): raise def revert_to_snapshot(self, path_to_vm: str, snapshot_name: str): - logger.info(f"Reverting AWS VM to snapshot: {snapshot_name}...") + logger.info(f"Reverting AWS VM to snapshot AMI: {snapshot_name}...") ec2_client = boto3.client('ec2', region_name=self.region) try: @@ -93,23 +107,21 @@ class AWSProvider(Provider): security_groups = [sg['GroupId'] for sg in instance['SecurityGroups']] subnet_id = instance['SubnetId'] instance_type = instance['InstanceType'] - instance_snapshot = instance_details['Reservations'][0]['Instances'][0]['ImageId'] - + # Step 2: Terminate the old instance ec2_client.terminate_instances(InstanceIds=[path_to_vm]) logger.info(f"Old instance {path_to_vm} has been terminated.") - # Step 3: Launch a new instance from the snapshot - logger.info(f"Launching a new instance from snapshot {instance_snapshot}...") - - - new_instance = ec2_client.run_instances( - MaxCount = 1, - MinCount = 1, - ImageId = instance_snapshot, - InstanceType = instance_type, - EbsOptimized = True, - NetworkInterfaces = [ + # Step 3: Launch a new instance from the snapshot(AMI) with performance optimization + logger.info(f"Launching a new instance from AMI {snapshot_name}...") + + run_instances_params = { + "MaxCount": 1, + "MinCount": 1, + "ImageId": snapshot_name, + "InstanceType": instance_type, + "EbsOptimized": True, + "NetworkInterfaces": [ { "SubnetId": subnet_id, "AssociatePublicIpAddress": True, @@ -117,13 +129,31 @@ class AWSProvider(Provider): "Groups": security_groups } ] - ) + } + + 
new_instance = ec2_client.run_instances(**run_instances_params) new_instance_id = new_instance['Instances'][0]['InstanceId'] - logger.info(f"New instance {new_instance_id} launched from snapshot {snapshot_name}.") + logger.info(f"New instance {new_instance_id} launched from AMI {snapshot_name}.") logger.info(f"Waiting for instance {new_instance_id} to be running...") ec2_client.get_waiter('instance_running').wait(InstanceIds=[new_instance_id]) logger.info(f"Instance {new_instance_id} is ready.") + + try: + instance_details = ec2_client.describe_instances(InstanceIds=[new_instance_id]) + instance = instance_details['Reservations'][0]['Instances'][0] + public_ip = instance.get('PublicIpAddress', '') + if public_ip: + vnc_url = f"http://{public_ip}:5910/vnc.html" + logger.info("="*80) + logger.info(f"🖥️ New Instance VNC Web Access URL: {vnc_url}") + logger.info(f"📡 Public IP: {public_ip}") + logger.info(f"🆔 New Instance ID: {new_instance_id}") + logger.info("="*80) + print(f"\n🌐 New Instance VNC Web Access URL: {vnc_url}") + print(f"📍 Please open the above address in the browser for remote desktop access\n") + except Exception as e: + logger.warning(f"Failed to get VNC address for new instance {new_instance_id}: {e}") return new_instance_id diff --git a/desktop_env/providers/aws/provider_with_proxy.py b/desktop_env/providers/aws/provider_with_proxy.py index d7cfa0e..2ffb7c0 100644 --- a/desktop_env/providers/aws/provider_with_proxy.py +++ b/desktop_env/providers/aws/provider_with_proxy.py @@ -163,16 +163,34 @@ echo "$(date): Configured proxy {self.current_proxy.host}:{self.current_proxy.po logger.info(f"Created new instance {instance_id} with proxy configuration") - # 等待实例运行 logger.info(f"Waiting for instance {instance_id} to be running...") ec2_client.get_waiter('instance_running').wait(InstanceIds=[instance_id]) logger.info(f"Instance {instance_id} is ready.") + + try: + instance_details = ec2_client.describe_instances(InstanceIds=[instance_id]) + instance = 
instance_details['Reservations'][0]['Instances'][0] + public_ip = instance.get('PublicIpAddress', '') + if public_ip: + vnc_url = f"http://{public_ip}:5910/vnc.html" + logger.info("="*80) + logger.info(f"🖥️ VNC Web Access URL: {vnc_url}") + logger.info(f"📡 Public IP: {public_ip}") + logger.info(f"🆔 Instance ID: {instance_id}") + if self.current_proxy: + logger.info(f"🌐 Proxy: {self.current_proxy.host}:{self.current_proxy.port}") + logger.info("="*80) + print(f"\n🌐 VNC Web Access URL: {vnc_url}") + if self.current_proxy: + print(f"🔄 Current Proxy: {self.current_proxy.host}:{self.current_proxy.port}") + print(f"📍 Please open the above address in the browser for remote desktop access\n") + except Exception as e: + logger.warning(f"Failed to get VNC address for instance {instance_id}: {e}") return instance_id except ClientError as e: logger.error(f"Failed to create instance with proxy: {str(e)}") - # 如果当前代理失败,尝试轮换代理 if self.current_proxy: proxy_pool = get_global_proxy_pool() proxy_pool.mark_proxy_failed(self.current_proxy) @@ -188,10 +206,28 @@ echo "$(date): Configured proxy {self.current_proxy.host}:{self.current_proxy.po for reservation in response['Reservations']: for instance in reservation['Instances']: private_ip_address = instance.get('PrivateIpAddress', '') + public_ip_address = instance.get('PublicIpAddress', '') + + if public_ip_address: + vnc_url = f"http://{public_ip_address}:5910/vnc.html" + logger.info("="*80) + logger.info(f"🖥️ VNC Web Access URL: {vnc_url}") + logger.info(f"📡 Public IP: {public_ip_address}") + logger.info(f"🏠 Private IP: {private_ip_address}") + if self.current_proxy: + logger.info(f"🌐 Proxy: {self.current_proxy.host}:{self.current_proxy.port}") + logger.info("="*80) + print(f"\n🌐 VNC Web Access URL: {vnc_url}") + if self.current_proxy: + print(f"🔄 Current Proxy: {self.current_proxy.host}:{self.current_proxy.port}") + print(f"📍 Please open the above address in the browser for remote desktop access\n") + else: + logger.warning("No 
public IP address available for VNC access") + return private_ip_address return '' except ClientError as e: - logger.error(f"Failed to retrieve private IP address for the instance {path_to_vm}: {str(e)}") + logger.error(f"Failed to retrieve IP address for the instance {path_to_vm}: {str(e)}") raise def save_state(self, path_to_vm: str, snapshot_name: str): @@ -212,24 +248,28 @@ echo "$(date): Configured proxy {self.current_proxy.host}:{self.current_proxy.po ec2_client = boto3.client('ec2', region_name=self.region) try: - # 获取原实例详情 + # Get original instance details for config. instance_details = ec2_client.describe_instances(InstanceIds=[path_to_vm]) instance = instance_details['Reservations'][0]['Instances'][0] security_groups = [sg['GroupId'] for sg in instance['SecurityGroups']] subnet_id = instance['SubnetId'] instance_type = instance['InstanceType'] - # 终止旧实例 + # Terminate the old instance. This is a non-blocking call. + logger.info(f"Initiating termination for old instance {path_to_vm}...") ec2_client.terminate_instances(InstanceIds=[path_to_vm]) - logger.info(f"Old instance {path_to_vm} has been terminated.") + logger.info(f"Old instance {path_to_vm} termination initiated.") - # 轮换到新的代理 + # Rotate to a new proxy self._rotate_proxy() - # 创建新实例 + # Create a new instance new_instance_id = self.create_instance_with_proxy( snapshot_name, instance_type, security_groups, subnet_id ) + + # Note: VNC address is displayed within create_instance_with_proxy + logger.info(f"Successfully launched new instance {new_instance_id} for revert.") return new_instance_id diff --git a/desktop_env/server/main.py b/desktop_env/server/main.py index 77ed4d4..de8009d 100644 --- a/desktop_env/server/main.py +++ b/desktop_env/server/main.py @@ -4,6 +4,7 @@ import platform import shlex import json import subprocess, signal +import time from pathlib import Path from typing import Any, Optional, Sequence from typing import List, Dict, Tuple, Literal @@ -65,6 +66,8 @@ app = Flask(__name__) 
pyautogui.PAUSE = 0 pyautogui.DARWIN_CATCH_UP_TIME = 0 +TIMEOUT = 1800 # seconds + logger = app.logger recording_process = None # fixme: this is a temporary solution for recording, need to be changed to support multiple-process recording_path = "/tmp/recording.mp4" @@ -202,8 +205,8 @@ def capture_screen_with_cursor(): pos = (round(pos_win[0]*ratio - hotspotx), round(pos_win[1]*ratio - hotspoty)) img.paste(cursor, pos, cursor) - except: - pass + except Exception as e: + logger.warning(f"Failed to capture cursor on Windows, screenshot will not have a cursor. Error: {e}") img.save(file_path) elif user_platform == "Linux": @@ -1124,18 +1127,72 @@ def open_file(): if not path: return "Path not supplied!", 400 - path = Path(os.path.expandvars(os.path.expanduser(path))) + path_obj = Path(os.path.expandvars(os.path.expanduser(path))) - if not path.exists(): - return f"File not found: {path}", 404 + if not path_obj.exists(): + return f"File not found: {path_obj}", 404 try: if platform.system() == "Windows": - os.startfile(path) + os.startfile(path_obj) else: open_cmd: str = "open" if platform.system() == "Darwin" else "xdg-open" - subprocess.Popen([open_cmd, str(path)]) - return "File opened successfully" + subprocess.Popen([open_cmd, str(path_obj)]) + + # Wait for the file to open + file_name = path_obj.name + # Some apps don't include the extension in the title + file_name_without_ext, _ = os.path.splitext(file_name) + + start_time = time.time() + window_found = False + + while time.time() - start_time < TIMEOUT: + os_name = platform.system() + if os_name in ['Windows', 'Darwin']: + import pygetwindow as gw + # Check for window title containing file name or file name without extension + windows = gw.getWindowsWithTitle(file_name) + if not windows: + windows = gw.getWindowsWithTitle(file_name_without_ext) + + if windows: + # To be more specific, we can try to activate it + windows[0].activate() + window_found = True + break + elif os_name == 'Linux': + try: + # Using 
wmctrl to list windows and check if any window title contains the filename + result = subprocess.run(['wmctrl', '-l'], capture_output=True, text=True, check=True) + window_list = result.stdout.strip().split('\n') + if not result.stdout.strip(): + pass # No windows, just continue waiting + else: + for window in window_list: + if file_name in window or file_name_without_ext in window: + # a window is found, now activate it + window_id = window.split()[0] + subprocess.run(['wmctrl', '-i', '-a', window_id], check=True) + window_found = True + break + if window_found: + break + except (subprocess.CalledProcessError, FileNotFoundError): + # wmctrl might not be installed or the window manager isn't ready. + # We just log it once and let the main loop retry. + if 'wmctrl_failed_once' not in locals(): + logger.warning("wmctrl command is not ready, will keep retrying...") + wmctrl_failed_once = True + pass # Let the outer loop retry + + time.sleep(1) + + if window_found: + return "File opened and window activated successfully" + else: + return f"Failed to find window for {file_name} within {TIMEOUT} seconds.", 500 + + except Exception as e: return f"Failed to open {path}. 
Error: {e}", 500 @@ -1258,37 +1315,78 @@ def close_window(): @app.route('/start_recording', methods=['POST']) def start_recording(): global recording_process - if recording_process: + if recording_process and recording_process.poll() is None: return jsonify({'status': 'error', 'message': 'Recording is already in progress.'}), 400 + # Clean up previous recording if it exists + if os.path.exists(recording_path): + try: + os.remove(recording_path) + except OSError as e: + logger.error(f"Error removing old recording file: {e}") + return jsonify({'status': 'error', 'message': f'Failed to remove old recording file: {e}'}), 500 + d = display.Display() screen_width = d.screen().width_in_pixels screen_height = d.screen().height_in_pixels start_command = f"ffmpeg -y -f x11grab -draw_mouse 1 -s {screen_width}x{screen_height} -i :0.0 -c:v libx264 -r 30 {recording_path}" - recording_process = subprocess.Popen(shlex.split(start_command), stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL) + # Use stderr=PIPE to capture potential errors from ffmpeg + recording_process = subprocess.Popen(shlex.split(start_command), + stdout=subprocess.DEVNULL, + stderr=subprocess.PIPE, + text=True # To get stderr as string + ) - return jsonify({'status': 'success', 'message': 'Started recording.'}) + # Wait a couple of seconds to see if ffmpeg starts successfully + try: + # Wait for 2 seconds. If ffmpeg exits within this time, it's an error. + recording_process.wait(timeout=2) + # If wait() returns, it means the process has terminated. + error_output = recording_process.stderr.read() + return jsonify({ + 'status': 'error', + 'message': f'Failed to start recording. ffmpeg terminated unexpectedly. Error: {error_output}' + }), 500 + except subprocess.TimeoutExpired: + # This is the expected outcome: the process is still running after 2 seconds. 
+ return jsonify({'status': 'success', 'message': 'Started recording successfully.'}) @app.route('/end_recording', methods=['POST']) def end_recording(): global recording_process - if not recording_process: + if not recording_process or recording_process.poll() is not None: + recording_process = None # Clean up stale process object return jsonify({'status': 'error', 'message': 'No recording in progress to stop.'}), 400 + error_output = "" + try: + # Send SIGINT for a graceful shutdown, allowing ffmpeg to finalize the file. recording_process.send_signal(signal.SIGINT) - recording_process.wait() + # Wait for ffmpeg to terminate. communicate() gets output and waits. + _, error_output = recording_process.communicate(timeout=15) + except subprocess.TimeoutExpired: + logger.error("ffmpeg did not respond to SIGINT, killing the process.") + recording_process.kill() + # After killing, communicate to get any remaining output. + _, error_output = recording_process.communicate() recording_process = None + return jsonify({ + 'status': 'error', + 'message': f'Recording process was unresponsive and had to be killed. Stderr: {error_output}' + }), 500 - # return recording video file - if os.path.exists(recording_path): + recording_process = None # Clear the process from global state + + # Check if the recording file was created and is not empty. + if os.path.exists(recording_path) and os.path.getsize(recording_path) > 0: return send_file(recording_path, as_attachment=True) else: - return abort(404, description="Recording failed") + logger.error(f"Recording failed. The output file is missing or empty. ffmpeg stderr: {error_output}") + return abort(500, description=f"Recording failed. The output file is missing or empty. 
ffmpeg stderr: {error_output}") if __name__ == '__main__': diff --git a/evaluation_examples/examples/libreoffice_writer/4bcb1253-a636-4df4-8cb0-a35c04dfef31.json b/evaluation_examples/examples/libreoffice_writer/4bcb1253-a636-4df4-8cb0-a35c04dfef31.json index d192413..dd20e95 100644 --- a/evaluation_examples/examples/libreoffice_writer/4bcb1253-a636-4df4-8cb0-a35c04dfef31.json +++ b/evaluation_examples/examples/libreoffice_writer/4bcb1253-a636-4df4-8cb0-a35c04dfef31.json @@ -27,17 +27,57 @@ "libreoffice_writer" ], "evaluator": { - "func": "compare_pdfs", - "expected": { + "func": [ + "compare_pdfs", + "compare_pdfs", + "compare_pdfs", + "compare_pdfs" + ], + "conj": "or", + "expected": [ + { + "type": "cloud_file", + "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/4bcb1253-a636-4df4-8cb0-a35c04dfef31/View_Person_Organizational_Summary.pdf", + "dest": "Constitution_Template_With_Guidelines_Gold_1.pdf" + }, + { + "type": "cloud_file", + "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/4bcb1253-a636-4df4-8cb0-a35c04dfef31/View_Person_Organizational_Summary.pdf", + "dest": "Constitution_Template_With_Guidelines_Gold_2.pdf" + }, + { "type": "cloud_file", "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/4bcb1253-a636-4df4-8cb0-a35c04dfef31/View_Person_Organizational_Summary.pdf", - "dest": "Constitution_Template_With_Guidelines_Gold.pdf" + "dest": "Constitution_Template_With_Guidelines_Gold_3.pdf" }, - "result": { + { + "type": "cloud_file", + "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/4bcb1253-a636-4df4-8cb0-a35c04dfef31/View_Person_Organizational_Summary.pdf", + "dest": "Constitution_Template_With_Guidelines_Gold_4.pdf" + } + ], + "result": [ + { "type": "vm_file", "path": "/home/user/Desktop/View_Person_Organizational_Summary.pdf", - 
"dest": "Constitution_Template_With_Guidelines.pdf" - } + "dest": "Constitution_Template_With_Guidelines_1.pdf" + }, + { + "type": "vm_file", + "path": "/home/user/Documents/View_Person_Organizational_Summary.pdf", + "dest": "Constitution_Template_With_Guidelines_2.pdf" + }, + { + "type": "vm_file", + "path": "/home/user/Downloads/View_Person_Organizational_Summary.pdf", + "dest": "Constitution_Template_With_Guidelines_3.pdf" + }, + { + "type": "vm_file", + "path": "/home/user/View_Person_Organizational_Summary.pdf", + "dest": "Constitution_Template_With_Guidelines_4.pdf" + } + ] }, "proxy": false } \ No newline at end of file diff --git a/evaluation_examples/examples/libreoffice_writer/6ada715d-3aae-4a32-a6a7-429b2e43fb93.json b/evaluation_examples/examples/libreoffice_writer/6ada715d-3aae-4a32-a6a7-429b2e43fb93.json index eac54d7..49a817a 100644 --- a/evaluation_examples/examples/libreoffice_writer/6ada715d-3aae-4a32-a6a7-429b2e43fb93.json +++ b/evaluation_examples/examples/libreoffice_writer/6ada715d-3aae-4a32-a6a7-429b2e43fb93.json @@ -38,7 +38,7 @@ "command": [ "python", "-c", - "import pyautogui; import time; time.sleep(5); pyautogui.press(\"down\", presses=8, interval=0.01); time.sleep(1); pyautogui.scroll(-2)" + "import pyautogui; import time; time.sleep(15); pyautogui.press(\"down\", presses=8, interval=0.01); time.sleep(1); pyautogui.scroll(-2)" ] } } @@ -68,12 +68,12 @@ "command": [ "python", "-c", - "import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5); pyautogui.press('down'); time.sleep(0.5); pyautogui.press('enter');" + "import pyautogui; import time; time.sleep(1); pyautogui.hotkey('ctrl', 's'); time.sleep(3);" ] } } ], - "func": "compare_contains_image", + "func": "compare_docx_images", "result": { "type": "vm_file", "path": "/home/user/Desktop/Viewing_Your_Class_Schedule_and_Textbooks.docx", diff --git a/evaluation_examples/examples/libreoffice_writer/6f81754e-285d-4ce0-b59e-af7edb02d108.json 
b/evaluation_examples/examples/libreoffice_writer/6f81754e-285d-4ce0-b59e-af7edb02d108.json index bb6bbb0..cb632a4 100644 --- a/evaluation_examples/examples/libreoffice_writer/6f81754e-285d-4ce0-b59e-af7edb02d108.json +++ b/evaluation_examples/examples/libreoffice_writer/6f81754e-285d-4ce0-b59e-af7edb02d108.json @@ -52,7 +52,7 @@ } } ], - "func": "compare_docx_lines", + "func": "compare_unique_train_records", "result": { "type": "vm_file", "path": "/home/user/Desktop/HK_train_record.docx", @@ -60,8 +60,16 @@ }, "expected": { "type": "cloud_file", - "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/6f81754e-285d-4ce0-b59e-af7edb02d108/HK_train_record_Gold.docx", - "dest": "HK_train_record_Gold.docx" + "path": [ + "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/6f81754e-285d-4ce0-b59e-af7edb02d108/HK_train_record_Gold.docx", + "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/6f81754e-285d-4ce0-b59e-af7edb02d108/HK_train_record.docx" + ], + "dest": [ + "HK_train_record_Gold.docx", + "HK_train_record_Original.docx" + ], + "multi": true, + "gives": [0, 1] } }, "proxy": false diff --git a/evaluation_examples/examples/libreoffice_writer/88fe4b2d-3040-4c70-9a70-546a47764b48.json b/evaluation_examples/examples/libreoffice_writer/88fe4b2d-3040-4c70-9a70-546a47764b48.json index 546d957..2bcf614 100644 --- a/evaluation_examples/examples/libreoffice_writer/88fe4b2d-3040-4c70-9a70-546a47764b48.json +++ b/evaluation_examples/examples/libreoffice_writer/88fe4b2d-3040-4c70-9a70-546a47764b48.json @@ -52,20 +52,57 @@ } } ], - "func": "compare_docx_files", - "expected": { - "type": "cloud_file", - "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/88fe4b2d-3040-4c70-9a70-546a47764b48/CCCH9003_Tutorial_guidelines_Gold.docx", - "dest": "CCCH9003_Tutorial_guidelines_Gold.docx" 
- }, - "result": { - "type": "vm_file", - "path": "/home/user/Desktop/CCCH9003_Tutorial_guidelines.docx", - "dest": "CCCH9003_Tutorial_guidelines.docx" - }, - "options": { - "ignore_blanks": false - } + "func": [ + "compare_docx_files", + "compare_docx_files", + "compare_docx_files" + ], + "conj": "or", + "expected": [ + { + "type": "cloud_file", + "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/88fe4b2d-3040-4c70-9a70-546a47764b48/CCCH9003_Tutorial_guidelines_Gold_1.docx", + "dest": "CCCH9003_Tutorial_guidelines_Gold_1.docx" + }, + { + "type": "cloud_file", + "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/88fe4b2d-3040-4c70-9a70-546a47764b48/CCCH9003_Tutorial_guidelines_Gold_2.docx", + "dest": "CCCH9003_Tutorial_guidelines_Gold_2.docx" + }, + { + "type": "cloud_file", + "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/88fe4b2d-3040-4c70-9a70-546a47764b48/CCCH9003_Tutorial_guidelines_Gold_3.docx", + "dest": "CCCH9003_Tutorial_guidelines_Gold_3.docx" + } + ], + "result": [ + { + "type": "vm_file", + "path": "/home/user/Desktop/CCCH9003_Tutorial_guidelines.docx", + "dest": "CCCH9003_Tutorial_guidelines.docx" + }, + { + "type": "vm_file", + "path": "/home/user/Desktop/CCCH9003_Tutorial_guidelines.docx", + "dest": "CCCH9003_Tutorial_guidelines.docx" + }, + { + "type": "vm_file", + "path": "/home/user/Desktop/CCCH9003_Tutorial_guidelines.docx", + "dest": "CCCH9003_Tutorial_guidelines.docx" + } + ], + "options": [ + { + "ignore_blanks": false + }, + { + "ignore_blanks": false + }, + { + "ignore_blanks": false + } + ] }, "proxy": false } \ No newline at end of file diff --git a/evaluation_examples/examples/libreoffice_writer/936321ce-5236-426a-9a20-e0e3c5dc536f.json b/evaluation_examples/examples/libreoffice_writer/936321ce-5236-426a-9a20-e0e3c5dc536f.json index 9abfd48..e7143c5 100644 --- 
a/evaluation_examples/examples/libreoffice_writer/936321ce-5236-426a-9a20-e0e3c5dc536f.json +++ b/evaluation_examples/examples/libreoffice_writer/936321ce-5236-426a-9a20-e0e3c5dc536f.json @@ -47,22 +47,40 @@ "command": [ "python", "-c", - "import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5); " + "import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(2); " ] } } ], - "func": "compare_docx_tables", - "expected": { - "type": "cloud_file", - "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/936321ce-5236-426a-9a20-e0e3c5dc536f/Graphemes_Sound_Letter_Patterns_Gold.docx", - "dest": "Graphemes_Sound_Letter_Patterns_Gold.docx" - }, - "result": { - "type": "vm_file", - "path": "/home/user/Desktop/Graphemes_Sound_Letter_Patterns.docx", - "dest": "Graphemes_Sound_Letter_Patterns.docx" - } + "func": [ + "compare_docx_tables", + "compare_docx_tables" + ], + "conj": "or", + "expected": [ + { + "type": "cloud_file", + "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/936321ce-5236-426a-9a20-e0e3c5dc536f/Graphemes_Sound_Letter_Patterns_Gold.docx", + "dest": "Graphemes_Sound_Letter_Patterns_Gold.docx" + }, + { + "type": "cloud_file", + "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/936321ce-5236-426a-9a20-e0e3c5dc536f/Graphemes_Sound_Letter_Patterns_Gold_2.docx", + "dest": "Graphemes_Sound_Letter_Patterns_Gold_2.docx" + } + ], + "result": [ + { + "type": "vm_file", + "path": "/home/user/Desktop/Graphemes_Sound_Letter_Patterns.docx", + "dest": "Graphemes_Sound_Letter_Patterns.docx" + }, + { + "type": "vm_file", + "path": "/home/user/Desktop/Graphemes_Sound_Letter_Patterns.docx", + "dest": "Graphemes_Sound_Letter_Patterns.docx" + } + ] }, "proxy": false } \ No newline at end of file