diff --git a/desktop_env/controllers/setup.py b/desktop_env/controllers/setup.py
index 4373600..6728370 100644
--- a/desktop_env/controllers/setup.py
+++ b/desktop_env/controllers/setup.py
@@ -36,6 +36,8 @@ FILE_PATH = os.path.dirname(os.path.abspath(__file__))
 
 init_proxy_pool(PROXY_CONFIG_FILE)  # initialize the global proxy pool
 
+MAX_RETRIES = 20
+
 class SetupController:
     def __init__(self, vm_ip: str, server_port: int = 5000, chromium_port: int = 9222, vlc_port: int = 8080, cache_dir: str = "cache"):
         self.vm_ip: str = vm_ip
@@ -64,16 +66,16 @@ class SetupController:
         # make sure connection can be established
         logger.info(f"try to connect {self.http_server}")
         retry = 0
-        while retry < 50:
+        while retry < MAX_RETRIES:
             try:
                 _ = requests.get(self.http_server + "/terminal")
                 break
             except:
                 time.sleep(5)
                 retry += 1
-                logger.info(f"retry: {retry}/50")
+                logger.info(f"retry: {retry}/{MAX_RETRIES}")
             
-            if retry == 50:
+            if retry == MAX_RETRIES:
                 return False
                 
 
@@ -219,13 +221,14 @@ class SetupController:
 
         # send request to server to open file
         try:
-            response = requests.post(self.http_server + "/setup" + "/open_file", headers=headers, data=payload)
-            if response.status_code == 200:
-                logger.info("Command executed successfully: %s", response.text)
-            else:
-                logger.error("Failed to open file. Status code: %s", response.text)
+            # The server-side call is now blocking and can take time.
+            # We set a timeout that is slightly longer than the server's timeout (1800s).
+            response = requests.post(self.http_server + "/setup" + "/open_file", headers=headers, data=payload, timeout=1810)
+            response.raise_for_status()  # This will raise an exception for 4xx and 5xx status codes
+            logger.info("Command executed successfully: %s", response.text)
         except requests.exceptions.RequestException as e:
-            logger.error("An error occurred while trying to send the request: %s", e)
+            logger.error(f"Failed to open file '{path}'. An error occurred while trying to send the request or the server responded with an error: {e}")
+            raise Exception(f"Failed to open file '{path}'. An error occurred while trying to send the request or the server responded with an error: {e}") from e
 
     def _launch_setup(self, command: Union[str, List[str]], shell: bool = False):
         if not command:
diff --git a/desktop_env/evaluators/getters/file.py b/desktop_env/evaluators/getters/file.py
index 9171e7d..f4ab03a 100644
--- a/desktop_env/evaluators/getters/file.py
+++ b/desktop_env/evaluators/getters/file.py
@@ -80,18 +80,16 @@ def get_vm_file(env, config: Dict[str, Any]) -> Union[Optional[str], List[Option
           returned.
         only support for single file now:
         time_suffix(bool): optional. defaults to False. if True, append the current time in required format.
-        time_format(str): optional. defaults to "%Y_%m_%d". format of the time suffix.
+        time_format(str): optional. defaults to "%Y%m%d_%H%M%S". format of the time suffix.
     """
-    time_format = "%Y_%m_%d"
+    time_format = "%Y%m%d_%H%M%S"
     if not config.get("multi", False):
         paths: List[str] = [config["path"]]
         dests: List[str] = [config["dest"]]
-        if "time_suffix" in config.keys() and config["time_suffix"]:
-            if "time_format" in config.keys():
-                time_format = config["time_format"]
-            # Insert time before . in file type suffix
-            paths = [p.split(".")[0] + datetime.now().strftime(time_format) + "." + p.split(".")[1] if "." in p else p for p in paths]
-            dests = [d.split(".")[0] + datetime.now().strftime(time_format) + "." + d.split(".")[1] if "." in d else d for d in dests]
+        if config.get("time_suffix", False):
+            time_format = config.get("time_format", time_format)
+            # Insert time before file extension.
+            dests = [f"{os.path.splitext(d)[0]}_{datetime.now().strftime(time_format)}{os.path.splitext(d)[1]}" for d in dests]
     else:
         paths: List[str] = config["path"]
         dests: List[str] = config["dest"]
diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py
index 19a450d..79cd248 100644
--- a/desktop_env/evaluators/metrics/__init__.py
+++ b/desktop_env/evaluators/metrics/__init__.py
@@ -52,7 +52,8 @@ from .docs import (
     compare_docx_files_and_ignore_new_lines,
     compare_docx_images,
     compare_image_text,
-    compare_references
+    compare_references,
+    compare_unique_train_records
 )
 from .general import (
     check_csv,
diff --git a/desktop_env/evaluators/metrics/docs.py b/desktop_env/evaluators/metrics/docs.py
index 81b3dc0..908a387 100644
--- a/desktop_env/evaluators/metrics/docs.py
+++ b/desktop_env/evaluators/metrics/docs.py
@@ -167,8 +167,12 @@ def compare_docx_files(file1, file2, **options):
             if ignore_case:
                 p1, p2 = p1.lower(), p2.lower()
             if p1 != p2:
-                print(p1)
-                print(p2)
+                # show the difference
+                print("=== First Paragraph ===")
+                print(f"\033[92m{repr(p1)}\033[0m")  # Green color for p1, repr() shows hidden chars
+                print("=== Second Paragraph ===") 
+                print(f"\033[91m{repr(p2)}\033[0m")  # Red color for p2, repr() shows hidden chars
+                print("=" * 50)  # Clear boundary
                 return 0
 
     return 1
@@ -886,3 +890,72 @@ def compare_references(file1, file2, **options):
         return (result - reference_base_result) / (1 - reference_base_result)
     else:
         return 0
+
+
+def compare_unique_train_records(processed_file, expected_files, **kwargs):
+    """Score a processed train-record document against gold and initial files.
+
+    expected_files must be a list of [gold standard file, initial file].  Returns
+    1 only when every processed line also appears in the initial file, the train
+    IDs are unique and equal the gold set, and the line counts match; otherwise 0.
+    """
+    # Log the raw inputs first so a bad evaluator config is easy to spot.
+    logger.info(f"DEBUG: processed_file type: {type(processed_file)}, value: {processed_file}")
+    logger.info(f"DEBUG: expected_files type: {type(expected_files)}, value: {expected_files}")
+    logger.info(f"DEBUG: kwargs: {kwargs}")
+
+    has_two_references = isinstance(expected_files, list) and len(expected_files) >= 2
+    if not (processed_file and has_two_references):
+        logger.error("Invalid arguments: processed_file and a list of 2 expected_files are required.")
+        return 0
+
+    gold_file, initial_file = expected_files[0], expected_files[1]
+    if not (gold_file and initial_file):
+        logger.error("Gold file or initial file path is missing from expected_files list.")
+        return 0
+
+    def load_records(file_path):
+        """Return (non-empty stripped paragraphs, train IDs), or (None, None) on any error."""
+        try:
+            stripped = (p.text.strip() for p in Document(file_path).paragraphs)
+            lines = [text for text in stripped if text]
+            train_ids = []
+            for line in lines:
+                fields = line.split(',')
+                # A train ID is the second field of a line with exactly four comma-separated fields.
+                if len(fields) == 4:
+                    train_ids.append(fields[1].strip())
+            return lines, train_ids
+        except Exception as e:
+            logger.error(f"Error opening or parsing file {file_path}: {e}")
+            return None, None
+
+    # Parse processed, gold, then initial; stop at the first unreadable file.
+    processed_lines, processed_train_ids = load_records(processed_file)
+    if processed_lines is None:
+        return 0
+    gold_lines, gold_train_ids = load_records(gold_file)
+    if gold_lines is None:
+        return 0
+    initial_lines, _ = load_records(initial_file)
+    if initial_lines is None:
+        return 0
+
+    # 1. Subset check: every processed line must already exist in the initial file.
+    extra_lines = set(processed_lines) - set(initial_lines)
+    if extra_lines:
+        logger.error("Processed file contains lines not present in the initial file.")
+        logger.error(f"Extra lines: {extra_lines}")
+        return 0
+
+    # 2-4. Uniqueness, correctness and line-count checks, reported in that order.
+    failures = (
+        (len(set(processed_train_ids)) != len(processed_train_ids), "Duplicate train_ids found in the processed file."),
+        (set(processed_train_ids) != set(gold_train_ids), "Set of train_ids does not match between processed file and gold file."),
+        (len(processed_lines) != len(gold_lines), "Number of lines does not match between processed file and gold file."),
+    )
+    for failed, message in failures:
+        if failed:
+            logger.error(message)
+            return 0
+    return 1
diff --git a/desktop_env/providers/aws/manager.py b/desktop_env/providers/aws/manager.py
index 6e6dafb..1502083 100644
--- a/desktop_env/providers/aws/manager.py
+++ b/desktop_env/providers/aws/manager.py
@@ -5,6 +5,9 @@ import psutil
 import logging
 import dotenv
 import signal
+
+INSTANCE_TYPE = "t3.large"
+
 # Load environment variables from .env file
 dotenv.load_dotenv()
 
@@ -31,37 +34,17 @@ logger.setLevel(logging.INFO)
 DEFAULT_REGION = "us-east-1"
 # todo: Add doc for the configuration of image, security group and network interface
 # todo: public the AMI images
-# ami-05e7d7bd279ea4f14
 IMAGE_ID_MAP = {
-    "us-east-1": "ami-00674d875de9addc1",
+    "us-east-1": "ami-03a22c6e501415fb1",
     "ap-east-1": "ami-0c092a5b8be4116f5",
 }
 
-INSTANCE_TYPE = "t3.medium"
 
 def _allocate_vm(region=DEFAULT_REGION):
     
     if region not in IMAGE_ID_MAP:
         raise ValueError(f"Region {region} is not supported. Supported regions are: {list(IMAGE_ID_MAP.keys())}")
 
-    run_instances_params = {
-        "MaxCount": 1,
-        "MinCount": 1,
-        "ImageId": IMAGE_ID_MAP[region],
-        "InstanceType": INSTANCE_TYPE,
-        "EbsOptimized": True,
-        "NetworkInterfaces": [
-            {
-                "SubnetId": os.getenv('AWS_SUBNET_ID'),
-                "AssociatePublicIpAddress": True,
-                "DeviceIndex": 0,
-                "Groups": [
-                    os.getenv('AWS_SECURITY_GROUP_ID')
-                ]
-            }
-        ]
-    }
-
     ec2_client = boto3.client('ec2', region_name=region)
     instance_id = None
     original_sigint_handler = signal.getsignal(signal.SIGINT)
@@ -94,26 +77,64 @@ def _allocate_vm(region=DEFAULT_REGION):
         signal.signal(signal.SIGINT, signal_handler)
         signal.signal(signal.SIGTERM, signal_handler)
         
+        if not os.getenv('AWS_SECURITY_GROUP_ID'):
+            raise ValueError("AWS_SECURITY_GROUP_ID is not set in the environment variables.")
+        if not os.getenv('AWS_SUBNET_ID'):
+            raise ValueError("AWS_SUBNET_ID is not set in the environment variables.")
+
+        run_instances_params = {
+            "MaxCount": 1,
+            "MinCount": 1,
+            "ImageId": IMAGE_ID_MAP[region],
+            "InstanceType": INSTANCE_TYPE,
+            "EbsOptimized": True,
+            "NetworkInterfaces": [
+                {
+                    "SubnetId": os.getenv('AWS_SUBNET_ID'),
+                    "AssociatePublicIpAddress": True,
+                    "DeviceIndex": 0,
+                    "Groups": [
+                        os.getenv('AWS_SECURITY_GROUP_ID')
+                    ]
+                }
+            ]
+        }
+        
         response = ec2_client.run_instances(**run_instances_params)
         instance_id = response['Instances'][0]['InstanceId']
+        
+        waiter = ec2_client.get_waiter('instance_running')
         logger.info(f"Waiting for instance {instance_id} to be running...")
-        ec2_client.get_waiter('instance_running').wait(InstanceIds=[instance_id])
+        waiter.wait(InstanceIds=[instance_id])
         logger.info(f"Instance {instance_id} is ready.")
+        
+        # Fetch and display the VNC access URL
+        try:
+            instance_details = ec2_client.describe_instances(InstanceIds=[instance_id])
+            instance = instance_details['Reservations'][0]['Instances'][0]
+            public_ip = instance.get('PublicIpAddress', '')
+            if public_ip:
+                vnc_url = f"http://{public_ip}:5910/vnc.html"
+                logger.info("="*80)
+                logger.info(f"🖥️  VNC Web Access URL: {vnc_url}")
+                logger.info(f"📡 Public IP: {public_ip}")
+                logger.info(f"🆔 Instance ID: {instance_id}")
+                logger.info("="*80)
+                print(f"\n🌐 VNC访问地址: {vnc_url}")
+                print(f"📍 请在浏览器中打开上述地址进行远程桌面访问\n")
+        except Exception as e:
+            logger.warning(f"Failed to get VNC address for instance {instance_id}: {e}")
     except KeyboardInterrupt:
         logger.warning("VM allocation interrupted by user (SIGINT).")
-        raise
-    except SystemExit:
-        logger.warning("VM allocation terminated by parent process (SIGTERM).")
+        if instance_id:
+            logger.info(f"Terminating instance {instance_id} due to interruption.")
+            ec2_client.terminate_instances(InstanceIds=[instance_id])
         raise
     except Exception as e:
-        logger.error(f"Failed to allocate VM in region {region}: {str(e)}")
-        # try to clean up any resources that were created
-        try:
-            if instance_id:
-                ec2_client.terminate_instances(InstanceIds=[instance_id])
-                logger.info(f"Terminated instance {instance_id} due to allocation failure.")
-        except Exception as cleanup_error:
-            logger.error(f"May fail to clean up instance {instance_id}: {str(cleanup_error)}")
+        logger.error(f"Failed to allocate VM: {e}", exc_info=True)
+        if instance_id:
+            logger.info(f"Terminating instance {instance_id} due to an error.")
+            ec2_client.terminate_instances(InstanceIds=[instance_id])
         raise
     finally:
         # Restore original signal handlers
@@ -153,6 +174,27 @@ def _allocate_vm_with_proxy(region=DEFAULT_REGION, proxy_config_file=None):
         subnet_id=os.getenv('AWS_SUBNET_ID')
     )
     
+    try:
+        ec2_client = boto3.client('ec2', region_name=region)
+        instance_details = ec2_client.describe_instances(InstanceIds=[instance_id])
+        instance = instance_details['Reservations'][0]['Instances'][0]
+        public_ip = instance.get('PublicIpAddress', '')
+        if public_ip:
+            vnc_url = f"http://{public_ip}:5910/vnc.html"
+            logger.info("="*80)
+            logger.info(f"🖥️  VNC Web Access URL: {vnc_url}")
+            logger.info(f"📡 Public IP: {public_ip}")
+            logger.info(f"🆔 Instance ID: {instance_id}")
+            if current_proxy:
+                logger.info(f"🌐 Proxy: {current_proxy.host}:{current_proxy.port}")
+            logger.info("="*80)
+            print(f"\n🌐 VNC Web Access URL: {vnc_url}")
+            if current_proxy:
+                print(f"🔄 Current Proxy: {current_proxy.host}:{current_proxy.port}")
+            print(f"📍 Please open the above address in the browser for remote desktop access\n")
+    except Exception as e:
+        logger.warning(f"Failed to get VNC address for proxy instance {instance_id}: {e}")
+    
     return instance_id
 
 
@@ -213,4 +255,4 @@ class AWSVMManager(VMManager):
         else:
             logger.info("Allocating a new VM in region: {}".format(region))
             new_vm_path = _allocate_vm(region)
-        return new_vm_path
+        return new_vm_path
\ No newline at end of file
diff --git a/desktop_env/providers/aws/provider.py b/desktop_env/providers/aws/provider.py
index 882710e..d2a87c8 100644
--- a/desktop_env/providers/aws/provider.py
+++ b/desktop_env/providers/aws/provider.py
@@ -63,10 +63,24 @@ class AWSProvider(Provider):
             for reservation in response['Reservations']:
                 for instance in reservation['Instances']:
                     private_ip_address = instance.get('PrivateIpAddress', '')
+                    public_ip_address = instance.get('PublicIpAddress', '')
+                    
+                    if public_ip_address:
+                        vnc_url = f"http://{public_ip_address}:5910/vnc.html"
+                        logger.info("="*80)
+                        logger.info(f"🖥️  VNC Web Access URL: {vnc_url}")
+                        logger.info(f"📡 Public IP: {public_ip_address}")
+                        logger.info(f"🏠 Private IP: {private_ip_address}")
+                        logger.info("="*80)
+                        print(f"\n🌐 VNC Web Access URL: {vnc_url}")
+                        print(f"📍 Please open the above address in the browser for remote desktop access\n")
+                    else:
+                        logger.warning("No public IP address available for VNC access")
+                    
                     return private_ip_address
             return ''  # Return an empty string if no IP address is found
         except ClientError as e:
-            logger.error(f"Failed to retrieve private IP address for the instance {path_to_vm}: {str(e)}")
+            logger.error(f"Failed to retrieve IP address for the instance {path_to_vm}: {str(e)}")
             raise
 
     def save_state(self, path_to_vm: str, snapshot_name: str):
@@ -74,7 +88,7 @@ class AWSProvider(Provider):
         ec2_client = boto3.client('ec2', region_name=self.region)
 
         try:
-            image_response = ec2_client.create_image(InstanceId=path_to_vm, ImageId=snapshot_name)
+            image_response = ec2_client.create_image(InstanceId=path_to_vm, Name=snapshot_name)
             image_id = image_response['ImageId']
             logger.info(f"AMI {image_id} created successfully from instance {path_to_vm}.")
             return image_id
@@ -83,7 +97,7 @@ class AWSProvider(Provider):
             raise
 
     def revert_to_snapshot(self, path_to_vm: str, snapshot_name: str):
-        logger.info(f"Reverting AWS VM to snapshot: {snapshot_name}...")
+        logger.info(f"Reverting AWS VM to snapshot AMI: {snapshot_name}...")
         ec2_client = boto3.client('ec2', region_name=self.region)
 
         try:
@@ -93,23 +107,21 @@ class AWSProvider(Provider):
             security_groups = [sg['GroupId'] for sg in instance['SecurityGroups']]
             subnet_id = instance['SubnetId']
             instance_type = instance['InstanceType']
-            instance_snapshot = instance_details['Reservations'][0]['Instances'][0]['ImageId']
-
+            
             # Step 2: Terminate the old instance
             ec2_client.terminate_instances(InstanceIds=[path_to_vm])
             logger.info(f"Old instance {path_to_vm} has been terminated.")
 
-            # Step 3: Launch a new instance from the snapshot
-            logger.info(f"Launching a new instance from snapshot {instance_snapshot}...")
-
-
-            new_instance = ec2_client.run_instances(
-                MaxCount = 1,
-                MinCount = 1,
-                ImageId = instance_snapshot, 
-                InstanceType = instance_type,
-                EbsOptimized = True,
-                NetworkInterfaces = [
+            # Step 3: Launch a new instance from the snapshot(AMI) with performance optimization
+            logger.info(f"Launching a new instance from AMI {snapshot_name}...")
+            
+            run_instances_params = {
+                "MaxCount": 1,
+                "MinCount": 1,
+                "ImageId": snapshot_name,
+                "InstanceType": instance_type,
+                "EbsOptimized": True,
+                "NetworkInterfaces": [
                     {
                         "SubnetId": subnet_id,
                         "AssociatePublicIpAddress": True,
@@ -117,13 +129,31 @@ class AWSProvider(Provider):
                         "Groups": security_groups
                     }
                 ]
-            )
+            }
+            
+            new_instance = ec2_client.run_instances(**run_instances_params)
             new_instance_id = new_instance['Instances'][0]['InstanceId']
-            logger.info(f"New instance {new_instance_id} launched from snapshot {snapshot_name}.")
+            logger.info(f"New instance {new_instance_id} launched from AMI {snapshot_name}.")
             logger.info(f"Waiting for instance {new_instance_id} to be running...")
             ec2_client.get_waiter('instance_running').wait(InstanceIds=[new_instance_id])
 
             logger.info(f"Instance {new_instance_id} is ready.")
+            
+            try:
+                instance_details = ec2_client.describe_instances(InstanceIds=[new_instance_id])
+                instance = instance_details['Reservations'][0]['Instances'][0]
+                public_ip = instance.get('PublicIpAddress', '')
+                if public_ip:
+                    vnc_url = f"http://{public_ip}:5910/vnc.html"
+                    logger.info("="*80)
+                    logger.info(f"🖥️  New Instance VNC Web Access URL: {vnc_url}")
+                    logger.info(f"📡 Public IP: {public_ip}")
+                    logger.info(f"🆔 New Instance ID: {new_instance_id}")
+                    logger.info("="*80)
+                    print(f"\n🌐 New Instance VNC Web Access URL: {vnc_url}")
+                    print(f"📍 Please open the above address in the browser for remote desktop access\n")
+            except Exception as e:
+                logger.warning(f"Failed to get VNC address for new instance {new_instance_id}: {e}")
 
             return new_instance_id
 
diff --git a/desktop_env/providers/aws/provider_with_proxy.py b/desktop_env/providers/aws/provider_with_proxy.py
index d7cfa0e..2ffb7c0 100644
--- a/desktop_env/providers/aws/provider_with_proxy.py
+++ b/desktop_env/providers/aws/provider_with_proxy.py
@@ -163,16 +163,34 @@ echo "$(date): Configured proxy {self.current_proxy.host}:{self.current_proxy.po
             
             logger.info(f"Created new instance {instance_id} with proxy configuration")
             
-            # 等待实例运行
             logger.info(f"Waiting for instance {instance_id} to be running...")
             ec2_client.get_waiter('instance_running').wait(InstanceIds=[instance_id])
             logger.info(f"Instance {instance_id} is ready.")
+
+            try:
+                instance_details = ec2_client.describe_instances(InstanceIds=[instance_id])
+                instance = instance_details['Reservations'][0]['Instances'][0]
+                public_ip = instance.get('PublicIpAddress', '')
+                if public_ip:
+                    vnc_url = f"http://{public_ip}:5910/vnc.html"
+                    logger.info("="*80)
+                    logger.info(f"🖥️  VNC Web Access URL: {vnc_url}")
+                    logger.info(f"📡 Public IP: {public_ip}")
+                    logger.info(f"🆔 Instance ID: {instance_id}")
+                    if self.current_proxy:
+                        logger.info(f"🌐 Proxy: {self.current_proxy.host}:{self.current_proxy.port}")
+                    logger.info("="*80)
+                    print(f"\n🌐 VNC Web Access URL: {vnc_url}")
+                    if self.current_proxy:
+                        print(f"🔄 Current Proxy: {self.current_proxy.host}:{self.current_proxy.port}")
+                    print(f"📍 Please open the above address in the browser for remote desktop access\n")
+            except Exception as e:
+                logger.warning(f"Failed to get VNC address for instance {instance_id}: {e}")
             
             return instance_id
             
         except ClientError as e:
             logger.error(f"Failed to create instance with proxy: {str(e)}")
-            # 如果当前代理失败，尝试轮换代理
             if self.current_proxy:
                 proxy_pool = get_global_proxy_pool()
                 proxy_pool.mark_proxy_failed(self.current_proxy)
@@ -188,10 +206,28 @@ echo "$(date): Configured proxy {self.current_proxy.host}:{self.current_proxy.po
             for reservation in response['Reservations']:
                 for instance in reservation['Instances']:
                     private_ip_address = instance.get('PrivateIpAddress', '')
+                    public_ip_address = instance.get('PublicIpAddress', '')
+
+                    if public_ip_address:
+                        vnc_url = f"http://{public_ip_address}:5910/vnc.html"
+                        logger.info("="*80)
+                        logger.info(f"🖥️  VNC Web Access URL: {vnc_url}")
+                        logger.info(f"📡 Public IP: {public_ip_address}")
+                        logger.info(f"🏠 Private IP: {private_ip_address}")
+                        if self.current_proxy:
+                            logger.info(f"🌐 Proxy: {self.current_proxy.host}:{self.current_proxy.port}")
+                        logger.info("="*80)
+                        print(f"\n🌐 VNC Web Access URL: {vnc_url}")
+                        if self.current_proxy:
+                            print(f"🔄 Current Proxy: {self.current_proxy.host}:{self.current_proxy.port}")
+                        print(f"📍 Please open the above address in the browser for remote desktop access\n")
+                    else:
+                        logger.warning("No public IP address available for VNC access")
+                    
                     return private_ip_address
             return ''
         except ClientError as e:
-            logger.error(f"Failed to retrieve private IP address for the instance {path_to_vm}: {str(e)}")
+            logger.error(f"Failed to retrieve IP address for the instance {path_to_vm}: {str(e)}")
             raise
 
     def save_state(self, path_to_vm: str, snapshot_name: str):
@@ -212,24 +248,28 @@ echo "$(date): Configured proxy {self.current_proxy.host}:{self.current_proxy.po
         ec2_client = boto3.client('ec2', region_name=self.region)
 
         try:
-            # 获取原实例详情
+            # Get original instance details for config.
             instance_details = ec2_client.describe_instances(InstanceIds=[path_to_vm])
             instance = instance_details['Reservations'][0]['Instances'][0]
             security_groups = [sg['GroupId'] for sg in instance['SecurityGroups']]
             subnet_id = instance['SubnetId']
             instance_type = instance['InstanceType']
 
-            # 终止旧实例
+            # Terminate the old instance. This is a non-blocking call.
+            logger.info(f"Initiating termination for old instance {path_to_vm}...")
             ec2_client.terminate_instances(InstanceIds=[path_to_vm])
-            logger.info(f"Old instance {path_to_vm} has been terminated.")
+            logger.info(f"Old instance {path_to_vm} termination initiated.")
 
-            # 轮换到新的代理
+            # Rotate to a new proxy
             self._rotate_proxy()
             
-            # 创建新实例
+            # Create a new instance
             new_instance_id = self.create_instance_with_proxy(
                 snapshot_name, instance_type, security_groups, subnet_id
             )
+            
+            # Note: VNC address is displayed within create_instance_with_proxy
+            logger.info(f"Successfully launched new instance {new_instance_id} for revert.")
 
             return new_instance_id
 
diff --git a/desktop_env/server/main.py b/desktop_env/server/main.py
index 77ed4d4..de8009d 100644
--- a/desktop_env/server/main.py
+++ b/desktop_env/server/main.py
@@ -4,6 +4,7 @@ import platform
 import shlex
 import json
 import subprocess, signal
+import time
 from pathlib import Path
 from typing import Any, Optional, Sequence
 from typing import List, Dict, Tuple, Literal
@@ -65,6 +66,8 @@ app = Flask(__name__)
 pyautogui.PAUSE = 0
 pyautogui.DARWIN_CATCH_UP_TIME = 0
 
+TIMEOUT = 1800  # seconds
+
 logger = app.logger
 recording_process = None  # fixme: this is a temporary solution for recording, need to be changed to support multiple-process
 recording_path = "/tmp/recording.mp4"
@@ -202,8 +205,8 @@ def capture_screen_with_cursor():
             pos = (round(pos_win[0]*ratio - hotspotx), round(pos_win[1]*ratio - hotspoty))
 
             img.paste(cursor, pos, cursor)
-        except:
-            pass
+        except Exception as e:
+            logger.warning(f"Failed to capture cursor on Windows, screenshot will not have a cursor. Error: {e}")
 
         img.save(file_path)
     elif user_platform == "Linux":
@@ -1124,18 +1127,72 @@ def open_file():
     if not path:
         return "Path not supplied!", 400
 
-    path = Path(os.path.expandvars(os.path.expanduser(path)))
+    path_obj = Path(os.path.expandvars(os.path.expanduser(path)))
 
-    if not path.exists():
-        return f"File not found: {path}", 404
+    if not path_obj.exists():
+        return f"File not found: {path_obj}", 404
 
     try:
         if platform.system() == "Windows":
-            os.startfile(path)
+            os.startfile(path_obj)
         else:
             open_cmd: str = "open" if platform.system() == "Darwin" else "xdg-open"
-            subprocess.Popen([open_cmd, str(path)])
-        return "File opened successfully"
+            subprocess.Popen([open_cmd, str(path_obj)])
+
+        # Wait (up to TIMEOUT seconds) for a window whose title mentions the file
+        file_name = path_obj.name
+        # Some apps don't include the extension in the title
+        file_name_without_ext, _ = os.path.splitext(file_name)
+
+        start_time = time.time()
+        window_found = False
+        wmctrl_warned = False  # ensures the wmctrl warning is logged only once
+        while time.time() - start_time < TIMEOUT:
+            os_name = platform.system()
+            if os_name in ['Windows', 'Darwin']:
+                import pygetwindow as gw
+                # Check for window title containing file name or file name without extension
+                windows = gw.getWindowsWithTitle(file_name)
+                if not windows:
+                    windows = gw.getWindowsWithTitle(file_name_without_ext)
+
+                if windows:
+                    # To be more specific, we can try to activate it
+                    windows[0].activate()
+                    window_found = True
+                    break
+            elif os_name == 'Linux':
+                try:
+                    # Using wmctrl to list windows and check if any window title contains the filename
+                    result = subprocess.run(['wmctrl', '-l'], capture_output=True, text=True, check=True)
+                    window_list = result.stdout.strip().split('\n')
+                    if not result.stdout.strip():
+                        pass  # No windows, just continue waiting
+                    else:
+                        for window in window_list:
+                            if file_name in window or file_name_without_ext in window:
+                                # a window is found, now activate it
+                                window_id = window.split()[0]
+                                subprocess.run(['wmctrl', '-i', '-a', window_id], check=True)
+                                window_found = True
+                                break
+                        if window_found:
+                            break
+                except (subprocess.CalledProcessError, FileNotFoundError):
+                    # wmctrl might not be installed or the window manager isn't ready.
+                    # Warn only once and let the main loop retry.
+                    if not wmctrl_warned:
+                        logger.warning("wmctrl command is not ready, will keep retrying...")
+                        wmctrl_warned = True
+                    # Fall through to the sleep below; the outer loop retries.
+
+            time.sleep(1)
+
+        if window_found:
+            return "File opened and window activated successfully"
+        else:
+            return f"Failed to find window for {file_name} within {TIMEOUT} seconds.", 500
+
     except Exception as e:
         return f"Failed to open {path}. Error: {e}", 500
 
@@ -1258,37 +1315,78 @@ def close_window():
 @app.route('/start_recording', methods=['POST'])
 def start_recording():
+    """Start an ffmpeg x11grab screen recording; fail if one is running or ffmpeg exits within 2 seconds."""
     global recording_process
-    if recording_process:
+    if recording_process and recording_process.poll() is None:
         return jsonify({'status': 'error', 'message': 'Recording is already in progress.'}), 400
 
+    # Clean up previous recording if it exists
+    if os.path.exists(recording_path):
+        try:
+            os.remove(recording_path)
+        except OSError as e:
+            logger.error(f"Error removing old recording file: {e}")
+            return jsonify({'status': 'error', 'message': f'Failed to remove old recording file: {e}'}), 500
+
     d = display.Display()
     screen_width = d.screen().width_in_pixels
     screen_height = d.screen().height_in_pixels
 
-    start_command = f"ffmpeg -y -f x11grab -draw_mouse 1 -s {screen_width}x{screen_height} -i :0.0 -c:v libx264 -r 30 {recording_path}"
+    # ffmpeg's stderr is piped so start-up errors can be returned, but nothing drains the pipe while recording.
+    # -nostats/-loglevel error keep it quiet; otherwise the pipe buffer fills and ffmpeg blocks mid-capture.
+    start_command = f"ffmpeg -nostats -loglevel error -y -f x11grab -draw_mouse 1 -s {screen_width}x{screen_height} -i :0.0 -c:v libx264 -r 30 {recording_path}"
 
-    recording_process = subprocess.Popen(shlex.split(start_command), stdout=subprocess.DEVNULL,
-                                         stderr=subprocess.DEVNULL)
+    recording_process = subprocess.Popen(shlex.split(start_command),
+                                         stdout=subprocess.DEVNULL,
+                                         stderr=subprocess.PIPE,
+                                         text=True)  # To get stderr as string
 
-    return jsonify({'status': 'success', 'message': 'Started recording.'})
+    try:
+        # Wait for 2 seconds. If ffmpeg exits within this time, it's an error.
+        recording_process.wait(timeout=2)
+        # If wait() returns, it means the process has terminated.
+        error_output = recording_process.stderr.read()
+        return jsonify({
+            'status': 'error',
+            'message': f'Failed to start recording. ffmpeg terminated unexpectedly. Error: {error_output}'
+        }), 500
+    except subprocess.TimeoutExpired:
+        # This is the expected outcome: the process is still running after 2 seconds.
+        return jsonify({'status': 'success', 'message': 'Started recording successfully.'})
 
 
 @app.route('/end_recording', methods=['POST'])
 def end_recording():
+    """Stop the running ffmpeg recording and return the video file; 400 if none is running, 500 on failure."""
     global recording_process
 
-    if not recording_process:
+    if not recording_process or recording_process.poll() is not None:
+        recording_process = None  # Clean up stale process object
         return jsonify({'status': 'error', 'message': 'No recording in progress to stop.'}), 400
 
-    recording_process.send_signal(signal.SIGINT)
-    recording_process.wait()
-    recording_process = None
+    error_output = ""
+    try:
+        # Send SIGINT for a graceful shutdown, allowing ffmpeg to finalize the file.
+        recording_process.send_signal(signal.SIGINT)
+        # Wait for ffmpeg to terminate. communicate() gets output and waits.
+        _, error_output = recording_process.communicate(timeout=15)
+    except subprocess.TimeoutExpired:
+        logger.error("ffmpeg did not respond to SIGINT, killing the process.")
+        recording_process.kill()
+        # After killing, communicate to get any remaining output.
+        _, error_output = recording_process.communicate()
+        recording_process = None
+        return jsonify({
+            'status': 'error',
+            'message': f'Recording process was unresponsive and had to be killed. Stderr: {error_output}'
+        }), 500
 
-    # return recording video file
-    if os.path.exists(recording_path):
+    recording_process = None  # Clear the process from global state
+
+    if os.path.exists(recording_path) and os.path.getsize(recording_path) > 0:
         return send_file(recording_path, as_attachment=True)
     else:
-        return abort(404, description="Recording failed")
+        logger.error(f"Recording failed. The output file is missing or empty. ffmpeg stderr: {error_output}")
+        return abort(500, description=f"Recording failed. The output file is missing or empty. ffmpeg stderr: {error_output}")
 
 
 if __name__ == '__main__':
diff --git a/evaluation_examples/examples/libreoffice_writer/4bcb1253-a636-4df4-8cb0-a35c04dfef31.json b/evaluation_examples/examples/libreoffice_writer/4bcb1253-a636-4df4-8cb0-a35c04dfef31.json
index d192413..dd20e95 100644
--- a/evaluation_examples/examples/libreoffice_writer/4bcb1253-a636-4df4-8cb0-a35c04dfef31.json
+++ b/evaluation_examples/examples/libreoffice_writer/4bcb1253-a636-4df4-8cb0-a35c04dfef31.json
@@ -27,17 +27,57 @@
     "libreoffice_writer"
   ],
   "evaluator": {
-    "func": "compare_pdfs",
-    "expected": {
+    "func": [
+      "compare_pdfs",
+      "compare_pdfs", 
+      "compare_pdfs",
+      "compare_pdfs"
+    ],
+    "conj": "or",
+    "expected": [
+      {
+        "type": "cloud_file",
+        "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/4bcb1253-a636-4df4-8cb0-a35c04dfef31/View_Person_Organizational_Summary.pdf",
+        "dest": "Constitution_Template_With_Guidelines_Gold_1.pdf"
+      },
+      {
+        "type": "cloud_file", 
+        "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/4bcb1253-a636-4df4-8cb0-a35c04dfef31/View_Person_Organizational_Summary.pdf",
+        "dest": "Constitution_Template_With_Guidelines_Gold_2.pdf"
+      },
+      {
       "type": "cloud_file",
       "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/4bcb1253-a636-4df4-8cb0-a35c04dfef31/View_Person_Organizational_Summary.pdf",
-      "dest": "Constitution_Template_With_Guidelines_Gold.pdf"
+        "dest": "Constitution_Template_With_Guidelines_Gold_3.pdf"
     },
-    "result": {
+      {
+        "type": "cloud_file",
+        "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/4bcb1253-a636-4df4-8cb0-a35c04dfef31/View_Person_Organizational_Summary.pdf",
+        "dest": "Constitution_Template_With_Guidelines_Gold_4.pdf" 
+      }
+    ],
+    "result": [
+      {
       "type": "vm_file",
       "path": "/home/user/Desktop/View_Person_Organizational_Summary.pdf",
-      "dest": "Constitution_Template_With_Guidelines.pdf"
-    }
+        "dest": "Constitution_Template_With_Guidelines_1.pdf"
+      },
+      {
+        "type": "vm_file",
+        "path": "/home/user/Documents/View_Person_Organizational_Summary.pdf", 
+        "dest": "Constitution_Template_With_Guidelines_2.pdf"
+      },
+      {
+        "type": "vm_file",
+        "path": "/home/user/Downloads/View_Person_Organizational_Summary.pdf",
+        "dest": "Constitution_Template_With_Guidelines_3.pdf"
+      },
+      {
+        "type": "vm_file", 
+        "path": "/home/user/View_Person_Organizational_Summary.pdf",
+        "dest": "Constitution_Template_With_Guidelines_4.pdf"
+      }
+    ]
   },
   "proxy": false
 }
\ No newline at end of file
diff --git a/evaluation_examples/examples/libreoffice_writer/6ada715d-3aae-4a32-a6a7-429b2e43fb93.json b/evaluation_examples/examples/libreoffice_writer/6ada715d-3aae-4a32-a6a7-429b2e43fb93.json
index eac54d7..49a817a 100644
--- a/evaluation_examples/examples/libreoffice_writer/6ada715d-3aae-4a32-a6a7-429b2e43fb93.json
+++ b/evaluation_examples/examples/libreoffice_writer/6ada715d-3aae-4a32-a6a7-429b2e43fb93.json
@@ -38,7 +38,7 @@
         "command": [
           "python",
           "-c",
-          "import pyautogui; import time; time.sleep(5); pyautogui.press(\"down\", presses=8, interval=0.01); time.sleep(1); pyautogui.scroll(-2)"
+          "import pyautogui; import time; time.sleep(15); pyautogui.press(\"down\", presses=8, interval=0.01); time.sleep(1); pyautogui.scroll(-2)"
         ]
       }
     }
@@ -68,12 +68,12 @@
           "command": [
             "python",
             "-c",
-            "import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5); pyautogui.press('down'); time.sleep(0.5); pyautogui.press('enter');"
+            "import pyautogui; import time; time.sleep(1); pyautogui.hotkey('ctrl', 's'); time.sleep(3);"
           ]
         }
       }
     ],
-    "func": "compare_contains_image",
+    "func": "compare_docx_images",
     "result": {
       "type": "vm_file",
       "path": "/home/user/Desktop/Viewing_Your_Class_Schedule_and_Textbooks.docx",
diff --git a/evaluation_examples/examples/libreoffice_writer/6f81754e-285d-4ce0-b59e-af7edb02d108.json b/evaluation_examples/examples/libreoffice_writer/6f81754e-285d-4ce0-b59e-af7edb02d108.json
index bb6bbb0..cb632a4 100644
--- a/evaluation_examples/examples/libreoffice_writer/6f81754e-285d-4ce0-b59e-af7edb02d108.json
+++ b/evaluation_examples/examples/libreoffice_writer/6f81754e-285d-4ce0-b59e-af7edb02d108.json
@@ -52,7 +52,7 @@
         }
       }
     ],
-    "func": "compare_docx_lines",
+    "func": "compare_unique_train_records",
     "result": {
       "type": "vm_file",
       "path": "/home/user/Desktop/HK_train_record.docx",
@@ -60,8 +60,16 @@
     },
     "expected": {
       "type": "cloud_file",
-      "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/6f81754e-285d-4ce0-b59e-af7edb02d108/HK_train_record_Gold.docx",
-      "dest": "HK_train_record_Gold.docx"
+      "path": [
+        "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/6f81754e-285d-4ce0-b59e-af7edb02d108/HK_train_record_Gold.docx",
+        "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/6f81754e-285d-4ce0-b59e-af7edb02d108/HK_train_record.docx"
+      ],
+      "dest": [
+        "HK_train_record_Gold.docx",
+        "HK_train_record_Original.docx"
+      ],
+      "multi": true,
+      "gives": [0, 1]
     }
   },
   "proxy": false
diff --git a/evaluation_examples/examples/libreoffice_writer/88fe4b2d-3040-4c70-9a70-546a47764b48.json b/evaluation_examples/examples/libreoffice_writer/88fe4b2d-3040-4c70-9a70-546a47764b48.json
index 546d957..2bcf614 100644
--- a/evaluation_examples/examples/libreoffice_writer/88fe4b2d-3040-4c70-9a70-546a47764b48.json
+++ b/evaluation_examples/examples/libreoffice_writer/88fe4b2d-3040-4c70-9a70-546a47764b48.json
@@ -52,20 +52,57 @@
         }
       }
     ],
-    "func": "compare_docx_files",
-    "expected": {
-      "type": "cloud_file",
-      "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/88fe4b2d-3040-4c70-9a70-546a47764b48/CCCH9003_Tutorial_guidelines_Gold.docx",
-      "dest": "CCCH9003_Tutorial_guidelines_Gold.docx"
-    },
-    "result": {
-      "type": "vm_file",
-      "path": "/home/user/Desktop/CCCH9003_Tutorial_guidelines.docx",
-      "dest": "CCCH9003_Tutorial_guidelines.docx"
-    },
-    "options": {
-      "ignore_blanks": false
-    }
+    "func": [
+      "compare_docx_files",
+      "compare_docx_files",
+      "compare_docx_files"
+    ],
+    "conj": "or",
+    "expected": [
+      {
+        "type": "cloud_file",
+        "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/88fe4b2d-3040-4c70-9a70-546a47764b48/CCCH9003_Tutorial_guidelines_Gold_1.docx",
+        "dest": "CCCH9003_Tutorial_guidelines_Gold_1.docx"
+      },
+      {
+        "type": "cloud_file",
+        "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/88fe4b2d-3040-4c70-9a70-546a47764b48/CCCH9003_Tutorial_guidelines_Gold_2.docx",
+        "dest": "CCCH9003_Tutorial_guidelines_Gold_2.docx"
+      },
+      {
+        "type": "cloud_file",
+        "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/88fe4b2d-3040-4c70-9a70-546a47764b48/CCCH9003_Tutorial_guidelines_Gold_3.docx",
+        "dest": "CCCH9003_Tutorial_guidelines_Gold_3.docx"
+      }
+    ],
+    "result": [
+      {
+        "type": "vm_file",
+        "path": "/home/user/Desktop/CCCH9003_Tutorial_guidelines.docx",
+        "dest": "CCCH9003_Tutorial_guidelines.docx"
+      },
+      {
+        "type": "vm_file",
+        "path": "/home/user/Desktop/CCCH9003_Tutorial_guidelines.docx",
+        "dest": "CCCH9003_Tutorial_guidelines.docx"
+      },
+      {
+        "type": "vm_file",
+        "path": "/home/user/Desktop/CCCH9003_Tutorial_guidelines.docx",
+        "dest": "CCCH9003_Tutorial_guidelines.docx"
+      }
+    ],
+    "options": [
+      {
+        "ignore_blanks": false
+      },
+      {
+        "ignore_blanks": false
+      },
+      {
+        "ignore_blanks": false
+      }
+    ]
   },
   "proxy": false
 }
\ No newline at end of file
diff --git a/evaluation_examples/examples/libreoffice_writer/936321ce-5236-426a-9a20-e0e3c5dc536f.json b/evaluation_examples/examples/libreoffice_writer/936321ce-5236-426a-9a20-e0e3c5dc536f.json
index 9abfd48..e7143c5 100644
--- a/evaluation_examples/examples/libreoffice_writer/936321ce-5236-426a-9a20-e0e3c5dc536f.json
+++ b/evaluation_examples/examples/libreoffice_writer/936321ce-5236-426a-9a20-e0e3c5dc536f.json
@@ -47,22 +47,40 @@
           "command": [
             "python",
             "-c",
-            "import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5); "
+            "import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(2); "
           ]
         }
       }
     ],
-    "func": "compare_docx_tables",
-    "expected": {
-      "type": "cloud_file",
-      "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/936321ce-5236-426a-9a20-e0e3c5dc536f/Graphemes_Sound_Letter_Patterns_Gold.docx",
-      "dest": "Graphemes_Sound_Letter_Patterns_Gold.docx"
-    },
-    "result": {
-      "type": "vm_file",
-      "path": "/home/user/Desktop/Graphemes_Sound_Letter_Patterns.docx",
-      "dest": "Graphemes_Sound_Letter_Patterns.docx"
-    }
+    "func": [
+      "compare_docx_tables",
+      "compare_docx_tables"
+    ],
+    "conj": "or",
+    "expected": [
+      {
+        "type": "cloud_file",
+        "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/936321ce-5236-426a-9a20-e0e3c5dc536f/Graphemes_Sound_Letter_Patterns_Gold.docx",
+        "dest": "Graphemes_Sound_Letter_Patterns_Gold.docx"
+      },
+      {
+        "type": "cloud_file",
+        "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/936321ce-5236-426a-9a20-e0e3c5dc536f/Graphemes_Sound_Letter_Patterns_Gold_2.docx",
+        "dest": "Graphemes_Sound_Letter_Patterns_Gold_2.docx"
+      }
+    ],
+    "result": [
+      {
+        "type": "vm_file",
+        "path": "/home/user/Desktop/Graphemes_Sound_Letter_Patterns.docx",
+        "dest": "Graphemes_Sound_Letter_Patterns.docx"
+      },
+      {
+        "type": "vm_file",
+        "path": "/home/user/Desktop/Graphemes_Sound_Letter_Patterns.docx",
+        "dest": "Graphemes_Sound_Letter_Patterns.docx"
+      }
+    ]
   },
   "proxy": false
 }
\ No newline at end of file