Robust Evaluation, Blocking File Open, Grader Sensitivity, and LibreOffice Writer Fixes (#217)

* Refactor evaluator structure in LibreOffice Writer example JSON to support multiple expected and result files, enhancing evaluation flexibility.

* Update instance type to t3.large and add VNC access URL logging for allocated VMs, enhancing remote access capabilities.

* Update the time format in get_vm_file to include hours, minutes, and seconds for more precise file naming with a time suffix.

* Add more delay for 936321ce-5236-426a-9a20-e0e3c5dc536f; support one more potential solution.

* Enhance SetupController with a configurable retry limit and improved error handling for file-opening requests. Introduce a new function to compare unique train records, and update logging for better debugging. Adjust JSON examples for evaluation to support multiple expected and result files.

* Clean debug code

---------

Co-authored-by: yuanmengqi <yuanmengqi@mail.ustc.edu.cn>
Author: Tianbao Xie
Date: 2025-06-16 21:37:19 +08:00
Committer: GitHub
Parent: 347238e17e
Commit: 4e11eafd1d
13 changed files with 523 additions and 135 deletions

View File

@@ -36,6 +36,8 @@ FILE_PATH = os.path.dirname(os.path.abspath(__file__))
init_proxy_pool(PROXY_CONFIG_FILE) # initialize the global proxy pool
MAX_RETRIES = 20
class SetupController:
def __init__(self, vm_ip: str, server_port: int = 5000, chromium_port: int = 9222, vlc_port: int = 8080, cache_dir: str = "cache"):
self.vm_ip: str = vm_ip
@@ -64,16 +66,16 @@ class SetupController:
# make sure connection can be established
logger.info(f"try to connect {self.http_server}")
retry = 0
while retry < 50:
while retry < MAX_RETRIES:
try:
_ = requests.get(self.http_server + "/terminal")
break
except:
time.sleep(5)
retry += 1
logger.info(f"retry: {retry}/50")
logger.info(f"retry: {retry}/{MAX_RETRIES}")
if retry == 50:
if retry == MAX_RETRIES:
return False
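For reference, the connection check now amounts to the following pattern (a minimal standalone sketch, assuming the VM-side server exposes GET /terminal; the helper name and delay are illustrative):
import time
import requests

MAX_RETRIES = 20  # configurable retry limit, replacing the hard-coded 50

def wait_for_server(http_server: str, delay: float = 5.0) -> bool:
    # Poll the VM-side server until it answers or the retry budget is exhausted.
    for retry in range(1, MAX_RETRIES + 1):
        try:
            requests.get(http_server + "/terminal", timeout=10)
            return True
        except requests.exceptions.RequestException:
            time.sleep(delay)
            print(f"retry: {retry}/{MAX_RETRIES}")
    return False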
@@ -219,13 +221,14 @@ class SetupController:
# send request to server to open file
try:
response = requests.post(self.http_server + "/setup" + "/open_file", headers=headers, data=payload)
if response.status_code == 200:
logger.info("Command executed successfully: %s", response.text)
else:
logger.error("Failed to open file. Status code: %s", response.text)
# The server-side call is now blocking and can take time.
# We set a timeout that is slightly longer than the server's timeout (1800s).
response = requests.post(self.http_server + "/setup" + "/open_file", headers=headers, data=payload, timeout=1810)
response.raise_for_status() # This will raise an exception for 4xx and 5xx status codes
logger.info("Command executed successfully: %s", response.text)
except requests.exceptions.RequestException as e:
logger.error("An error occurred while trying to send the request: %s", e)
logger.error(f"Failed to open file '{path}'. An error occurred while trying to send the request or the server responded with an error: {e}")
raise Exception(f"Failed to open file '{path}'. An error occurred while trying to send the request or the server responded with an error: {e}") from e
def _launch_setup(self, command: Union[str, List[str]], shell: bool = False):
if not command:
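The client side of the blocking open_file request can be exercised in isolation with a sketch like this (hedged: the 1800 s server budget comes from this diff; server_url, the payload shape, and the helper name are illustrative):
import requests

SERVER_OPEN_TIMEOUT = 1800                      # server-side budget (seconds)
CLIENT_TIMEOUT = SERVER_OPEN_TIMEOUT + 10       # wait slightly longer than the server

def open_file_on_vm(server_url: str, path: str) -> str:
    # Ask the VM-side server to open a file and block until it reports success or failure.
    response = requests.post(
        f"{server_url}/setup/open_file",
        json={"path": path},                    # assumed payload shape
        timeout=CLIENT_TIMEOUT,
    )
    response.raise_for_status()                 # surface 4xx/5xx responses as exceptions
    return response.text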

View File

@@ -80,18 +80,16 @@ def get_vm_file(env, config: Dict[str, Any]) -> Union[Optional[str], List[Option
returned.
only support for single file now:
time_suffix(bool): optional. defaults to False. if True, append the current time in required format.
time_format(str): optional. defaults to "%Y_%m_%d". format of the time suffix.
time_format(str): optional. defaults to "%Y%m%d_%H%M%S". format of the time suffix.
"""
time_format = "%Y_%m_%d"
time_format = "%Y%m%d_%H%M%S"
if not config.get("multi", False):
paths: List[str] = [config["path"]]
dests: List[str] = [config["dest"]]
if "time_suffix" in config.keys() and config["time_suffix"]:
if "time_format" in config.keys():
time_format = config["time_format"]
# Insert time before . in file type suffix
paths = [p.split(".")[0] + datetime.now().strftime(time_format) + "." + p.split(".")[1] if "." in p else p for p in paths]
dests = [d.split(".")[0] + datetime.now().strftime(time_format) + "." + d.split(".")[1] if "." in d else d for d in dests]
if config.get("time_suffix", False):
time_format = config.get("time_format", time_format)
# Insert time before file extension.
dests = [f"{os.path.splitext(d)[0]}_{datetime.now().strftime(time_format)}{os.path.splitext(d)[1]}" for d in dests]
else:
paths: List[str] = config["path"]
dests: List[str] = config["dest"]
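With this change the suffix is inserted before the file extension of the local destination name instead of being spliced at the first dot; a minimal illustration (file names are made up):
import os
from datetime import datetime

def with_time_suffix(dest: str, time_format: str = "%Y%m%d_%H%M%S") -> str:
    # e.g. "HK_train_record.docx" -> "HK_train_record_20250616_213719.docx"
    stem, ext = os.path.splitext(dest)
    return f"{stem}_{datetime.now().strftime(time_format)}{ext}"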

View File

@@ -52,7 +52,8 @@ from .docs import (
compare_docx_files_and_ignore_new_lines,
compare_docx_images,
compare_image_text,
compare_references
compare_references,
compare_unique_train_records
)
from .general import (
check_csv,

View File

@@ -167,8 +167,12 @@ def compare_docx_files(file1, file2, **options):
if ignore_case:
p1, p2 = p1.lower(), p2.lower()
if p1 != p2:
print(p1)
print(p2)
# show the difference
print("=== First Paragraph ===")
print(f"\033[92m{repr(p1)}\033[0m") # Green color for p1, repr() shows hidden chars
print("=== Second Paragraph ===")
print(f"\033[91m{repr(p2)}\033[0m") # Red color for p2, repr() shows hidden chars
print("=" * 50) # Clear boundary
return 0
return 1
@@ -886,3 +890,72 @@ def compare_references(file1, file2, **options):
return (result - reference_base_result) / (1 - reference_base_result)
else:
return 0
def compare_unique_train_records(processed_file, expected_files, **kwargs):
"""
Compares the processed file with a list of expected files containing the
gold standard and the initial document.
expected_files[0] should be the gold standard file.
expected_files[1] should be the initial file.
"""
# Debug logging to understand what we're actually receiving
logger.info(f"DEBUG: processed_file type: {type(processed_file)}, value: {processed_file}")
logger.info(f"DEBUG: expected_files type: {type(expected_files)}, value: {expected_files}")
logger.info(f"DEBUG: kwargs: {kwargs}")
if not processed_file or not isinstance(expected_files, list) or len(expected_files) < 2:
logger.error("Invalid arguments: processed_file and a list of 2 expected_files are required.")
return 0
gold_file = expected_files[0]
initial_file = expected_files[1]
if not gold_file or not initial_file:
logger.error("Gold file or initial file path is missing from expected_files list.")
return 0
# Helper function to get lines and IDs from a file
def get_lines_and_ids_from_file(file_path):
try:
doc = Document(file_path)
lines = [p.text.strip() for p in doc.paragraphs if p.text.strip()]
train_ids = [line.split(',')[1].strip() for line in lines if len(line.split(',')) == 4]
return lines, train_ids
except Exception as e:
logger.error(f"Error opening or parsing file {file_path}: {e}")
return None, None
# Get data from all three files
processed_lines, processed_train_ids = get_lines_and_ids_from_file(processed_file)
if processed_lines is None: return 0
gold_lines, gold_train_ids = get_lines_and_ids_from_file(gold_file)
if gold_lines is None: return 0
initial_lines, _ = get_lines_and_ids_from_file(initial_file)
if initial_lines is None: return 0
initial_lines_set = set(initial_lines)
# 1. Subset Check: Ensure every processed line was in the initial file
if not set(processed_lines).issubset(initial_lines_set):
logger.error("Processed file contains lines not present in the initial file.")
logger.error(f"Extra lines: {set(processed_lines) - initial_lines_set}")
return 0
# 2. Uniqueness Check: Check for duplicates within the processed file
if len(processed_train_ids) != len(set(processed_train_ids)):
logger.error("Duplicate train_ids found in the processed file.")
return 0
# 3. Correctness Check: Compare the set of train_ids
if set(processed_train_ids) != set(gold_train_ids):
logger.error("Set of train_ids does not match between processed file and gold file.")
return 0
# 4. Line count check
if len(processed_lines) != len(gold_lines):
logger.error("Number of lines does not match between processed file and gold file.")
return 0
return 1
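A hedged usage sketch, assuming the cached expected files are handed to the metric in the order declared in the task JSON (gold standard first, initial document second); the paths are illustrative:
# Returns 1 only if the processed document keeps only lines from the initial file,
# contains no duplicate train_ids, and matches the gold file's train_id set and line count.
score = compare_unique_train_records(
    "cache/HK_train_record.docx",               # file pulled back from the VM
    [
        "cache/HK_train_record_Gold.docx",      # expected_files[0]: gold standard
        "cache/HK_train_record_Original.docx",  # expected_files[1]: initial document
    ],
)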

View File

@@ -5,6 +5,9 @@ import psutil
import logging
import dotenv
import signal
INSTANCE_TYPE = "t3.large"
# Load environment variables from .env file
dotenv.load_dotenv()
@@ -31,37 +34,17 @@ logger.setLevel(logging.INFO)
DEFAULT_REGION = "us-east-1"
# todo: Add doc for the configuration of image, security group and network interface
# todo: public the AMI images
# ami-05e7d7bd279ea4f14
IMAGE_ID_MAP = {
"us-east-1": "ami-00674d875de9addc1",
"us-east-1": "ami-03a22c6e501415fb1",
"ap-east-1": "ami-0c092a5b8be4116f5",
}
INSTANCE_TYPE = "t3.medium"
def _allocate_vm(region=DEFAULT_REGION):
if region not in IMAGE_ID_MAP:
raise ValueError(f"Region {region} is not supported. Supported regions are: {list(IMAGE_ID_MAP.keys())}")
run_instances_params = {
"MaxCount": 1,
"MinCount": 1,
"ImageId": IMAGE_ID_MAP[region],
"InstanceType": INSTANCE_TYPE,
"EbsOptimized": True,
"NetworkInterfaces": [
{
"SubnetId": os.getenv('AWS_SUBNET_ID'),
"AssociatePublicIpAddress": True,
"DeviceIndex": 0,
"Groups": [
os.getenv('AWS_SECURITY_GROUP_ID')
]
}
]
}
ec2_client = boto3.client('ec2', region_name=region)
instance_id = None
original_sigint_handler = signal.getsignal(signal.SIGINT)
@@ -94,26 +77,64 @@ def _allocate_vm(region=DEFAULT_REGION):
signal.signal(signal.SIGINT, signal_handler)
signal.signal(signal.SIGTERM, signal_handler)
if not os.getenv('AWS_SECURITY_GROUP_ID'):
raise ValueError("AWS_SECURITY_GROUP_ID is not set in the environment variables.")
if not os.getenv('AWS_SUBNET_ID'):
raise ValueError("AWS_SUBNET_ID is not set in the environment variables.")
run_instances_params = {
"MaxCount": 1,
"MinCount": 1,
"ImageId": IMAGE_ID_MAP[region],
"InstanceType": INSTANCE_TYPE,
"EbsOptimized": True,
"NetworkInterfaces": [
{
"SubnetId": os.getenv('AWS_SUBNET_ID'),
"AssociatePublicIpAddress": True,
"DeviceIndex": 0,
"Groups": [
os.getenv('AWS_SECURITY_GROUP_ID')
]
}
]
}
response = ec2_client.run_instances(**run_instances_params)
instance_id = response['Instances'][0]['InstanceId']
waiter = ec2_client.get_waiter('instance_running')
logger.info(f"Waiting for instance {instance_id} to be running...")
ec2_client.get_waiter('instance_running').wait(InstanceIds=[instance_id])
waiter.wait(InstanceIds=[instance_id])
logger.info(f"Instance {instance_id} is ready.")
# Get and display the VNC access address
try:
instance_details = ec2_client.describe_instances(InstanceIds=[instance_id])
instance = instance_details['Reservations'][0]['Instances'][0]
public_ip = instance.get('PublicIpAddress', '')
if public_ip:
vnc_url = f"http://{public_ip}:5910/vnc.html"
logger.info("="*80)
logger.info(f"🖥️ VNC Web Access URL: {vnc_url}")
logger.info(f"📡 Public IP: {public_ip}")
logger.info(f"🆔 Instance ID: {instance_id}")
logger.info("="*80)
print(f"\n🌐 VNC访问地址: {vnc_url}")
print(f"📍 请在浏览器中打开上述地址进行远程桌面访问\n")
except Exception as e:
logger.warning(f"Failed to get VNC address for instance {instance_id}: {e}")
except KeyboardInterrupt:
logger.warning("VM allocation interrupted by user (SIGINT).")
raise
except SystemExit:
logger.warning("VM allocation terminated by parent process (SIGTERM).")
if instance_id:
logger.info(f"Terminating instance {instance_id} due to interruption.")
ec2_client.terminate_instances(InstanceIds=[instance_id])
raise
except Exception as e:
logger.error(f"Failed to allocate VM in region {region}: {str(e)}")
# try to clean up any resources that were created
try:
if instance_id:
ec2_client.terminate_instances(InstanceIds=[instance_id])
logger.info(f"Terminated instance {instance_id} due to allocation failure.")
except Exception as cleanup_error:
logger.error(f"May fail to clean up instance {instance_id}: {str(cleanup_error)}")
logger.error(f"Failed to allocate VM: {e}", exc_info=True)
if instance_id:
logger.info(f"Terminating instance {instance_id} due to an error.")
ec2_client.terminate_instances(InstanceIds=[instance_id])
raise
finally:
# Restore original signal handlers
@@ -153,6 +174,27 @@ def _allocate_vm_with_proxy(region=DEFAULT_REGION, proxy_config_file=None):
subnet_id=os.getenv('AWS_SUBNET_ID')
)
try:
ec2_client = boto3.client('ec2', region_name=region)
instance_details = ec2_client.describe_instances(InstanceIds=[instance_id])
instance = instance_details['Reservations'][0]['Instances'][0]
public_ip = instance.get('PublicIpAddress', '')
if public_ip:
vnc_url = f"http://{public_ip}:5910/vnc.html"
logger.info("="*80)
logger.info(f"🖥️ VNC Web Access URL: {vnc_url}")
logger.info(f"📡 Public IP: {public_ip}")
logger.info(f"🆔 Instance ID: {instance_id}")
if current_proxy:
logger.info(f"🌐 Proxy: {current_proxy.host}:{current_proxy.port}")
logger.info("="*80)
print(f"\n🌐 VNC Web Access URL: {vnc_url}")
if current_proxy:
print(f"🔄 Current Proxy: {current_proxy.host}:{current_proxy.port}")
print(f"📍 Please open the above address in the browser for remote desktop access\n")
except Exception as e:
logger.warning(f"Failed to get VNC address for proxy instance {instance_id}: {e}")
return instance_id
@@ -213,4 +255,4 @@ class AWSVMManager(VMManager):
else:
logger.info("Allocating a new VM in region: {}".format(region))
new_vm_path = _allocate_vm(region)
return new_vm_path
return new_vm_path

View File

@@ -63,10 +63,24 @@ class AWSProvider(Provider):
for reservation in response['Reservations']:
for instance in reservation['Instances']:
private_ip_address = instance.get('PrivateIpAddress', '')
public_ip_address = instance.get('PublicIpAddress', '')
if public_ip_address:
vnc_url = f"http://{public_ip_address}:5910/vnc.html"
logger.info("="*80)
logger.info(f"🖥️ VNC Web Access URL: {vnc_url}")
logger.info(f"📡 Public IP: {public_ip_address}")
logger.info(f"🏠 Private IP: {private_ip_address}")
logger.info("="*80)
print(f"\n🌐 VNC Web Access URL: {vnc_url}")
print(f"📍 Please open the above address in the browser for remote desktop access\n")
else:
logger.warning("No public IP address available for VNC access")
return private_ip_address
return '' # Return an empty string if no IP address is found
except ClientError as e:
logger.error(f"Failed to retrieve private IP address for the instance {path_to_vm}: {str(e)}")
logger.error(f"Failed to retrieve IP address for the instance {path_to_vm}: {str(e)}")
raise
def save_state(self, path_to_vm: str, snapshot_name: str):
@@ -74,7 +88,7 @@ class AWSProvider(Provider):
ec2_client = boto3.client('ec2', region_name=self.region)
try:
image_response = ec2_client.create_image(InstanceId=path_to_vm, ImageId=snapshot_name)
image_response = ec2_client.create_image(InstanceId=path_to_vm, Name=snapshot_name)
image_id = image_response['ImageId']
logger.info(f"AMI {image_id} created successfully from instance {path_to_vm}.")
return image_id
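The fix replaces the invalid ImageId= keyword with Name=, which is what boto3's create_image expects; a minimal standalone sketch (region, instance ID, and AMI name are placeholders):
import boto3

ec2_client = boto3.client("ec2", region_name="us-east-1")

# Create an AMI from a running instance; Name is required, ImageId is not an accepted argument here.
image_response = ec2_client.create_image(InstanceId="i-0123456789abcdef0", Name="osworld-snapshot-1")
image_id = image_response["ImageId"]

# Optionally block until the AMI is usable before reverting to it.
ec2_client.get_waiter("image_available").wait(ImageIds=[image_id])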
@@ -83,7 +97,7 @@ class AWSProvider(Provider):
raise
def revert_to_snapshot(self, path_to_vm: str, snapshot_name: str):
logger.info(f"Reverting AWS VM to snapshot: {snapshot_name}...")
logger.info(f"Reverting AWS VM to snapshot AMI: {snapshot_name}...")
ec2_client = boto3.client('ec2', region_name=self.region)
try:
@@ -93,23 +107,21 @@ class AWSProvider(Provider):
security_groups = [sg['GroupId'] for sg in instance['SecurityGroups']]
subnet_id = instance['SubnetId']
instance_type = instance['InstanceType']
instance_snapshot = instance_details['Reservations'][0]['Instances'][0]['ImageId']
# Step 2: Terminate the old instance
ec2_client.terminate_instances(InstanceIds=[path_to_vm])
logger.info(f"Old instance {path_to_vm} has been terminated.")
# Step 3: Launch a new instance from the snapshot
logger.info(f"Launching a new instance from snapshot {instance_snapshot}...")
new_instance = ec2_client.run_instances(
MaxCount = 1,
MinCount = 1,
ImageId = instance_snapshot,
InstanceType = instance_type,
EbsOptimized = True,
NetworkInterfaces = [
# Step 3: Launch a new instance from the snapshot(AMI) with performance optimization
logger.info(f"Launching a new instance from AMI {snapshot_name}...")
run_instances_params = {
"MaxCount": 1,
"MinCount": 1,
"ImageId": snapshot_name,
"InstanceType": instance_type,
"EbsOptimized": True,
"NetworkInterfaces": [
{
"SubnetId": subnet_id,
"AssociatePublicIpAddress": True,
@@ -117,13 +129,31 @@ class AWSProvider(Provider):
"Groups": security_groups
}
]
)
}
new_instance = ec2_client.run_instances(**run_instances_params)
new_instance_id = new_instance['Instances'][0]['InstanceId']
logger.info(f"New instance {new_instance_id} launched from snapshot {snapshot_name}.")
logger.info(f"New instance {new_instance_id} launched from AMI {snapshot_name}.")
logger.info(f"Waiting for instance {new_instance_id} to be running...")
ec2_client.get_waiter('instance_running').wait(InstanceIds=[new_instance_id])
logger.info(f"Instance {new_instance_id} is ready.")
try:
instance_details = ec2_client.describe_instances(InstanceIds=[new_instance_id])
instance = instance_details['Reservations'][0]['Instances'][0]
public_ip = instance.get('PublicIpAddress', '')
if public_ip:
vnc_url = f"http://{public_ip}:5910/vnc.html"
logger.info("="*80)
logger.info(f"🖥️ New Instance VNC Web Access URL: {vnc_url}")
logger.info(f"📡 Public IP: {public_ip}")
logger.info(f"🆔 New Instance ID: {new_instance_id}")
logger.info("="*80)
print(f"\n🌐 New Instance VNC Web Access URL: {vnc_url}")
print(f"📍 Please open the above address in the browser for remote desktop access\n")
except Exception as e:
logger.warning(f"Failed to get VNC address for new instance {new_instance_id}: {e}")
return new_instance_id

View File

@@ -163,16 +163,34 @@ echo "$(date): Configured proxy {self.current_proxy.host}:{self.current_proxy.po
logger.info(f"Created new instance {instance_id} with proxy configuration")
# Wait for the instance to be running
logger.info(f"Waiting for instance {instance_id} to be running...")
ec2_client.get_waiter('instance_running').wait(InstanceIds=[instance_id])
logger.info(f"Instance {instance_id} is ready.")
try:
instance_details = ec2_client.describe_instances(InstanceIds=[instance_id])
instance = instance_details['Reservations'][0]['Instances'][0]
public_ip = instance.get('PublicIpAddress', '')
if public_ip:
vnc_url = f"http://{public_ip}:5910/vnc.html"
logger.info("="*80)
logger.info(f"🖥️ VNC Web Access URL: {vnc_url}")
logger.info(f"📡 Public IP: {public_ip}")
logger.info(f"🆔 Instance ID: {instance_id}")
if self.current_proxy:
logger.info(f"🌐 Proxy: {self.current_proxy.host}:{self.current_proxy.port}")
logger.info("="*80)
print(f"\n🌐 VNC Web Access URL: {vnc_url}")
if self.current_proxy:
print(f"🔄 Current Proxy: {self.current_proxy.host}:{self.current_proxy.port}")
print(f"📍 Please open the above address in the browser for remote desktop access\n")
except Exception as e:
logger.warning(f"Failed to get VNC address for instance {instance_id}: {e}")
return instance_id
except ClientError as e:
logger.error(f"Failed to create instance with proxy: {str(e)}")
# If the current proxy failed, try rotating to a new proxy
if self.current_proxy:
proxy_pool = get_global_proxy_pool()
proxy_pool.mark_proxy_failed(self.current_proxy)
@@ -188,10 +206,28 @@ echo "$(date): Configured proxy {self.current_proxy.host}:{self.current_proxy.po
for reservation in response['Reservations']:
for instance in reservation['Instances']:
private_ip_address = instance.get('PrivateIpAddress', '')
public_ip_address = instance.get('PublicIpAddress', '')
if public_ip_address:
vnc_url = f"http://{public_ip_address}:5910/vnc.html"
logger.info("="*80)
logger.info(f"🖥️ VNC Web Access URL: {vnc_url}")
logger.info(f"📡 Public IP: {public_ip_address}")
logger.info(f"🏠 Private IP: {private_ip_address}")
if self.current_proxy:
logger.info(f"🌐 Proxy: {self.current_proxy.host}:{self.current_proxy.port}")
logger.info("="*80)
print(f"\n🌐 VNC Web Access URL: {vnc_url}")
if self.current_proxy:
print(f"🔄 Current Proxy: {self.current_proxy.host}:{self.current_proxy.port}")
print(f"📍 Please open the above address in the browser for remote desktop access\n")
else:
logger.warning("No public IP address available for VNC access")
return private_ip_address
return ''
except ClientError as e:
logger.error(f"Failed to retrieve private IP address for the instance {path_to_vm}: {str(e)}")
logger.error(f"Failed to retrieve IP address for the instance {path_to_vm}: {str(e)}")
raise
def save_state(self, path_to_vm: str, snapshot_name: str):
@@ -212,24 +248,28 @@ echo "$(date): Configured proxy {self.current_proxy.host}:{self.current_proxy.po
ec2_client = boto3.client('ec2', region_name=self.region)
try:
# 获取原实例详情
# Get original instance details for config.
instance_details = ec2_client.describe_instances(InstanceIds=[path_to_vm])
instance = instance_details['Reservations'][0]['Instances'][0]
security_groups = [sg['GroupId'] for sg in instance['SecurityGroups']]
subnet_id = instance['SubnetId']
instance_type = instance['InstanceType']
# 终止旧实例
# Terminate the old instance. This is a non-blocking call.
logger.info(f"Initiating termination for old instance {path_to_vm}...")
ec2_client.terminate_instances(InstanceIds=[path_to_vm])
logger.info(f"Old instance {path_to_vm} has been terminated.")
logger.info(f"Old instance {path_to_vm} termination initiated.")
# 轮换到新的代理
# Rotate to a new proxy
self._rotate_proxy()
# 创建新实例
# Create a new instance
new_instance_id = self.create_instance_with_proxy(
snapshot_name, instance_type, security_groups, subnet_id
)
# Note: VNC address is displayed within create_instance_with_proxy
logger.info(f"Successfully launched new instance {new_instance_id} for revert.")
return new_instance_id

View File

@@ -4,6 +4,7 @@ import platform
import shlex
import json
import subprocess, signal
import time
from pathlib import Path
from typing import Any, Optional, Sequence
from typing import List, Dict, Tuple, Literal
@@ -65,6 +66,8 @@ app = Flask(__name__)
pyautogui.PAUSE = 0
pyautogui.DARWIN_CATCH_UP_TIME = 0
TIMEOUT = 1800 # seconds
logger = app.logger
recording_process = None # fixme: this is a temporary solution for recording, need to be changed to support multiple-process
recording_path = "/tmp/recording.mp4"
@@ -202,8 +205,8 @@ def capture_screen_with_cursor():
pos = (round(pos_win[0]*ratio - hotspotx), round(pos_win[1]*ratio - hotspoty))
img.paste(cursor, pos, cursor)
except:
pass
except Exception as e:
logger.warning(f"Failed to capture cursor on Windows, screenshot will not have a cursor. Error: {e}")
img.save(file_path)
elif user_platform == "Linux":
@@ -1124,18 +1127,72 @@ def open_file():
if not path:
return "Path not supplied!", 400
path = Path(os.path.expandvars(os.path.expanduser(path)))
path_obj = Path(os.path.expandvars(os.path.expanduser(path)))
if not path.exists():
return f"File not found: {path}", 404
if not path_obj.exists():
return f"File not found: {path_obj}", 404
try:
if platform.system() == "Windows":
os.startfile(path)
os.startfile(path_obj)
else:
open_cmd: str = "open" if platform.system() == "Darwin" else "xdg-open"
subprocess.Popen([open_cmd, str(path)])
return "File opened successfully"
subprocess.Popen([open_cmd, str(path_obj)])
# Wait for the file to open
file_name = path_obj.name
# Some apps don't include the extension in the title
file_name_without_ext, _ = os.path.splitext(file_name)
start_time = time.time()
window_found = False
while time.time() - start_time < TIMEOUT:
os_name = platform.system()
if os_name in ['Windows', 'Darwin']:
import pygetwindow as gw
# Check for window title containing file name or file name without extension
windows = gw.getWindowsWithTitle(file_name)
if not windows:
windows = gw.getWindowsWithTitle(file_name_without_ext)
if windows:
# To be more specific, we can try to activate it
windows[0].activate()
window_found = True
break
elif os_name == 'Linux':
try:
# Using wmctrl to list windows and check if any window title contains the filename
result = subprocess.run(['wmctrl', '-l'], capture_output=True, text=True, check=True)
window_list = result.stdout.strip().split('\n')
if not result.stdout.strip():
pass # No windows, just continue waiting
else:
for window in window_list:
if file_name in window or file_name_without_ext in window:
# a window is found, now activate it
window_id = window.split()[0]
subprocess.run(['wmctrl', '-i', '-a', window_id], check=True)
window_found = True
break
if window_found:
break
except (subprocess.CalledProcessError, FileNotFoundError):
# wmctrl might not be installed or the window manager isn't ready.
# We just log it once and let the main loop retry.
if 'wmctrl_failed_once' not in locals():
logger.warning("wmctrl command is not ready, will keep retrying...")
wmctrl_failed_once = True
pass # Let the outer loop retry
time.sleep(1)
if window_found:
return "File opened and window activated successfully"
else:
return f"Failed to find window for {file_name} within {timeout} seconds.", 500
except Exception as e:
return f"Failed to open {path}. Error: {e}", 500
@@ -1258,37 +1315,78 @@ def close_window():
@app.route('/start_recording', methods=['POST'])
def start_recording():
global recording_process
if recording_process:
if recording_process and recording_process.poll() is None:
return jsonify({'status': 'error', 'message': 'Recording is already in progress.'}), 400
# Clean up previous recording if it exists
if os.path.exists(recording_path):
try:
os.remove(recording_path)
except OSError as e:
logger.error(f"Error removing old recording file: {e}")
return jsonify({'status': 'error', 'message': f'Failed to remove old recording file: {e}'}), 500
d = display.Display()
screen_width = d.screen().width_in_pixels
screen_height = d.screen().height_in_pixels
start_command = f"ffmpeg -y -f x11grab -draw_mouse 1 -s {screen_width}x{screen_height} -i :0.0 -c:v libx264 -r 30 {recording_path}"
recording_process = subprocess.Popen(shlex.split(start_command), stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL)
# Use stderr=PIPE to capture potential errors from ffmpeg
recording_process = subprocess.Popen(shlex.split(start_command),
stdout=subprocess.DEVNULL,
stderr=subprocess.PIPE,
text=True # To get stderr as string
)
return jsonify({'status': 'success', 'message': 'Started recording.'})
# Wait a couple of seconds to see if ffmpeg starts successfully
try:
# Wait for 2 seconds. If ffmpeg exits within this time, it's an error.
recording_process.wait(timeout=2)
# If wait() returns, it means the process has terminated.
error_output = recording_process.stderr.read()
return jsonify({
'status': 'error',
'message': f'Failed to start recording. ffmpeg terminated unexpectedly. Error: {error_output}'
}), 500
except subprocess.TimeoutExpired:
# This is the expected outcome: the process is still running after 2 seconds.
return jsonify({'status': 'success', 'message': 'Started recording successfully.'})
@app.route('/end_recording', methods=['POST'])
def end_recording():
global recording_process
if not recording_process:
if not recording_process or recording_process.poll() is not None:
recording_process = None # Clean up stale process object
return jsonify({'status': 'error', 'message': 'No recording in progress to stop.'}), 400
error_output = ""
try:
# Send SIGINT for a graceful shutdown, allowing ffmpeg to finalize the file.
recording_process.send_signal(signal.SIGINT)
recording_process.wait()
# Wait for ffmpeg to terminate. communicate() gets output and waits.
_, error_output = recording_process.communicate(timeout=15)
except subprocess.TimeoutExpired:
logger.error("ffmpeg did not respond to SIGINT, killing the process.")
recording_process.kill()
# After killing, communicate to get any remaining output.
_, error_output = recording_process.communicate()
recording_process = None
return jsonify({
'status': 'error',
'message': f'Recording process was unresponsive and had to be killed. Stderr: {error_output}'
}), 500
# return recording video file
if os.path.exists(recording_path):
recording_process = None # Clear the process from global state
# Check if the recording file was created and is not empty.
if os.path.exists(recording_path) and os.path.getsize(recording_path) > 0:
return send_file(recording_path, as_attachment=True)
else:
return abort(404, description="Recording failed")
logger.error(f"Recording failed. The output file is missing or empty. ffmpeg stderr: {error_output}")
return abort(500, description=f"Recording failed. The output file is missing or empty. ffmpeg stderr: {error_output}")
if __name__ == '__main__':
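The ffmpeg handshake introduced here can be summarised by the following sketch (the output path, display, and geometry are placeholders; the 2 s startup probe and the SIGINT shutdown mirror the diff):
import shlex
import signal
import subprocess

def start_recording(path="/tmp/recording.mp4", size="1920x1080"):
    cmd = f"ffmpeg -y -f x11grab -draw_mouse 1 -s {size} -i :0.0 -c:v libx264 -r 30 {path}"
    proc = subprocess.Popen(shlex.split(cmd), stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, text=True)
    try:
        proc.wait(timeout=2)                     # if ffmpeg exits this quickly, it failed to start
        raise RuntimeError(proc.stderr.read())
    except subprocess.TimeoutExpired:
        return proc                              # still running after 2 s: recording has started

def stop_recording(proc):
    proc.send_signal(signal.SIGINT)              # graceful stop so ffmpeg finalises the file
    try:
        proc.communicate(timeout=15)
    except subprocess.TimeoutExpired:
        proc.kill()                              # last resort if ffmpeg ignores SIGINT
        proc.communicate()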

View File

@@ -27,17 +27,57 @@
"libreoffice_writer"
],
"evaluator": {
"func": "compare_pdfs",
"expected": {
"func": [
"compare_pdfs",
"compare_pdfs",
"compare_pdfs",
"compare_pdfs"
],
"conj": "or",
"expected": [
{
"type": "cloud_file",
"path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/4bcb1253-a636-4df4-8cb0-a35c04dfef31/View_Person_Organizational_Summary.pdf",
"dest": "Constitution_Template_With_Guidelines_Gold_1.pdf"
},
{
"type": "cloud_file",
"path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/4bcb1253-a636-4df4-8cb0-a35c04dfef31/View_Person_Organizational_Summary.pdf",
"dest": "Constitution_Template_With_Guidelines_Gold_2.pdf"
},
{
"type": "cloud_file",
"path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/4bcb1253-a636-4df4-8cb0-a35c04dfef31/View_Person_Organizational_Summary.pdf",
"dest": "Constitution_Template_With_Guidelines_Gold.pdf"
"dest": "Constitution_Template_With_Guidelines_Gold_3.pdf"
},
"result": {
{
"type": "cloud_file",
"path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/4bcb1253-a636-4df4-8cb0-a35c04dfef31/View_Person_Organizational_Summary.pdf",
"dest": "Constitution_Template_With_Guidelines_Gold_4.pdf"
}
],
"result": [
{
"type": "vm_file",
"path": "/home/user/Desktop/View_Person_Organizational_Summary.pdf",
"dest": "Constitution_Template_With_Guidelines.pdf"
}
"dest": "Constitution_Template_With_Guidelines_1.pdf"
},
{
"type": "vm_file",
"path": "/home/user/Documents/View_Person_Organizational_Summary.pdf",
"dest": "Constitution_Template_With_Guidelines_2.pdf"
},
{
"type": "vm_file",
"path": "/home/user/Downloads/View_Person_Organizational_Summary.pdf",
"dest": "Constitution_Template_With_Guidelines_3.pdf"
},
{
"type": "vm_file",
"path": "/home/user/View_Person_Organizational_Summary.pdf",
"dest": "Constitution_Template_With_Guidelines_4.pdf"
}
]
},
"proxy": false
}
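Under an "or" conjunction the task passes as soon as any (expected, result) pair scores 1, so saving the PDF to any of the four candidate locations is accepted; a hedged sketch of that aggregation (the real logic lives in the evaluation harness, this is only an illustration):
def aggregate(scores, conj="and"):
    # Combine per-pair metric scores: "or" accepts the best candidate, "and" requires all of them.
    return max(scores) if conj == "or" else min(scores)

# e.g. four compare_pdfs scores, one per candidate save location on the VM:
# aggregate([0, 0, 1, 0], conj="or") -> 1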

View File

@@ -38,7 +38,7 @@
"command": [
"python",
"-c",
"import pyautogui; import time; time.sleep(5); pyautogui.press(\"down\", presses=8, interval=0.01); time.sleep(1); pyautogui.scroll(-2)"
"import pyautogui; import time; time.sleep(15); pyautogui.press(\"down\", presses=8, interval=0.01); time.sleep(1); pyautogui.scroll(-2)"
]
}
}
@@ -68,12 +68,12 @@
"command": [
"python",
"-c",
"import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5); pyautogui.press('down'); time.sleep(0.5); pyautogui.press('enter');"
"import pyautogui; import time; time.sleep(1); pyautogui.hotkey('ctrl', 's'); time.sleep(3);"
]
}
}
],
"func": "compare_contains_image",
"func": "compare_docx_images",
"result": {
"type": "vm_file",
"path": "/home/user/Desktop/Viewing_Your_Class_Schedule_and_Textbooks.docx",

View File

@@ -52,7 +52,7 @@
}
}
],
"func": "compare_docx_lines",
"func": "compare_unique_train_records",
"result": {
"type": "vm_file",
"path": "/home/user/Desktop/HK_train_record.docx",
@@ -60,8 +60,16 @@
},
"expected": {
"type": "cloud_file",
"path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/6f81754e-285d-4ce0-b59e-af7edb02d108/HK_train_record_Gold.docx",
"dest": "HK_train_record_Gold.docx"
"path": [
"https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/6f81754e-285d-4ce0-b59e-af7edb02d108/HK_train_record_Gold.docx",
"https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/6f81754e-285d-4ce0-b59e-af7edb02d108/HK_train_record.docx"
],
"dest": [
"HK_train_record_Gold.docx",
"HK_train_record_Original.docx"
],
"multi": true,
"gives": [0, 1]
}
},
"proxy": false

View File

@@ -52,20 +52,57 @@
}
}
],
"func": "compare_docx_files",
"expected": {
"type": "cloud_file",
"path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/88fe4b2d-3040-4c70-9a70-546a47764b48/CCCH9003_Tutorial_guidelines_Gold.docx",
"dest": "CCCH9003_Tutorial_guidelines_Gold.docx"
},
"result": {
"type": "vm_file",
"path": "/home/user/Desktop/CCCH9003_Tutorial_guidelines.docx",
"dest": "CCCH9003_Tutorial_guidelines.docx"
},
"options": {
"ignore_blanks": false
}
"func": [
"compare_docx_files",
"compare_docx_files",
"compare_docx_files"
],
"conj": "or",
"expected": [
{
"type": "cloud_file",
"path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/88fe4b2d-3040-4c70-9a70-546a47764b48/CCCH9003_Tutorial_guidelines_Gold_1.docx",
"dest": "CCCH9003_Tutorial_guidelines_Gold_1.docx"
},
{
"type": "cloud_file",
"path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/88fe4b2d-3040-4c70-9a70-546a47764b48/CCCH9003_Tutorial_guidelines_Gold_2.docx",
"dest": "CCCH9003_Tutorial_guidelines_Gold_2.docx"
},
{
"type": "cloud_file",
"path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/88fe4b2d-3040-4c70-9a70-546a47764b48/CCCH9003_Tutorial_guidelines_Gold_3.docx",
"dest": "CCCH9003_Tutorial_guidelines_Gold_3.docx"
}
],
"result": [
{
"type": "vm_file",
"path": "/home/user/Desktop/CCCH9003_Tutorial_guidelines.docx",
"dest": "CCCH9003_Tutorial_guidelines.docx"
},
{
"type": "vm_file",
"path": "/home/user/Desktop/CCCH9003_Tutorial_guidelines.docx",
"dest": "CCCH9003_Tutorial_guidelines.docx"
},
{
"type": "vm_file",
"path": "/home/user/Desktop/CCCH9003_Tutorial_guidelines.docx",
"dest": "CCCH9003_Tutorial_guidelines.docx"
}
],
"options": [
{
"ignore_blanks": false
},
{
"ignore_blanks": false
},
{
"ignore_blanks": false
}
]
},
"proxy": false
}

View File

@@ -47,22 +47,40 @@
"command": [
"python",
"-c",
"import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5); "
"import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(2); "
]
}
}
],
"func": "compare_docx_tables",
"expected": {
"type": "cloud_file",
"path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/936321ce-5236-426a-9a20-e0e3c5dc536f/Graphemes_Sound_Letter_Patterns_Gold.docx",
"dest": "Graphemes_Sound_Letter_Patterns_Gold.docx"
},
"result": {
"type": "vm_file",
"path": "/home/user/Desktop/Graphemes_Sound_Letter_Patterns.docx",
"dest": "Graphemes_Sound_Letter_Patterns.docx"
}
"func": [
"compare_docx_tables",
"compare_docx_tables"
],
"conj": "or",
"expected": [
{
"type": "cloud_file",
"path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/936321ce-5236-426a-9a20-e0e3c5dc536f/Graphemes_Sound_Letter_Patterns_Gold.docx",
"dest": "Graphemes_Sound_Letter_Patterns_Gold.docx"
},
{
"type": "cloud_file",
"path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/936321ce-5236-426a-9a20-e0e3c5dc536f/Graphemes_Sound_Letter_Patterns_Gold_2.docx",
"dest": "Graphemes_Sound_Letter_Patterns_Gold_2.docx"
}
],
"result": [
{
"type": "vm_file",
"path": "/home/user/Desktop/Graphemes_Sound_Letter_Patterns.docx",
"dest": "Graphemes_Sound_Letter_Patterns.docx"
},
{
"type": "vm_file",
"path": "/home/user/Desktop/Graphemes_Sound_Letter_Patterns.docx",
"dest": "Graphemes_Sound_Letter_Patterns.docx"
}
]
},
"proxy": false
}