Robust Evaluation, Blocking File Open, Grader Sensitivity, and LibreOffice Writer Fixes (#217)
* Refactor evaluator structure in LibreOffice Writer example JSON to support multiple expected and result files, enhancing evaluation flexibility. * Update instance type to t3.large and add VNC access URL logging for allocated VMs, enhancing remote access capabilities. * Update instance type to t3.large and add VNC access URL logging for allocated VMs, enhancing remote access capabilities. * Update time format in get_vm_file function to include hours, minutes, and seconds for more precise file naming with time suffix. * Add more delay for 936321ce-5236-426a-9a20-e0e3c5dc536f; support one more potential solution. * Enhance SetupController with a configurable retry limit and improved error handling for file opening requests. Introduce a new function to compare unique training records, and update logging for better debugging. Adjust JSON examples for evaluation to support multiple expected and result files. * Clean up debug code --------- Co-authored-by: yuanmengqi <yuanmengqi@mail.ustc.edu.cn>
This commit is contained in:
@@ -5,6 +5,9 @@ import psutil
|
||||
import logging
|
||||
import dotenv
|
||||
import signal
|
||||
|
||||
INSTANCE_TYPE = "t3.large"
|
||||
|
||||
# Load environment variables from .env file
|
||||
dotenv.load_dotenv()
|
||||
|
||||
@@ -31,37 +34,17 @@ logger.setLevel(logging.INFO)
|
||||
DEFAULT_REGION = "us-east-1"
|
||||
# todo: Add doc for the configuration of image, security group and network interface
|
||||
# todo: public the AMI images
|
||||
# ami-05e7d7bd279ea4f14
|
||||
IMAGE_ID_MAP = {
|
||||
"us-east-1": "ami-00674d875de9addc1",
|
||||
"us-east-1": "ami-03a22c6e501415fb1",
|
||||
"ap-east-1": "ami-0c092a5b8be4116f5",
|
||||
}
|
||||
|
||||
INSTANCE_TYPE = "t3.medium"
|
||||
|
||||
def _allocate_vm(region=DEFAULT_REGION):
|
||||
|
||||
if region not in IMAGE_ID_MAP:
|
||||
raise ValueError(f"Region {region} is not supported. Supported regions are: {list(IMAGE_ID_MAP.keys())}")
|
||||
|
||||
run_instances_params = {
|
||||
"MaxCount": 1,
|
||||
"MinCount": 1,
|
||||
"ImageId": IMAGE_ID_MAP[region],
|
||||
"InstanceType": INSTANCE_TYPE,
|
||||
"EbsOptimized": True,
|
||||
"NetworkInterfaces": [
|
||||
{
|
||||
"SubnetId": os.getenv('AWS_SUBNET_ID'),
|
||||
"AssociatePublicIpAddress": True,
|
||||
"DeviceIndex": 0,
|
||||
"Groups": [
|
||||
os.getenv('AWS_SECURITY_GROUP_ID')
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
ec2_client = boto3.client('ec2', region_name=region)
|
||||
instance_id = None
|
||||
original_sigint_handler = signal.getsignal(signal.SIGINT)
|
||||
@@ -94,26 +77,64 @@ def _allocate_vm(region=DEFAULT_REGION):
|
||||
signal.signal(signal.SIGINT, signal_handler)
|
||||
signal.signal(signal.SIGTERM, signal_handler)
|
||||
|
||||
if not os.getenv('AWS_SECURITY_GROUP_ID'):
|
||||
raise ValueError("AWS_SECURITY_GROUP_ID is not set in the environment variables.")
|
||||
if not os.getenv('AWS_SUBNET_ID'):
|
||||
raise ValueError("AWS_SUBNET_ID is not set in the environment variables.")
|
||||
|
||||
run_instances_params = {
|
||||
"MaxCount": 1,
|
||||
"MinCount": 1,
|
||||
"ImageId": IMAGE_ID_MAP[region],
|
||||
"InstanceType": INSTANCE_TYPE,
|
||||
"EbsOptimized": True,
|
||||
"NetworkInterfaces": [
|
||||
{
|
||||
"SubnetId": os.getenv('AWS_SUBNET_ID'),
|
||||
"AssociatePublicIpAddress": True,
|
||||
"DeviceIndex": 0,
|
||||
"Groups": [
|
||||
os.getenv('AWS_SECURITY_GROUP_ID')
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
response = ec2_client.run_instances(**run_instances_params)
|
||||
instance_id = response['Instances'][0]['InstanceId']
|
||||
|
||||
waiter = ec2_client.get_waiter('instance_running')
|
||||
logger.info(f"Waiting for instance {instance_id} to be running...")
|
||||
ec2_client.get_waiter('instance_running').wait(InstanceIds=[instance_id])
|
||||
waiter.wait(InstanceIds=[instance_id])
|
||||
logger.info(f"Instance {instance_id} is ready.")
|
||||
|
||||
# 获取并显示VNC访问地址
|
||||
try:
|
||||
instance_details = ec2_client.describe_instances(InstanceIds=[instance_id])
|
||||
instance = instance_details['Reservations'][0]['Instances'][0]
|
||||
public_ip = instance.get('PublicIpAddress', '')
|
||||
if public_ip:
|
||||
vnc_url = f"http://{public_ip}:5910/vnc.html"
|
||||
logger.info("="*80)
|
||||
logger.info(f"🖥️ VNC Web Access URL: {vnc_url}")
|
||||
logger.info(f"📡 Public IP: {public_ip}")
|
||||
logger.info(f"🆔 Instance ID: {instance_id}")
|
||||
logger.info("="*80)
|
||||
print(f"\n🌐 VNC访问地址: {vnc_url}")
|
||||
print(f"📍 请在浏览器中打开上述地址进行远程桌面访问\n")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to get VNC address for instance {instance_id}: {e}")
|
||||
except KeyboardInterrupt:
|
||||
logger.warning("VM allocation interrupted by user (SIGINT).")
|
||||
raise
|
||||
except SystemExit:
|
||||
logger.warning("VM allocation terminated by parent process (SIGTERM).")
|
||||
if instance_id:
|
||||
logger.info(f"Terminating instance {instance_id} due to interruption.")
|
||||
ec2_client.terminate_instances(InstanceIds=[instance_id])
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to allocate VM in region {region}: {str(e)}")
|
||||
# try to clean up any resources that were created
|
||||
try:
|
||||
if instance_id:
|
||||
ec2_client.terminate_instances(InstanceIds=[instance_id])
|
||||
logger.info(f"Terminated instance {instance_id} due to allocation failure.")
|
||||
except Exception as cleanup_error:
|
||||
logger.error(f"May fail to clean up instance {instance_id}: {str(cleanup_error)}")
|
||||
logger.error(f"Failed to allocate VM: {e}", exc_info=True)
|
||||
if instance_id:
|
||||
logger.info(f"Terminating instance {instance_id} due to an error.")
|
||||
ec2_client.terminate_instances(InstanceIds=[instance_id])
|
||||
raise
|
||||
finally:
|
||||
# Restore original signal handlers
|
||||
@@ -153,6 +174,27 @@ def _allocate_vm_with_proxy(region=DEFAULT_REGION, proxy_config_file=None):
|
||||
subnet_id=os.getenv('AWS_SUBNET_ID')
|
||||
)
|
||||
|
||||
try:
|
||||
ec2_client = boto3.client('ec2', region_name=region)
|
||||
instance_details = ec2_client.describe_instances(InstanceIds=[instance_id])
|
||||
instance = instance_details['Reservations'][0]['Instances'][0]
|
||||
public_ip = instance.get('PublicIpAddress', '')
|
||||
if public_ip:
|
||||
vnc_url = f"http://{public_ip}:5910/vnc.html"
|
||||
logger.info("="*80)
|
||||
logger.info(f"🖥️ VNC Web Access URL: {vnc_url}")
|
||||
logger.info(f"📡 Public IP: {public_ip}")
|
||||
logger.info(f"🆔 Instance ID: {instance_id}")
|
||||
if current_proxy:
|
||||
logger.info(f"🌐 Proxy: {current_proxy.host}:{current_proxy.port}")
|
||||
logger.info("="*80)
|
||||
print(f"\n🌐 VNC Web Access URL: {vnc_url}")
|
||||
if current_proxy:
|
||||
print(f"🔄 Current Proxy: {current_proxy.host}:{current_proxy.port}")
|
||||
print(f"📍 Please open the above address in the browser for remote desktop access\n")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to get VNC address for proxy instance {instance_id}: {e}")
|
||||
|
||||
return instance_id
|
||||
|
||||
|
||||
@@ -213,4 +255,4 @@ class AWSVMManager(VMManager):
|
||||
else:
|
||||
logger.info("Allocating a new VM in region: {}".format(region))
|
||||
new_vm_path = _allocate_vm(region)
|
||||
return new_vm_path
|
||||
return new_vm_path
|
||||
@@ -63,10 +63,24 @@ class AWSProvider(Provider):
|
||||
for reservation in response['Reservations']:
|
||||
for instance in reservation['Instances']:
|
||||
private_ip_address = instance.get('PrivateIpAddress', '')
|
||||
public_ip_address = instance.get('PublicIpAddress', '')
|
||||
|
||||
if public_ip_address:
|
||||
vnc_url = f"http://{public_ip_address}:5910/vnc.html"
|
||||
logger.info("="*80)
|
||||
logger.info(f"🖥️ VNC Web Access URL: {vnc_url}")
|
||||
logger.info(f"📡 Public IP: {public_ip_address}")
|
||||
logger.info(f"🏠 Private IP: {private_ip_address}")
|
||||
logger.info("="*80)
|
||||
print(f"\n🌐 VNC Web Access URL: {vnc_url}")
|
||||
print(f"📍 Please open the above address in the browser for remote desktop access\n")
|
||||
else:
|
||||
logger.warning("No public IP address available for VNC access")
|
||||
|
||||
return private_ip_address
|
||||
return '' # Return an empty string if no IP address is found
|
||||
except ClientError as e:
|
||||
logger.error(f"Failed to retrieve private IP address for the instance {path_to_vm}: {str(e)}")
|
||||
logger.error(f"Failed to retrieve IP address for the instance {path_to_vm}: {str(e)}")
|
||||
raise
|
||||
|
||||
def save_state(self, path_to_vm: str, snapshot_name: str):
|
||||
@@ -74,7 +88,7 @@ class AWSProvider(Provider):
|
||||
ec2_client = boto3.client('ec2', region_name=self.region)
|
||||
|
||||
try:
|
||||
image_response = ec2_client.create_image(InstanceId=path_to_vm, ImageId=snapshot_name)
|
||||
image_response = ec2_client.create_image(InstanceId=path_to_vm, Name=snapshot_name)
|
||||
image_id = image_response['ImageId']
|
||||
logger.info(f"AMI {image_id} created successfully from instance {path_to_vm}.")
|
||||
return image_id
|
||||
@@ -83,7 +97,7 @@ class AWSProvider(Provider):
|
||||
raise
|
||||
|
||||
def revert_to_snapshot(self, path_to_vm: str, snapshot_name: str):
|
||||
logger.info(f"Reverting AWS VM to snapshot: {snapshot_name}...")
|
||||
logger.info(f"Reverting AWS VM to snapshot AMI: {snapshot_name}...")
|
||||
ec2_client = boto3.client('ec2', region_name=self.region)
|
||||
|
||||
try:
|
||||
@@ -93,23 +107,21 @@ class AWSProvider(Provider):
|
||||
security_groups = [sg['GroupId'] for sg in instance['SecurityGroups']]
|
||||
subnet_id = instance['SubnetId']
|
||||
instance_type = instance['InstanceType']
|
||||
instance_snapshot = instance_details['Reservations'][0]['Instances'][0]['ImageId']
|
||||
|
||||
|
||||
# Step 2: Terminate the old instance
|
||||
ec2_client.terminate_instances(InstanceIds=[path_to_vm])
|
||||
logger.info(f"Old instance {path_to_vm} has been terminated.")
|
||||
|
||||
# Step 3: Launch a new instance from the snapshot
|
||||
logger.info(f"Launching a new instance from snapshot {instance_snapshot}...")
|
||||
|
||||
|
||||
new_instance = ec2_client.run_instances(
|
||||
MaxCount = 1,
|
||||
MinCount = 1,
|
||||
ImageId = instance_snapshot,
|
||||
InstanceType = instance_type,
|
||||
EbsOptimized = True,
|
||||
NetworkInterfaces = [
|
||||
# Step 3: Launch a new instance from the snapshot(AMI) with performance optimization
|
||||
logger.info(f"Launching a new instance from AMI {snapshot_name}...")
|
||||
|
||||
run_instances_params = {
|
||||
"MaxCount": 1,
|
||||
"MinCount": 1,
|
||||
"ImageId": snapshot_name,
|
||||
"InstanceType": instance_type,
|
||||
"EbsOptimized": True,
|
||||
"NetworkInterfaces": [
|
||||
{
|
||||
"SubnetId": subnet_id,
|
||||
"AssociatePublicIpAddress": True,
|
||||
@@ -117,13 +129,31 @@ class AWSProvider(Provider):
|
||||
"Groups": security_groups
|
||||
}
|
||||
]
|
||||
)
|
||||
}
|
||||
|
||||
new_instance = ec2_client.run_instances(**run_instances_params)
|
||||
new_instance_id = new_instance['Instances'][0]['InstanceId']
|
||||
logger.info(f"New instance {new_instance_id} launched from snapshot {snapshot_name}.")
|
||||
logger.info(f"New instance {new_instance_id} launched from AMI {snapshot_name}.")
|
||||
logger.info(f"Waiting for instance {new_instance_id} to be running...")
|
||||
ec2_client.get_waiter('instance_running').wait(InstanceIds=[new_instance_id])
|
||||
|
||||
logger.info(f"Instance {new_instance_id} is ready.")
|
||||
|
||||
try:
|
||||
instance_details = ec2_client.describe_instances(InstanceIds=[new_instance_id])
|
||||
instance = instance_details['Reservations'][0]['Instances'][0]
|
||||
public_ip = instance.get('PublicIpAddress', '')
|
||||
if public_ip:
|
||||
vnc_url = f"http://{public_ip}:5910/vnc.html"
|
||||
logger.info("="*80)
|
||||
logger.info(f"🖥️ New Instance VNC Web Access URL: {vnc_url}")
|
||||
logger.info(f"📡 Public IP: {public_ip}")
|
||||
logger.info(f"🆔 New Instance ID: {new_instance_id}")
|
||||
logger.info("="*80)
|
||||
print(f"\n🌐 New Instance VNC Web Access URL: {vnc_url}")
|
||||
print(f"📍 Please open the above address in the browser for remote desktop access\n")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to get VNC address for new instance {new_instance_id}: {e}")
|
||||
|
||||
return new_instance_id
|
||||
|
||||
|
||||
@@ -163,16 +163,34 @@ echo "$(date): Configured proxy {self.current_proxy.host}:{self.current_proxy.po
|
||||
|
||||
logger.info(f"Created new instance {instance_id} with proxy configuration")
|
||||
|
||||
# 等待实例运行
|
||||
logger.info(f"Waiting for instance {instance_id} to be running...")
|
||||
ec2_client.get_waiter('instance_running').wait(InstanceIds=[instance_id])
|
||||
logger.info(f"Instance {instance_id} is ready.")
|
||||
|
||||
try:
|
||||
instance_details = ec2_client.describe_instances(InstanceIds=[instance_id])
|
||||
instance = instance_details['Reservations'][0]['Instances'][0]
|
||||
public_ip = instance.get('PublicIpAddress', '')
|
||||
if public_ip:
|
||||
vnc_url = f"http://{public_ip}:5910/vnc.html"
|
||||
logger.info("="*80)
|
||||
logger.info(f"🖥️ VNC Web Access URL: {vnc_url}")
|
||||
logger.info(f"📡 Public IP: {public_ip}")
|
||||
logger.info(f"🆔 Instance ID: {instance_id}")
|
||||
if self.current_proxy:
|
||||
logger.info(f"🌐 Proxy: {self.current_proxy.host}:{self.current_proxy.port}")
|
||||
logger.info("="*80)
|
||||
print(f"\n🌐 VNC Web Access URL: {vnc_url}")
|
||||
if self.current_proxy:
|
||||
print(f"🔄 Current Proxy: {self.current_proxy.host}:{self.current_proxy.port}")
|
||||
print(f"📍 Please open the above address in the browser for remote desktop access\n")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to get VNC address for instance {instance_id}: {e}")
|
||||
|
||||
return instance_id
|
||||
|
||||
except ClientError as e:
|
||||
logger.error(f"Failed to create instance with proxy: {str(e)}")
|
||||
# 如果当前代理失败,尝试轮换代理
|
||||
if self.current_proxy:
|
||||
proxy_pool = get_global_proxy_pool()
|
||||
proxy_pool.mark_proxy_failed(self.current_proxy)
|
||||
@@ -188,10 +206,28 @@ echo "$(date): Configured proxy {self.current_proxy.host}:{self.current_proxy.po
|
||||
for reservation in response['Reservations']:
|
||||
for instance in reservation['Instances']:
|
||||
private_ip_address = instance.get('PrivateIpAddress', '')
|
||||
public_ip_address = instance.get('PublicIpAddress', '')
|
||||
|
||||
if public_ip_address:
|
||||
vnc_url = f"http://{public_ip_address}:5910/vnc.html"
|
||||
logger.info("="*80)
|
||||
logger.info(f"🖥️ VNC Web Access URL: {vnc_url}")
|
||||
logger.info(f"📡 Public IP: {public_ip_address}")
|
||||
logger.info(f"🏠 Private IP: {private_ip_address}")
|
||||
if self.current_proxy:
|
||||
logger.info(f"🌐 Proxy: {self.current_proxy.host}:{self.current_proxy.port}")
|
||||
logger.info("="*80)
|
||||
print(f"\n🌐 VNC Web Access URL: {vnc_url}")
|
||||
if self.current_proxy:
|
||||
print(f"🔄 Current Proxy: {self.current_proxy.host}:{self.current_proxy.port}")
|
||||
print(f"📍 Please open the above address in the browser for remote desktop access\n")
|
||||
else:
|
||||
logger.warning("No public IP address available for VNC access")
|
||||
|
||||
return private_ip_address
|
||||
return ''
|
||||
except ClientError as e:
|
||||
logger.error(f"Failed to retrieve private IP address for the instance {path_to_vm}: {str(e)}")
|
||||
logger.error(f"Failed to retrieve IP address for the instance {path_to_vm}: {str(e)}")
|
||||
raise
|
||||
|
||||
def save_state(self, path_to_vm: str, snapshot_name: str):
|
||||
@@ -212,24 +248,28 @@ echo "$(date): Configured proxy {self.current_proxy.host}:{self.current_proxy.po
|
||||
ec2_client = boto3.client('ec2', region_name=self.region)
|
||||
|
||||
try:
|
||||
# 获取原实例详情
|
||||
# Get original instance details for config.
|
||||
instance_details = ec2_client.describe_instances(InstanceIds=[path_to_vm])
|
||||
instance = instance_details['Reservations'][0]['Instances'][0]
|
||||
security_groups = [sg['GroupId'] for sg in instance['SecurityGroups']]
|
||||
subnet_id = instance['SubnetId']
|
||||
instance_type = instance['InstanceType']
|
||||
|
||||
# 终止旧实例
|
||||
# Terminate the old instance. This is a non-blocking call.
|
||||
logger.info(f"Initiating termination for old instance {path_to_vm}...")
|
||||
ec2_client.terminate_instances(InstanceIds=[path_to_vm])
|
||||
logger.info(f"Old instance {path_to_vm} has been terminated.")
|
||||
logger.info(f"Old instance {path_to_vm} termination initiated.")
|
||||
|
||||
# 轮换到新的代理
|
||||
# Rotate to a new proxy
|
||||
self._rotate_proxy()
|
||||
|
||||
# 创建新实例
|
||||
# Create a new instance
|
||||
new_instance_id = self.create_instance_with_proxy(
|
||||
snapshot_name, instance_type, security_groups, subnet_id
|
||||
)
|
||||
|
||||
# Note: VNC address is displayed within create_instance_with_proxy
|
||||
logger.info(f"Successfully launched new instance {new_instance_id} for revert.")
|
||||
|
||||
return new_instance_id
|
||||
|
||||
|
||||
Reference in New Issue
Block a user