Robust Evaluation, Blocking File Open, Grader Sensitivity, and LibreOffice Writer Fixes (#217)
* Refactor the evaluator structure in the LibreOffice Writer example JSON to support multiple expected and result files, enhancing evaluation flexibility.
* Update the instance type to t3.large and add VNC access URL logging for allocated VMs, enhancing remote access capabilities.
* Update the time format in the get_vm_file function to include hours, minutes, and seconds for more precise file naming with a time suffix.
* Add more delay for 936321ce-5236-426a-9a20-e0e3c5dc536f; support one more potential solution.
* Enhance SetupController with a configurable retry limit and improved error handling for file-opening requests. Introduce a new function to compare unique training records, and update logging for better debugging. Adjust the JSON examples for evaluation to support multiple expected and result files.
* Clean up debug code.
---------
Co-authored-by: yuanmengqi <yuanmengqi@mail.ustc.edu.cn>
This commit is contained in:
@@ -5,6 +5,9 @@ import psutil
|
||||
import logging
|
||||
import dotenv
|
||||
import signal
|
||||
|
||||
INSTANCE_TYPE = "t3.large"
|
||||
|
||||
# Load environment variables from .env file
|
||||
dotenv.load_dotenv()
|
||||
|
||||
@@ -31,37 +34,17 @@ logger.setLevel(logging.INFO)
|
||||
DEFAULT_REGION = "us-east-1"
|
||||
# todo: Add doc for the configuration of image, security group and network interface
|
||||
# todo: publish the AMI images
|
||||
# ami-05e7d7bd279ea4f14
|
||||
IMAGE_ID_MAP = {
|
||||
"us-east-1": "ami-00674d875de9addc1",
|
||||
"us-east-1": "ami-03a22c6e501415fb1",
|
||||
"ap-east-1": "ami-0c092a5b8be4116f5",
|
||||
}
|
||||
|
||||
INSTANCE_TYPE = "t3.medium"
|
||||
|
||||
def _allocate_vm(region=DEFAULT_REGION):
|
||||
|
||||
if region not in IMAGE_ID_MAP:
|
||||
raise ValueError(f"Region {region} is not supported. Supported regions are: {list(IMAGE_ID_MAP.keys())}")
|
||||
|
||||
run_instances_params = {
|
||||
"MaxCount": 1,
|
||||
"MinCount": 1,
|
||||
"ImageId": IMAGE_ID_MAP[region],
|
||||
"InstanceType": INSTANCE_TYPE,
|
||||
"EbsOptimized": True,
|
||||
"NetworkInterfaces": [
|
||||
{
|
||||
"SubnetId": os.getenv('AWS_SUBNET_ID'),
|
||||
"AssociatePublicIpAddress": True,
|
||||
"DeviceIndex": 0,
|
||||
"Groups": [
|
||||
os.getenv('AWS_SECURITY_GROUP_ID')
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
ec2_client = boto3.client('ec2', region_name=region)
|
||||
instance_id = None
|
||||
original_sigint_handler = signal.getsignal(signal.SIGINT)
|
||||
@@ -94,26 +77,64 @@ def _allocate_vm(region=DEFAULT_REGION):
|
||||
signal.signal(signal.SIGINT, signal_handler)
|
||||
signal.signal(signal.SIGTERM, signal_handler)
|
||||
|
||||
if not os.getenv('AWS_SECURITY_GROUP_ID'):
|
||||
raise ValueError("AWS_SECURITY_GROUP_ID is not set in the environment variables.")
|
||||
if not os.getenv('AWS_SUBNET_ID'):
|
||||
raise ValueError("AWS_SUBNET_ID is not set in the environment variables.")
|
||||
|
||||
run_instances_params = {
|
||||
"MaxCount": 1,
|
||||
"MinCount": 1,
|
||||
"ImageId": IMAGE_ID_MAP[region],
|
||||
"InstanceType": INSTANCE_TYPE,
|
||||
"EbsOptimized": True,
|
||||
"NetworkInterfaces": [
|
||||
{
|
||||
"SubnetId": os.getenv('AWS_SUBNET_ID'),
|
||||
"AssociatePublicIpAddress": True,
|
||||
"DeviceIndex": 0,
|
||||
"Groups": [
|
||||
os.getenv('AWS_SECURITY_GROUP_ID')
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
response = ec2_client.run_instances(**run_instances_params)
|
||||
instance_id = response['Instances'][0]['InstanceId']
|
||||
|
||||
waiter = ec2_client.get_waiter('instance_running')
|
||||
logger.info(f"Waiting for instance {instance_id} to be running...")
|
||||
ec2_client.get_waiter('instance_running').wait(InstanceIds=[instance_id])
|
||||
waiter.wait(InstanceIds=[instance_id])
|
||||
logger.info(f"Instance {instance_id} is ready.")
|
||||
|
||||
# Retrieve and display the VNC access URL
|
||||
try:
|
||||
instance_details = ec2_client.describe_instances(InstanceIds=[instance_id])
|
||||
instance = instance_details['Reservations'][0]['Instances'][0]
|
||||
public_ip = instance.get('PublicIpAddress', '')
|
||||
if public_ip:
|
||||
vnc_url = f"http://{public_ip}:5910/vnc.html"
|
||||
logger.info("="*80)
|
||||
logger.info(f"🖥️ VNC Web Access URL: {vnc_url}")
|
||||
logger.info(f"📡 Public IP: {public_ip}")
|
||||
logger.info(f"🆔 Instance ID: {instance_id}")
|
||||
logger.info("="*80)
|
||||
print(f"\n🌐 VNC访问地址: {vnc_url}")
|
||||
print(f"📍 请在浏览器中打开上述地址进行远程桌面访问\n")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to get VNC address for instance {instance_id}: {e}")
|
||||
except KeyboardInterrupt:
|
||||
logger.warning("VM allocation interrupted by user (SIGINT).")
|
||||
raise
|
||||
except SystemExit:
|
||||
logger.warning("VM allocation terminated by parent process (SIGTERM).")
|
||||
if instance_id:
|
||||
logger.info(f"Terminating instance {instance_id} due to interruption.")
|
||||
ec2_client.terminate_instances(InstanceIds=[instance_id])
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to allocate VM in region {region}: {str(e)}")
|
||||
# try to clean up any resources that were created
|
||||
try:
|
||||
if instance_id:
|
||||
ec2_client.terminate_instances(InstanceIds=[instance_id])
|
||||
logger.info(f"Terminated instance {instance_id} due to allocation failure.")
|
||||
except Exception as cleanup_error:
|
||||
logger.error(f"May fail to clean up instance {instance_id}: {str(cleanup_error)}")
|
||||
logger.error(f"Failed to allocate VM: {e}", exc_info=True)
|
||||
if instance_id:
|
||||
logger.info(f"Terminating instance {instance_id} due to an error.")
|
||||
ec2_client.terminate_instances(InstanceIds=[instance_id])
|
||||
raise
|
||||
finally:
|
||||
# Restore original signal handlers
|
||||
@@ -153,6 +174,27 @@ def _allocate_vm_with_proxy(region=DEFAULT_REGION, proxy_config_file=None):
|
||||
subnet_id=os.getenv('AWS_SUBNET_ID')
|
||||
)
|
||||
|
||||
try:
|
||||
ec2_client = boto3.client('ec2', region_name=region)
|
||||
instance_details = ec2_client.describe_instances(InstanceIds=[instance_id])
|
||||
instance = instance_details['Reservations'][0]['Instances'][0]
|
||||
public_ip = instance.get('PublicIpAddress', '')
|
||||
if public_ip:
|
||||
vnc_url = f"http://{public_ip}:5910/vnc.html"
|
||||
logger.info("="*80)
|
||||
logger.info(f"🖥️ VNC Web Access URL: {vnc_url}")
|
||||
logger.info(f"📡 Public IP: {public_ip}")
|
||||
logger.info(f"🆔 Instance ID: {instance_id}")
|
||||
if current_proxy:
|
||||
logger.info(f"🌐 Proxy: {current_proxy.host}:{current_proxy.port}")
|
||||
logger.info("="*80)
|
||||
print(f"\n🌐 VNC Web Access URL: {vnc_url}")
|
||||
if current_proxy:
|
||||
print(f"🔄 Current Proxy: {current_proxy.host}:{current_proxy.port}")
|
||||
print(f"📍 Please open the above address in the browser for remote desktop access\n")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to get VNC address for proxy instance {instance_id}: {e}")
|
||||
|
||||
return instance_id
|
||||
|
||||
|
||||
@@ -213,4 +255,4 @@ class AWSVMManager(VMManager):
|
||||
else:
|
||||
logger.info("Allocating a new VM in region: {}".format(region))
|
||||
new_vm_path = _allocate_vm(region)
|
||||
return new_vm_path
|
||||
return new_vm_path
|
||||
Reference in New Issue
Block a user