feat&fix: add signal handling for VM allocation and improve cleanup on termination

This commit is contained in:
adlsdztony
2025-06-04 03:15:30 +00:00
parent 8d54d4302f
commit 10153ffff6
2 changed files with 42 additions and 2 deletions

View File

@@ -4,6 +4,7 @@ import boto3
import psutil
import logging
import dotenv
import signal
# Load environment variables from .env file
dotenv.load_dotenv()
@@ -57,22 +58,62 @@ def _allocate_vm(region=DEFAULT_REGION):
}
ec2_client = boto3.client('ec2', region_name=region)
instance_id = None
original_sigint_handler = signal.getsignal(signal.SIGINT)
original_sigterm_handler = signal.getsignal(signal.SIGTERM)
def signal_handler(sig, frame):
if instance_id:
signal_name = "SIGINT" if sig == signal.SIGINT else "SIGTERM"
logger.warning(f"Received {signal_name} signal, terminating instance {instance_id}...")
try:
ec2_client.terminate_instances(InstanceIds=[instance_id])
logger.info(f"Successfully terminated instance {instance_id} after {signal_name}.")
except Exception as cleanup_error:
logger.error(f"Failed to terminate instance {instance_id} after {signal_name}: {str(cleanup_error)}")
# Restore original signal handlers
signal.signal(signal.SIGINT, original_sigint_handler)
signal.signal(signal.SIGTERM, original_sigterm_handler)
# Raise appropriate exception based on signal type
if sig == signal.SIGINT:
raise KeyboardInterrupt
else:
# For SIGTERM, exit gracefully
import sys
sys.exit(0)
try:
# Set up signal handlers for both SIGINT and SIGTERM
signal.signal(signal.SIGINT, signal_handler)
signal.signal(signal.SIGTERM, signal_handler)
response = ec2_client.run_instances(**run_instances_params)
instance_id = response['Instances'][0]['InstanceId']
logger.info(f"Waiting for instance {instance_id} to be running...")
ec2_client.get_waiter('instance_running').wait(InstanceIds=[instance_id])
logger.info(f"Instance {instance_id} is ready.")
except KeyboardInterrupt:
logger.warning("VM allocation interrupted by user (SIGINT).")
raise
except SystemExit:
logger.warning("VM allocation terminated by parent process (SIGTERM).")
raise
except Exception as e:
logger.error(f"Failed to allocate VM in region {region}: {str(e)}")
# try to clean up any resources that were created
try:
if 'InstanceId' in response['Instances'][0]:
if instance_id:
ec2_client.terminate_instances(InstanceIds=[instance_id])
logger.info(f"Terminated instance {instance_id} due to allocation failure.")
except Exception as cleanup_error:
logger.error(f"May fail to clean up instance {instance_id}: {str(cleanup_error)}")
raise
finally:
# Restore original signal handlers
signal.signal(signal.SIGINT, original_sigint_handler)
signal.signal(signal.SIGTERM, original_sigterm_handler)
return instance_id

View File

@@ -161,7 +161,6 @@ def process_signal_handler(signum, frame, env_idx):
# Get the active_environments from the caller's frame
local_vars = frame.f_locals
print(f"Local variables in process {env_idx + 1}: {local_vars}")
active_environments = local_vars.get('active_environments', [])
# Close environment in the current process context