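Enhance AWSProvider instance handling with fallback mechanisms for security groups, subnet IDs, and instance types: security groups fall back to the AWS_SECURITY_GROUP_ID environment variable; the subnet ID falls back to the instance's NetworkInterfaces and then to AWS_SUBNET_ID; the instance type falls back to AWS_INSTANCE_TYPE and then to the default 't3.large'. A ValueError is raised when no security group or subnet can be resolved. Implement checks to skip termination of instances already in 'shutting-down' or 'terminated' states, and handle potential termination errors gracefully by logging and ignoring 'InvalidInstanceID.NotFound' and 'IncorrectInstanceState' while re-raising any other ClientError.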
This commit is contained in:
@@ -108,13 +108,52 @@ class AWSProvider(Provider):
|
|||||||
# Step 1: Retrieve the original instance details
|
# Step 1: Retrieve the original instance details
|
||||||
instance_details = ec2_client.describe_instances(InstanceIds=[path_to_vm])
|
instance_details = ec2_client.describe_instances(InstanceIds=[path_to_vm])
|
||||||
instance = instance_details['Reservations'][0]['Instances'][0]
|
instance = instance_details['Reservations'][0]['Instances'][0]
|
||||||
security_groups = [sg['GroupId'] for sg in instance['SecurityGroups']]
|
# Resolve security groups with fallbacks
|
||||||
subnet_id = instance['SubnetId']
|
security_groups = [sg['GroupId'] for sg in instance.get('SecurityGroups', []) if 'GroupId' in sg]
|
||||||
instance_type = instance['InstanceType']
|
if not security_groups:
|
||||||
|
env_sg = os.getenv('AWS_SECURITY_GROUP_ID')
|
||||||
|
if env_sg:
|
||||||
|
security_groups = [env_sg]
|
||||||
|
logger.info("SecurityGroups missing on instance; using AWS_SECURITY_GROUP_ID from env")
|
||||||
|
else:
|
||||||
|
raise ValueError("No security groups found on instance and AWS_SECURITY_GROUP_ID not set")
|
||||||
|
|
||||||
|
# Resolve subnet with fallbacks
|
||||||
|
subnet_id = instance.get('SubnetId')
|
||||||
|
if not subnet_id:
|
||||||
|
nis = instance.get('NetworkInterfaces', []) or []
|
||||||
|
if nis and isinstance(nis, list):
|
||||||
|
for ni in nis:
|
||||||
|
if isinstance(ni, dict) and ni.get('SubnetId'):
|
||||||
|
subnet_id = ni.get('SubnetId')
|
||||||
|
break
|
||||||
|
if not subnet_id:
|
||||||
|
env_subnet = os.getenv('AWS_SUBNET_ID')
|
||||||
|
if env_subnet:
|
||||||
|
subnet_id = env_subnet
|
||||||
|
logger.info("SubnetId missing on instance; using AWS_SUBNET_ID from env")
|
||||||
|
else:
|
||||||
|
raise ValueError("SubnetId not available on instance, NetworkInterfaces, or environment")
|
||||||
|
|
||||||
|
# Resolve instance type with fallbacks
|
||||||
|
instance_type = instance.get('InstanceType') or os.getenv('AWS_INSTANCE_TYPE') or 't3.large'
|
||||||
|
if instance.get('InstanceType') is None:
|
||||||
|
logger.info(f"InstanceType missing on instance; using '{instance_type}' from env/default")
|
||||||
|
|
||||||
# Step 2: Terminate the old instance
|
# Step 2: Terminate the old instance (skip if already terminated/shutting-down)
|
||||||
ec2_client.terminate_instances(InstanceIds=[path_to_vm])
|
state = (instance.get('State') or {}).get('Name')
|
||||||
logger.info(f"Old instance {path_to_vm} has been terminated.")
|
if state in ['shutting-down', 'terminated']:
|
||||||
|
logger.info(f"Old instance {path_to_vm} is already in state '{state}', skipping termination.")
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
ec2_client.terminate_instances(InstanceIds=[path_to_vm])
|
||||||
|
logger.info(f"Old instance {path_to_vm} has been terminated.")
|
||||||
|
except ClientError as e:
|
||||||
|
error_code = getattr(getattr(e, 'response', {}), 'get', lambda *_: None)('Error', {}).get('Code') if hasattr(e, 'response') else None
|
||||||
|
if error_code in ['InvalidInstanceID.NotFound', 'IncorrectInstanceState']:
|
||||||
|
logger.info(f"Ignore termination error for {path_to_vm}: {error_code}")
|
||||||
|
else:
|
||||||
|
raise
|
||||||
|
|
||||||
# Step 3: Launch a new instance from the snapshot(AMI) with performance optimization
|
# Step 3: Launch a new instance from the snapshot(AMI) with performance optimization
|
||||||
logger.info(f"Launching a new instance from AMI {snapshot_name}...")
|
logger.info(f"Launching a new instance from AMI {snapshot_name}...")
|
||||||
|
|||||||
Reference in New Issue
Block a user