- Introduced a new config module to manage TTL settings for EC2 instances, allowing for auto-termination based on environment variables. - Updated the AWSProvider and manager to utilize the new TTL settings, including scheduling instance termination via EventBridge Scheduler. - Added utility functions for resolving the scheduler role ARN and creating termination schedules, ensuring robust error handling and logging. - Maintained existing code logic while integrating new features for improved instance lifecycle management.
108 lines
4.2 KiB
Python
108 lines
4.2 KiB
Python
import os
|
|
import time
|
|
import json
|
|
from datetime import datetime, timedelta, timezone
|
|
import boto3
|
|
from botocore.exceptions import ClientError
|
|
|
|
|
|
def _resolve_scheduler_role_arn(logger) -> str:
    """Return the IAM role ARN that EventBridge Scheduler should assume.

    Resolution order:

    1. ``AWS_SCHEDULER_ROLE_ARN`` when set to a non-blank value. It is returned
       stripped and no AWS call is made.
    2. An ARN built from the caller's account id (STS ``get_caller_identity``)
       and the role name in ``AWS_SCHEDULER_ROLE_NAME`` (default
       ``osworld-scheduler-ec2-terminate``), if ``iam.get_role`` finds the role.
    3. If the role is missing and ``AWS_AUTO_CREATE_SCHEDULER_ROLE`` is ``true``
       (the default), the role is created with a trust policy for
       ``scheduler.amazonaws.com`` plus an inline policy allowing
       ``ec2:TerminateInstances`` and ``ec2:DescribeInstances``.

    The function is best-effort: every failure is logged as a warning and
    reported by returning an empty string; no exception is propagated.

    Args:
        logger: Logger-like object exposing ``info`` and ``warning``.

    Returns:
        The role ARN, or ``''`` when it could not be resolved.
    """
    # 1) Explicit env takes precedence
    explicit_arn = os.getenv('AWS_SCHEDULER_ROLE_ARN', '').strip()
    if explicit_arn:
        return explicit_arn

    # 2) Derive from role name + account id.
    # A blank override falls back to the default name instead of producing an
    # ARN that ends in "role/".
    role_name = (
        os.getenv('AWS_SCHEDULER_ROLE_NAME', '').strip()
        or 'osworld-scheduler-ec2-terminate'
    )
    try:
        account_id = boto3.client('sts').get_caller_identity()['Account']
        # NOTE(review): assumes the standard "aws" partition and a role without
        # an IAM path - confirm if GovCloud/China or pathed roles are in use.
        derived_arn = f"arn:aws:iam::{account_id}:role/{role_name}"
        iam = boto3.client('iam')

        try:
            iam.get_role(RoleName=role_name)
        except ClientError as e:
            error_code = e.response.get('Error', {}).get('Code', '')
            if error_code != 'NoSuchEntity':
                # AccessDenied, throttling, etc. do not mean the role is
                # missing; do not create IAM resources on that basis.
                logger.warning(f"Failed to look up scheduler role '{role_name}': {e}")
                return ''
            lookup_error = e  # keep a reference; 'e' is unbound after this clause
        else:
            logger.info(f"Derived AWS_SCHEDULER_ROLE_ARN={derived_arn} from role name '{role_name}'")
            return derived_arn

        auto_create = os.getenv('AWS_AUTO_CREATE_SCHEDULER_ROLE', 'true').lower() == 'true'
        if not auto_create:
            logger.warning(
                f"Scheduler role '{role_name}' not found and auto-create disabled: {lookup_error}"
            )
            return ''

        # 3) Attempt to create the role
        trust_policy = {
            "Version": "2012-10-17",
            "Statement": [
                {
                    "Effect": "Allow",
                    "Principal": {"Service": "scheduler.amazonaws.com"},
                    "Action": "sts:AssumeRole",
                }
            ],
        }
        # Minimal permissions needed by the scheduled terminate call
        inline_policy = {
            "Version": "2012-10-17",
            "Statement": [
                {
                    "Effect": "Allow",
                    "Action": ["ec2:TerminateInstances", "ec2:DescribeInstances"],
                    "Resource": "*",
                }
            ],
        }
        try:
            try:
                iam.create_role(
                    RoleName=role_name,
                    AssumeRolePolicyDocument=json.dumps(trust_policy),
                )
            except ClientError as ce:
                # Another process may have created the role between our lookup
                # and this call; that role is usable, so carry on.
                if ce.response.get('Error', {}).get('Code', '') != 'EntityAlreadyExists':
                    raise
            # put_role_policy overwrites a same-named inline policy, so this is
            # safe to repeat when the role already existed.
            iam.put_role_policy(
                RoleName=role_name,
                PolicyName=f"{role_name}-inline",
                PolicyDocument=json.dumps(inline_policy),
            )
            # Small wait for IAM propagation
            time.sleep(3)
            logger.info(f"Auto-created scheduler role '{role_name}'. Using {derived_arn}")
            return derived_arn
        except ClientError as ce:
            logger.warning(f"Failed to auto-create scheduler role '{role_name}': {ce}")
            return ''
    except Exception as e:
        # Best-effort boundary: TTL scheduling must never break instance setup.
        logger.warning(f"Failed to resolve Scheduler Role ARN: {e}")
        return ''
|
|
|
|
|
|
def schedule_instance_termination(region: str, instance_id: str, ttl_seconds: int, role_arn: str, logger) -> None:
    """Create a one-time EventBridge Scheduler schedule that terminates an EC2 instance.

    The schedule fires once, ``ttl_seconds`` after the current UTC time, and
    invokes the ``ec2:terminateInstances`` AWS SDK target with the given
    instance id. It is created with ``ActionAfterCompletion='DELETE'``.

    Args:
        region: AWS region used for the Scheduler client.
        instance_id: Id of the EC2 instance to terminate.
        ttl_seconds: Delay, in seconds from now, until termination.
        role_arn: IAM role ARN for the Scheduler target. When empty, the ARN
            is resolved through ``_resolve_scheduler_role_arn``.
        logger: Logger-like object exposing ``info`` (also handed to the
            role resolver).

    Returns:
        None. If no role ARN can be determined, an info message is logged and
        no schedule is created.

    Raises:
        Any exception raised by ``boto3.client`` or ``create_schedule`` is
        propagated to the caller unchanged.
    """
    if not role_arn:
        role_arn = _resolve_scheduler_role_arn(logger)
    if not role_arn:
        logger.info("Scheduler role ARN not available; skipping TTL schedule creation.")
        return

    scheduler_client = boto3.client('scheduler', region_name=region)

    # The epoch-seconds suffix keeps names distinct across repeated schedules
    # for the same instance.
    schedule_name = f"osworld-ttl-{instance_id}-{int(time.time())}"

    # 'at()' takes a timestamp with no offset or trailing 'Z'. The value below
    # is computed in UTC, and the timezone is passed explicitly on the call so
    # the expression cannot be read in any other zone.
    eta_scheduler = datetime.now(timezone.utc) + timedelta(seconds=ttl_seconds)
    schedule_expression = f"at({eta_scheduler.strftime('%Y-%m-%dT%H:%M:%S')})"

    target_arn = "arn:aws:scheduler:::aws-sdk:ec2:terminateInstances"
    # Serialize with json so the id is always correctly quoted/escaped; compact
    # separators keep the payload identical to the previous hand-built string.
    input_payload = json.dumps({"InstanceIds": [instance_id]}, separators=(",", ":"))

    scheduler_client.create_schedule(
        Name=schedule_name,
        ScheduleExpression=schedule_expression,
        ScheduleExpressionTimezone='UTC',
        FlexibleTimeWindow={"Mode": "OFF"},
        ActionAfterCompletion='DELETE',
        Target={
            "Arn": target_arn,
            "RoleArn": role_arn,
            "Input": input_payload,
        },
        State='ENABLED',
        Description=f"OSWorld TTL terminate for {instance_id}",
    )

    logger.info(
        f"Scheduled EC2 termination via EventBridge Scheduler: name={schedule_name}, when={eta_scheduler.isoformat()} (UTC)"
    )
|
|
|
|
|