Add TTL configuration for AWS instance management

- Introduced a new config module to manage TTL settings for EC2 instances, allowing for auto-termination based on environment variables.
- Updated the AWSProvider and manager to utilize the new TTL settings, including scheduling instance termination via EventBridge Scheduler.
- Added utility functions for resolving the scheduler role ARN and creating termination schedules, ensuring robust error handling and logging.
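- Maintained existing code logic while integrating the new features: TTL scheduling is best-effort, so a failure to create the EventBridge Scheduler schedule is logged as a warning and does not abort the snapshot revert.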
This commit is contained in:
Timothyxxx
2025-08-18 17:30:49 +00:00
parent 75f00fea62
commit 3a96fd5046
4 changed files with 197 additions and 7 deletions

View File

@@ -2,11 +2,14 @@ import boto3
from botocore.exceptions import ClientError
import logging
from desktop_env.providers.base import Provider
from datetime import datetime
import os
import time
from datetime import datetime, timedelta, timezone
from desktop_env.providers.base import Provider
# TTL configuration
from desktop_env.providers.aws.config import ENABLE_TTL, DEFAULT_TTL_MINUTES, AWS_SCHEDULER_ROLE_ARN
from desktop_env.providers.aws.scheduler_utils import schedule_instance_termination
logger = logging.getLogger("desktopenv.providers.aws.AWSProvider")
logger.setLevel(logging.INFO)
@@ -116,12 +119,18 @@ class AWSProvider(Provider):
# Step 3: Launch a new instance from the snapshot(AMI) with performance optimization
logger.info(f"Launching a new instance from AMI {snapshot_name}...")
# TTL configuration follows the same env flags as allocation (centralized)
enable_ttl = ENABLE_TTL
default_ttl_minutes = DEFAULT_TTL_MINUTES
ttl_seconds = max(0, default_ttl_minutes * 60)
run_instances_params = {
"MaxCount": 1,
"MinCount": 1,
"ImageId": snapshot_name,
"InstanceType": instance_type,
"EbsOptimized": True,
"InstanceInitiatedShutdownBehavior": "terminate",
"NetworkInterfaces": [
{
"SubnetId": subnet_id,
@@ -151,7 +160,40 @@ class AWSProvider(Provider):
ec2_client.get_waiter('instance_running').wait(InstanceIds=[new_instance_id])
logger.info(f"Instance {new_instance_id} is ready.")
# Schedule cloud-side termination via EventBridge Scheduler (auto-resolve role ARN)
try:
if enable_ttl:
schedule_instance_termination(self.region, new_instance_id, ttl_seconds, AWS_SCHEDULER_ROLE_ARN, logger)
except Exception as e:
logger.warning(f"Failed to create EventBridge Scheduler for {new_instance_id}: {e}")
# Schedule cloud-side termination via EventBridge Scheduler (same as allocation path)
try:
if enable_ttl and os.getenv('AWS_SCHEDULER_ROLE_ARN'):
scheduler_client = boto3.client('scheduler', region_name=self.region)
schedule_name = f"osworld-ttl-{new_instance_id}-{int(time.time())}"
eta_scheduler = datetime.now(timezone.utc) + timedelta(seconds=ttl_seconds)
schedule_expression = f"at({eta_scheduler.strftime('%Y-%m-%dT%H:%M:%S')})"
target_arn = "arn:aws:scheduler:::aws-sdk:ec2:terminateInstances"
input_payload = '{"InstanceIds":["' + new_instance_id + '"]}'
scheduler_client.create_schedule(
Name=schedule_name,
ScheduleExpression=schedule_expression,
FlexibleTimeWindow={"Mode": "OFF"},
Target={
"Arn": target_arn,
"RoleArn": os.getenv('AWS_SCHEDULER_ROLE_ARN'),
"Input": input_payload
},
State='ENABLED',
Description=f"OSWorld TTL terminate for {new_instance_id}"
)
logger.info(f"Scheduled EC2 termination via EventBridge Scheduler for snapshot revert: name={schedule_name}, when={eta_scheduler.isoformat()} (UTC)")
else:
logger.info("TTL enabled but AWS_SCHEDULER_ROLE_ARN not set; skipping scheduler for snapshot revert.")
except Exception as e:
logger.warning(f"Failed to create EventBridge Scheduler for {new_instance_id}: {e}")
try:
instance_details = ec2_client.describe_instances(InstanceIds=[new_instance_id])
instance = instance_details['Reservations'][0]['Instances'][0]