Add Aliyun SDK dependencies and implement TTL configuration for ECS instances

- Added new dependencies for Aliyun ECS SDK in requirements.txt and setup.py to support instance management features.
- Introduced a new config module to handle TTL settings for ECS instances, allowing for auto-termination based on environment variables.
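- Updated the manager to apply the TTL settings: when TTL is enabled it schedules auto-release at instance creation via `auto_release_time` (clamped to at least 30 minutes ahead and rounded up to a whole minute, UTC), logs the effective TTL configuration, and retries the creation request without the TTL field if the first attempt fails.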
- Maintained existing code logic while enhancing functionality for improved instance lifecycle management.
This commit is contained in:
Timothyxxx
2025-08-22 23:28:58 +08:00
parent b3e1c0344d
commit ebda4d8b3f
5 changed files with 114 additions and 35 deletions

View File

@@ -4,12 +4,14 @@ import dotenv
import time
import signal
import requests
from datetime import datetime, timedelta, timezone
from alibabacloud_ecs20140526.client import Client as ECSClient
from alibabacloud_tea_openapi import models as open_api_models
from alibabacloud_ecs20140526 import models as ecs_models
from alibabacloud_tea_util.client import Client as UtilClient
from desktop_env.providers.base import VMManager
from desktop_env.providers.aliyun.config import ENABLE_TTL, DEFAULT_TTL_MINUTES
dotenv.load_dotenv()
@@ -101,27 +103,56 @@ def _allocate_vm(screen_size=(1920, 1080)):
f"Creating new ECS instance in region {ALIYUN_REGION} with image {ALIYUN_IMAGE_ID}"
)
# Create instance request
request = ecs_models.RunInstancesRequest(
region_id=ALIYUN_REGION,
image_id=ALIYUN_IMAGE_ID,
instance_type=ALIYUN_INSTANCE_TYPE,
security_group_id=ALIYUN_SECURITY_GROUP_ID,
v_switch_id=ALIYUN_VSWITCH_ID,
instance_name=f"OSWorld-Desktop-{int(time.time())}",
description="OSWorld Desktop Environment Instance",
internet_max_bandwidth_out=10,
internet_charge_type="PayByTraffic",
instance_charge_type="PostPaid",
system_disk=ecs_models.RunInstancesRequestSystemDisk(
size="50",
category="cloud_essd",
),
deletion_protection=False,
# TTL configuration
ttl_enabled = ENABLE_TTL
ttl_minutes = DEFAULT_TTL_MINUTES
ttl_seconds = max(0, int(ttl_minutes) * 60)
# Aliyun constraints: at least 30 minutes in the future, ISO8601 UTC, seconds must be 00
now_utc = datetime.now(timezone.utc)
min_eta = now_utc + timedelta(minutes=30)
raw_eta = now_utc + timedelta(seconds=ttl_seconds)
effective_eta = raw_eta if raw_eta > min_eta else min_eta
# round up to the next full minute, zero seconds
effective_eta = (effective_eta + timedelta(seconds=59)).replace(second=0, microsecond=0)
auto_release_str = effective_eta.strftime('%Y-%m-%dT%H:%M:%SZ')
logger.info(
f"TTL config: enabled={ttl_enabled}, minutes={ttl_minutes}, seconds={ttl_seconds}, ETA(UTC)={auto_release_str}"
)
# Create the instance
response = client.run_instances(request)
# Create instance request (attempt with auto_release_time first when TTL enabled)
def _build_request(with_ttl: bool) -> ecs_models.RunInstancesRequest:
    """Assemble the RunInstances request for a new desktop instance.

    The TTL-related values (``ttl_enabled``, ``ttl_seconds`` and
    ``auto_release_str``) are read from the enclosing scope.

    Args:
        with_ttl: When true, and TTL is enabled with a positive duration,
            the request also carries ``auto_release_time``.

    Returns:
        The populated ``RunInstancesRequest``.
    """
    system_disk = ecs_models.RunInstancesRequestSystemDisk(
        size="50",
        category="cloud_essd",
    )
    params = {
        "region_id": ALIYUN_REGION,
        "image_id": ALIYUN_IMAGE_ID,
        "instance_type": ALIYUN_INSTANCE_TYPE,
        "security_group_id": ALIYUN_SECURITY_GROUP_ID,
        "v_switch_id": ALIYUN_VSWITCH_ID,
        "instance_name": f"OSWorld-Desktop-{int(time.time())}",
        "description": "OSWorld Desktop Environment Instance",
        "internet_max_bandwidth_out": 10,
        "internet_charge_type": "PayByTraffic",
        "instance_charge_type": "PostPaid",
        "system_disk": system_disk,
        "deletion_protection": False,
    }

    # Attach the release time only when the caller asks for it and TTL is
    # actually configured with a positive duration.
    attach_ttl = with_ttl and ttl_enabled and ttl_seconds > 0
    if attach_ttl:
        params["auto_release_time"] = auto_release_str

    return ecs_models.RunInstancesRequest(**params)
try:
request = _build_request(with_ttl=True)
response = client.run_instances(request)
except Exception as create_err:
# Retry without auto_release_time if creation-time TTL is rejected
logger.warning(
f"RunInstances with auto_release_time failed: {create_err}. Retrying without TTL field..."
)
request = _build_request(with_ttl=False)
response = client.run_instances(request)
instance_ids = response.body.instance_id_sets.instance_id_set
if not instance_ids: