From 4f41b1fc2ec165694791cc118ed1b1863cbf51da Mon Sep 17 00:00:00 2001 From: Kyujin Cho Date: Thu, 19 Dec 2024 16:43:13 +0900 Subject: [PATCH 01/75] update --- src/ai/backend/manager/models/endpoint.py | 171 +++++++++++++++++- src/ai/backend/manager/models/kernel.py | 24 ++- .../backend/manager/scheduler/dispatcher.py | 113 +++++++++++- 3 files changed, 300 insertions(+), 8 deletions(-) diff --git a/src/ai/backend/manager/models/endpoint.py b/src/ai/backend/manager/models/endpoint.py index ab8f0b26f55..42beb637437 100644 --- a/src/ai/backend/manager/models/endpoint.py +++ b/src/ai/backend/manager/models/endpoint.py @@ -1,7 +1,8 @@ import datetime import logging import uuid -from enum import Enum +from decimal import Decimal +from enum import Enum, StrEnum from pathlib import Path from typing import TYPE_CHECKING, Any, Iterable, List, Mapping, Optional, Sequence, cast @@ -29,6 +30,7 @@ ImageAlias, MountPermission, MountTypes, + RedisConnectionInfo, ResourceSlot, RuntimeVariant, SessionTypes, @@ -84,6 +86,8 @@ from .gql import GraphQueryContext __all__ = ( + "AutoScalingMetricSource", + "AutoScalingMetricComparator", "EndpointRow", "Endpoint", "EndpointLifecycle", @@ -94,6 +98,7 @@ "EndpointTokenRow", "EndpointToken", "EndpointTokenList", + "EndpointAutoScalingRuleRow", ) @@ -106,6 +111,18 @@ class EndpointLifecycle(Enum): DESTROYED = "destroyed" +class AutoScalingMetricSource(StrEnum): + KERNEL = "kernel" + INFERENCE_FRAMEWORK = "inference-framework" + + +class AutoScalingMetricComparator(StrEnum): + LESS_THAN = "lt" + LESS_THAN_OR_EQUAL = "le" + GREATHER_THAN = "gt" + GREATHER_THAN_OR_EQUAL = "ge" + + class EndpointRow(Base): __tablename__ = "endpoints" @@ -211,6 +228,9 @@ class EndpointRow(Base): routings = relationship("RoutingRow", back_populates="endpoint_row") tokens = relationship("EndpointTokenRow", back_populates="endpoint_row") + endpoint_auto_scaling_rules = relationship( + "EndpointAutoScalingRuleRow", back_populates="endpoint_row" + ) image_row = 
relationship("ImageRow", back_populates="endpoints") model_row = relationship("VFolderRow", back_populates="endpoints") created_user_row = relationship( @@ -355,6 +375,47 @@ async def list( result = await session.execute(query) return result.scalars().all() + @classmethod + async def bulk_load( + cls, + session: AsyncSession, + endpoint_ids: List[uuid.UUID], + domain: Optional[str] = None, + project: Optional[uuid.UUID] = None, + user_uuid: Optional[uuid.UUID] = None, + load_routes=False, + load_image=False, + load_tokens=False, + load_created_user=False, + load_session_owner=False, + status_filter=[EndpointLifecycle.CREATED], + ) -> List["EndpointRow"]: + query = ( + sa.select(EndpointRow) + .order_by(sa.desc(EndpointRow.created_at)) + .filter( + EndpointRow.lifecycle_stage.in_(status_filter) & EndpointRow.id.in_(endpoint_ids) + ) + ) + if load_routes: + query = query.options(selectinload(EndpointRow.routings)) + if load_tokens: + query = query.options(selectinload(EndpointRow.tokens)) + if load_image: + query = query.options(selectinload(EndpointRow.image_row)) + if load_created_user: + query = query.options(selectinload(EndpointRow.created_user_row)) + if load_session_owner: + query = query.options(selectinload(EndpointRow.session_owner_row)) + if project: + query = query.filter(EndpointRow.project == project) + if domain: + query = query.filter(EndpointRow.domain == domain) + if user_uuid: + query = query.filter(EndpointRow.session_owner == user_uuid) + result = await session.execute(query) + return result.scalars().all() + @classmethod async def list_by_model( cls, @@ -396,6 +457,29 @@ async def list_by_model( result = await session.execute(query) return result.scalars().all() + async def create_scaling_rule( + self, + session: AsyncSession, + metric_source: AutoScalingMetricSource, + metric_name: str, + threshold: Decimal, + comparator: AutoScalingMetricComparator, + step_size: int, + cooldown_seconds: int = 300, + ) -> "EndpointAutoScalingRuleRow": + row = 
EndpointAutoScalingRuleRow( + uuid.uuid4(), + self.id, + metric_source, + metric_name, + threshold, + comparator, + step_size, + cooldown_seconds=cooldown_seconds, + ) + session.add(row) + return row + class EndpointTokenRow(Base): __tablename__ = "endpoint_tokens" @@ -494,6 +578,77 @@ async def get( return row +class EndpointAutoScalingRuleRow(Base): + __tablename__ = "endpoint_auto_scaling_rules" + + id = IDColumn() + metric_source = sa.Column("metric_source", StrEnumType(AutoScalingMetricSource), nullable=False) + metric_name = sa.Column("metric_name", sa.Text(), nullable=False) + threshold = sa.Column( + "threshold", sa.Text(), nullable=False + ) # FIXME: How can I put Decimal here? + comparator = sa.Column("comparator", StrEnumType(AutoScalingMetricComparator), nullable=False) + step_size = sa.Column("step_size", sa.Integer(), nullable=False) + cooldown_seconds = sa.Column("cooldown_seconds", sa.Integer(), nullable=False, default=300) + + created_at = sa.Column( + "created_at", + sa.DateTime(timezone=True), + server_default=sa.text("now()"), + nullable=True, + ) + last_triggered_at = sa.Column( + "last_triggered_at", + sa.DateTime(timezone=True), + nullable=True, + ) + + endpoint = sa.Column( + "endpoint", + GUID, + sa.ForeignKey("endpoints.id", ondelete="CASCADE"), + nullable=False, + ) + + endpoint_row = relationship("EndpointRow", back_populates="endpoint_auto_scaling_rules") + + @classmethod + async def list( + cls, session: AsyncSession, load_endpoint=False + ) -> list["EndpointAutoScalingRuleRow"]: + query = sa.select(EndpointAutoScalingRuleRow) + if load_endpoint: + query = query.options(selectinload(EndpointAutoScalingRuleRow.tokens)) + result = await session.execute(query) + return result.scalars().all() + + def __init__( + self, + id: uuid.UUID, + endpoint: uuid.UUID, + metric_source: AutoScalingMetricSource, + metric_name: str, + threshold: Decimal, + comparator: AutoScalingMetricComparator, + step_size: int, + cooldown_seconds: int = 300, + ) -> 
None: + self.id = id + self.endpoint = endpoint + self.metric_source = metric_source + self.metric_name = metric_name + self.threshold = threshold + self.comparator = comparator + self.step_size = step_size + self.cooldown_seconds = cooldown_seconds + + async def remove_rule( + self, + session: AsyncSession, + ) -> None: + session.delete(self) + + class ModelServicePredicateChecker: @staticmethod async def check_scaling_group( @@ -715,9 +870,9 @@ def from_enum(cls, enum: RuntimeVariant) -> "RuntimeVariantInfo": class EndpointStatistics: @classmethod - async def batch_load_by_endpoint( + async def bulk_load_endpoint_metrics( cls, - ctx: "GraphQueryContext", + redis_stat: RedisConnectionInfo, endpoint_ids: Sequence[uuid.UUID], ) -> Sequence[Optional[Mapping[str, Any]]]: async def _build_pipeline(redis: Redis) -> Pipeline: @@ -727,7 +882,7 @@ async def _build_pipeline(redis: Redis) -> Pipeline: return pipe stats = [] - results = await redis_helper.execute(ctx.redis_stat, _build_pipeline) + results = await redis_helper.execute(redis_stat, _build_pipeline) for result in results: if result is not None: stats.append(msgpack.unpackb(result)) @@ -735,6 +890,14 @@ async def _build_pipeline(redis: Redis) -> Pipeline: stats.append(None) return stats + @classmethod + async def batch_load_by_endpoint( + cls, + ctx: "GraphQueryContext", + endpoint_ids: Sequence[uuid.UUID], + ) -> Sequence[Optional[Mapping[str, Any]]]: + return await cls.bulk_load_endpoint_metrics(ctx.redis_stat, endpoint_ids) + @classmethod async def batch_load_by_replica( cls, diff --git a/src/ai/backend/manager/models/kernel.py b/src/ai/backend/manager/models/kernel.py index e54f4827fd2..1565a4f9e67 100644 --- a/src/ai/backend/manager/models/kernel.py +++ b/src/ai/backend/manager/models/kernel.py @@ -587,6 +587,13 @@ def get_used_days(self, local_tz: tzfile) -> Optional[int]: ) return None + @staticmethod + async def bulk_load_by_session_id( + session: SASession, session_ids: list[uuid.UUID] + ) -> 
list["KernelRow"]: + query = sa.select(KernelRow).where(KernelRow.session.in_(session_ids)) + return (await session.execute(query)).scalars().all() + @staticmethod async def get_kernel( db: ExtendedAsyncSAEngine, kern_id: uuid.UUID, allow_stale: bool = False @@ -803,11 +810,13 @@ class SessionInfo(TypedDict): class KernelStatistics: @classmethod - async def batch_load_by_kernel( + async def bulk_load_kernel_metrics( cls, - ctx: GraphQueryContext, + redis_stat: RedisConnectionInfo, session_ids: Sequence[SessionId], ) -> Sequence[Optional[Mapping[str, Any]]]: + """For cases where required to collect kernel metrics in bulk internally""" + async def _build_pipeline(redis: Redis) -> Pipeline: pipe = redis.pipeline() for sess_id in session_ids: @@ -815,7 +824,7 @@ async def _build_pipeline(redis: Redis) -> Pipeline: return pipe stats = [] - results = await redis_helper.execute(ctx.redis_stat, _build_pipeline) + results = await redis_helper.execute(redis_stat, _build_pipeline) for result in results: if result is not None: stats.append(msgpack.unpackb(result)) @@ -823,6 +832,15 @@ async def _build_pipeline(redis: Redis) -> Pipeline: stats.append(None) return stats + @classmethod + async def batch_load_by_kernel( + cls, + ctx: GraphQueryContext, + session_ids: Sequence[SessionId], + ) -> Sequence[Optional[Mapping[str, Any]]]: + """wrapper of `KernelStatistics.bulk_load_kernel_metrics()` for aiodataloader""" + return await cls.bulk_load_kernel_metrics(ctx.redis_stat, session_ids) + @classmethod async def batch_load_inference_metrics_by_kernel( cls, diff --git a/src/ai/backend/manager/scheduler/dispatcher.py b/src/ai/backend/manager/scheduler/dispatcher.py index 833c6176704..4eb4509a0f1 100644 --- a/src/ai/backend/manager/scheduler/dispatcher.py +++ b/src/ai/backend/manager/scheduler/dispatcher.py @@ -5,6 +5,7 @@ import json import logging import uuid +from collections import defaultdict from collections.abc import ( Awaitable, Callable, @@ -34,7 +35,7 @@ from sqlalchemy.orm 
import noload, selectinload from ai.backend.common import redis_helper -from ai.backend.common.defs import REDIS_LIVE_DB +from ai.backend.common.defs import REDIS_LIVE_DB, REDIS_STAT_DB from ai.backend.common.distributed import GlobalTimer from ai.backend.common.events import ( AgentStartedEvent, @@ -81,9 +82,14 @@ from ..models import ( AgentRow, AgentStatus, + AutoScalingMetricComparator, + AutoScalingMetricSource, + EndpointAutoScalingRuleRow, EndpointLifecycle, EndpointRow, + EndpointStatistics, KernelRow, + KernelStatistics, KernelStatus, RouteStatus, RoutingRow, @@ -200,6 +206,7 @@ class SchedulerDispatcher(aobject): update_session_status_timer: GlobalTimer redis_live: RedisConnectionInfo + redis_stat: RedisConnectionInfo def __init__( self, @@ -222,6 +229,11 @@ def __init__( name="scheduler.live", db=REDIS_LIVE_DB, ) + self.redis_stat = redis_helper.get_redis_object( + self.shared_config.data["redis"], + name="stat", + db=REDIS_STAT_DB, + ) async def __ainit__(self) -> None: coalescing_opts: CoalescingOptions = { @@ -1378,6 +1390,105 @@ async def scale_services( manager_id = self.local_config["manager"]["id"] redis_key = f"manager.{manager_id}.scale_services" + async with self.db.begin_sssion() as session: + current_datetime = datetime.now() + rules = await EndpointAutoScalingRuleRow.list(session, load_endpoint=True) + endpoints = await EndpointRow.bulk_load( + session, [rule.endpoint for rule in rules], load_routes=True + ) + endpoint_by_id: dict[uuid.UUID, EndpointRow] = { + endpoint.id: endpoint for endpoint in endpoints + } + metric_requested_sessions: list[uuid.UUID] = list() + metric_requested_kernels: list[uuid.UUID] = list() + metric_requested_endpoints: list[uuid.UUID] = list() + + kernel_statistics_by_id: dict[uuid.UUID, Any] = {} + endpoint_statistics_by_id: dict[uuid.UUID, Any] = {} + kernels_by_session_id: dict[uuid.UUID, list[KernelRow]] = defaultdict(lambda: []) + + for rule in rules: + match rule.metric_source: + case 
AutoScalingMetricSource.KERNEL: + metric_requested_sessions += [ + route.session for route in endpoint_by_id[rule.endpoint].routings + ] + case AutoScalingMetricSource.INFERENCE_FRAMEWORK: + metric_requested_endpoints.append(rule.endpoint) + + kernel_rows = await KernelRow.bulk_load_by_session_id( + session, list(metric_requested_sessions) + ) + for kernel in kernel_rows: + kernels_by_session_id[kernel.session].append(kernel) + metric_requested_kernels.append(kernel) + + kernel_live_stats = await KernelStatistics.bulk_load_kernel_metrics( + self.redis_stat, + cast(list[SessionId], list(metric_requested_kernels)), + ) + endpoint_live_stats = await EndpointStatistics.bulk_load_endpoint_metrics( + self.redis_stat, + cast(list[SessionId], list(metric_requested_endpoints)), + ) + + kernel_statistics_by_id = { + kernel_id: metric + for kernel_id, metric in zip(metric_requested_kernels, kernel_live_stats) + } + endpoint_statistics_by_id = { + endpoint_id: metric + for endpoint_id, metric in zip(metric_requested_endpoints, endpoint_live_stats) + } + + for rule in rules: + should_trigger = False + + match rule.metric_source: + case AutoScalingMetricSource.KERNEL: + metric_aggregated_value = Decimal("0") + metric_found_kernel_count = 0 + for route in endpoint_by_id[rule.endpoint].routings: + for kernel in kernels_by_session_id[route.session]: + if not kernel_statistics_by_id[kernel.id]: + continue + live_stat = json.loads(kernel_statistics_by_id[kernel.id]) + if rule.metric_name not in live_stat: + continue + metric_found_kernel_count += 1 + metric_aggregated_value += Decimal( + live_stat[rule.metric_name]["current"] + ) + if metric_found_kernel_count == 0: + continue + current_value = metric_aggregated_value / Decimal(metric_found_kernel_count) + case AutoScalingMetricSource.INFERENCE_FRAMEWORK: + if not endpoint_statistics_by_id[rule.endpoint]: + continue + live_stat = json.loads(endpoint_statistics_by_id[rule.endpoint]) + if rule.metric_name not in live_stat: + continue + 
current_value = Decimal(live_stat[rule.metric_name]["current"]) + case _: + raise AssertionError( + "Should not reach here" + ) # FIXME: Replace with named error + + match rule.comparator: + case AutoScalingMetricComparator.LESS_THAN: + should_trigger = current_value < Decimal(rule.threshold) + case AutoScalingMetricComparator.LESS_THAN_OR_EQUAL: + should_trigger = current_value <= Decimal(rule.threshold) + case AutoScalingMetricComparator.GREATHER_THAN: + should_trigger = current_value > Decimal(rule.threshold) + case AutoScalingMetricComparator.GREATHER_THAN_OR_EQUAL: + should_trigger = current_value >= Decimal(rule.threshold) + + if should_trigger and rule.last_triggered_at < ( + current_datetime - timedelta(seconds=rule.cooldown_seconds) + ): + rule.endpoint += rule.step + def _pipeline(r: Redis) -> RedisPipeline: pipe = r.pipeline() pipe.delete(redis_key) From 22cbde899c22089fef965ce2451e20e92043ead0 Mon Sep 17 00:00:00 2001 From: Kyujin Cho Date: Fri, 20 Dec 2024 01:37:34 +0900 Subject: [PATCH 02/75] add gql query & mutations --- src/ai/backend/manager/models/endpoint.py | 2 +- src/ai/backend/manager/models/gql.py | 59 +++++ .../backend/manager/scheduler/dispatcher.py | 216 ++++++++++-------- 3 files changed, 177 insertions(+), 100 deletions(-) diff --git a/src/ai/backend/manager/models/endpoint.py b/src/ai/backend/manager/models/endpoint.py index 42beb637437..89075ab17a9 100644 --- a/src/ai/backend/manager/models/endpoint.py +++ b/src/ai/backend/manager/models/endpoint.py @@ -457,7 +457,7 @@ async def list_by_model( result = await session.execute(query) return result.scalars().all() - async def create_scaling_rule( + async def create_auto_scaling_rule( self, session: AsyncSession, metric_source: AutoScalingMetricSource, diff --git a/src/ai/backend/manager/models/gql.py b/src/ai/backend/manager/models/gql.py index 81669804f16..7027e745309 100644 --- a/src/ai/backend/manager/models/gql.py +++ b/src/ai/backend/manager/models/gql.py @@ -81,6 +81,13 @@ 
DomainPermissionValueField, ModifyDomainNode, ) +from .gql_models.endpoint import ( + CreateEndpointAutoScalingRuleNode, + DeleteEndpointAutoScalingRuleNode, + EndpointAutoScalingRuleConnection, + EndpointAutoScalingRuleNode, + ModifyEndpointAutoScalingRuleNode, +) from .gql_models.fields import AgentPermissionField, ScopeField from .gql_models.group import GroupConnection, GroupNode from .gql_models.image import ( @@ -335,6 +342,16 @@ class Mutations(graphene.ObjectType): description="Added in 24.09.0." ) + create_endpoint_auto_scaling_rule_node = CreateEndpointAutoScalingRuleNode.Field( + description="Added in 24.12.0." + ) + modify_endpoint_auto_scaling_rule_node = ModifyEndpointAutoScalingRuleNode.Field( + description="Added in 24.12.0." + ) + delete_endpoint_auto_scaling_rule_node = DeleteEndpointAutoScalingRuleNode.Field( + description="Added in 24.12.0." + ) + # Legacy mutations create_container_registry = CreateContainerRegistry.Field() modify_container_registry = ModifyContainerRegistry.Field() @@ -899,6 +916,16 @@ class Queries(graphene.ObjectType): ) networks = PaginatedConnectionField(NetworkConnection, description="Added in 24.12.0.") + endpoint_auto_scaling_rule_node = graphene.Field( + EndpointAutoScalingRuleNode, + id=graphene.String(required=True), + description="Added in 24.12.0.", + ) + + endpoint_auto_scaling_rule_nodes = PaginatedConnectionField( + EndpointAutoScalingRuleConnection, description="Added in 24.12.0." 
+ ) + @staticmethod @privileged_query(UserRole.SUPERADMIN) async def resolve_agent( @@ -2618,6 +2645,38 @@ async def resolve_networks( last, ) + @staticmethod + async def resolve_endpoint_auto_scaling_rule_node( + root: Any, + info: graphene.ResolveInfo, + id: str, + ) -> EndpointAutoScalingRuleNode: + return await EndpointAutoScalingRuleNode.get_node(info, id) + + @staticmethod + async def resolve_endpoint_auto_scaling_rule_nodes( + root: Any, + info: graphene.ResolveInfo, + *, + filter: str | None = None, + order: str | None = None, + offset: int | None = None, + after: str | None = None, + first: int | None = None, + before: str | None = None, + last: int | None = None, + ) -> ConnectionResolverResult: + return await EndpointAutoScalingRuleNode.get_connection( + info, + filter_expr=filter, + order_expr=order, + offset=offset, + after=after, + first=first, + before=before, + last=last, + ) + class GQLMutationPrivilegeCheckMiddleware: def resolve(self, next, root, info: graphene.ResolveInfo, **args) -> Any: diff --git a/src/ai/backend/manager/scheduler/dispatcher.py b/src/ai/backend/manager/scheduler/dispatcher.py index 4eb4509a0f1..61891bea3f4 100644 --- a/src/ai/backend/manager/scheduler/dispatcher.py +++ b/src/ai/backend/manager/scheduler/dispatcher.py @@ -1379,6 +1379,117 @@ async def _mark_session_and_kernel_creating( except asyncio.TimeoutError: log.warning("start(): timeout while executing start_session()") + async def _autoscale_endpoints( + self, + session: SASession, + ) -> None: + current_datetime = datetime.now() + rules = await EndpointAutoScalingRuleRow.list(session, load_endpoint=True) + + # currently auto scaling supports two types of stat as source: kernel and endpoint + # to fetch aggregated kernel metrics among every kernels managed by a single endpoint + # we first need to collect every routings, and then the sessions tied to each routing, + # and finally the child kernels of each session + endpoints = await EndpointRow.bulk_load( + session, 
[rule.endpoint for rule in rules], load_routes=True + ) + endpoint_by_id: dict[uuid.UUID, EndpointRow] = { + endpoint.id: endpoint for endpoint in endpoints + } + metric_requested_sessions: list[uuid.UUID] = list() + metric_requested_kernels: list[uuid.UUID] = list() + metric_requested_endpoints: list[uuid.UUID] = list() + + kernel_statistics_by_id: dict[uuid.UUID, Any] = {} + endpoint_statistics_by_id: dict[uuid.UUID, Any] = {} + kernels_by_session_id: dict[uuid.UUID, list[KernelRow]] = defaultdict(lambda: []) + + for rule in rules: + match rule.metric_source: + case AutoScalingMetricSource.KERNEL: + metric_requested_sessions += [ + route.session for route in endpoint_by_id[rule.endpoint].routings + ] + case AutoScalingMetricSource.INFERENCE_FRAMEWORK: + metric_requested_endpoints.append(rule.endpoint) + + kernel_rows = await KernelRow.bulk_load_by_session_id( + session, list(metric_requested_sessions) + ) + for kernel in kernel_rows: + kernels_by_session_id[kernel.session].append(kernel) + metric_requested_kernels.append(kernel) + + # to speed up and lower the pressure to the redis we must load every metrics + # in bulk, not querying each key at once + kernel_live_stats = await KernelStatistics.bulk_load_kernel_metrics( + self.redis_stat, + cast(list[SessionId], list(metric_requested_kernels)), + ) + endpoint_live_stats = await EndpointStatistics.bulk_load_endpoint_metrics( + self.redis_stat, + cast(list[SessionId], list(metric_requested_endpoints)), + ) + + kernel_statistics_by_id = { + kernel_id: metric + for kernel_id, metric in zip(metric_requested_kernels, kernel_live_stats) + } + endpoint_statistics_by_id = { + endpoint_id: metric + for endpoint_id, metric in zip(metric_requested_endpoints, endpoint_live_stats) + } + + for rule in rules: + should_trigger = False + + match rule.metric_source: + # kernel metrics should be evaluated by the average of the metric across every kernels + case AutoScalingMetricSource.KERNEL: + metric_aggregated_value = Decimal("0") 
+ metric_found_kernel_count = 0 + for route in endpoint_by_id[rule.endpoint].routings: + for kernel in kernels_by_session_id[route.session]: + if not kernel_statistics_by_id[kernel.id]: + continue + live_stat = json.loads(kernel_statistics_by_id[kernel.id]) + if rule.metric_name not in live_stat: + continue + metric_found_kernel_count += 1 + metric_aggregated_value += Decimal( + live_stat[rule.metric_name]["current"] + ) + if metric_found_kernel_count == 0: + continue + current_value = metric_aggregated_value / Decimal(metric_found_kernel_count) + case AutoScalingMetricSource.INFERENCE_FRAMEWORK: + if not endpoint_statistics_by_id[rule.endpoint]: + continue + live_stat = json.loads(endpoint_statistics_by_id[rule.endpoint]) + if rule.metric_name not in live_stat: + continue + current_value = Decimal(live_stat[rule.metric_name]["current"]) + case _: + raise AssertionError("Should not reach here") # FIXME: Replace with named error + + match rule.comparator: + case AutoScalingMetricComparator.LESS_THAN: + should_trigger = current_value < Decimal(rule.threshold) + case AutoScalingMetricComparator.LESS_THAN_OR_EQUAL: + should_trigger = current_value <= Decimal(rule.threshold) + case AutoScalingMetricComparator.GREATHER_THAN: + should_trigger = current_value > Decimal(rule.threshold) + case AutoScalingMetricComparator.GREATHER_THAN_OR_EQUAL: + should_trigger = current_value >= Decimal(rule.threshold) + + # changes applied here will be reflected at consequent queries (at `scale_services()`) + # so we do not have to propagate the changes on the function level + if should_trigger and rule.last_triggered_at < ( + current_datetime - timedelta(seconds=rule.cooldown_seconds) + ): + rule.endpoint_row.replicas += rule.step + rule.last_triggered_at = current_datetime + async def scale_services( self, context: None, @@ -1390,105 +1501,6 @@ async def scale_services( manager_id = self.local_config["manager"]["id"] redis_key = f"manager.{manager_id}.scale_services" - async with 
self.db.begin_sssion() as session: - current_datetime = datetime.now() - rules = await EndpointAutoScalingRuleRow.list(session, load_endpoint=True) - endpoints = await EndpointRow.bulk_load( - session, [rule.endpoint for rule in rules], load_routes=True - ) - endpoint_by_id: dict[uuid.UUID, EndpointRow] = { - endpoint.id: endpoint for endpoint in endpoints - } - metric_requested_sessions: list[uuid.UUID] = list() - metric_requested_kernels: list[uuid.UUID] = list() - metric_requested_endpoints: list[uuid.UUID] = list() - - kernel_statistics_by_id: dict[uuid.UUID, Any] = {} - endpoint_statistics_by_id: dict[uuid.UUID, Any] = {} - kernels_by_session_id: dict[uuid.UUID, list[KernelRow]] = defaultdict(lambda: []) - - for rule in rules: - match rule.metric_source: - case AutoScalingMetricSource.KERNEL: - metric_requested_sessions += [ - route.session for route in endpoint_by_id[rule.endpoint].routings - ] - case AutoScalingMetricSource.INFERENCE_FRAMEWORK: - metric_requested_endpoints.append(rule.endpoint) - - kernel_rows = await KernelRow.bulk_load_by_session_id( - session, list(metric_requested_sessions) - ) - for kernel in kernel_rows: - kernels_by_session_id[kernel.session].append(kernel) - metric_requested_kernels.append(kernel) - - kernel_live_stats = await KernelStatistics.bulk_load_kernel_metrics( - self.redis_stat, - cast(list[SessionId], list(metric_requested_kernels)), - ) - endpoint_live_stats = await EndpointStatistics.bulk_load_endpoint_metrics( - self.redis_stat, - cast(list[SessionId], list(metric_requested_endpoints)), - ) - - kernel_statistics_by_id = { - kernel_id: metric - for kernel_id, metric in zip(metric_requested_kernels, kernel_live_stats) - } - endpoint_statistics_by_id = { - endpoint_id: metric - for endpoint_id, metric in zip(metric_requested_endpoints, endpoint_live_stats) - } - - for rule in rules: - should_trigger = False - - match rule.metric_source: - case AutoScalingMetricSource.KERNEL: - metric_aggregated_value = Decimal("0") - 
metric_found_kernel_count = 0 - for route in endpoint_by_id[rule.endpoint].routings: - for kernel in kernels_by_session_id[route.session]: - if not kernel_statistics_by_id[kernel.id]: - continue - live_stat = json.loads(kernel_statistics_by_id[kernel.id]) - if rule.metric_name not in live_stat: - continue - metric_found_kernel_count += 1 - metric_aggregated_value += Decimal( - live_stat[rule.metric_name]["current"] - ) - if metric_found_kernel_count == 0: - continue - current_value = metric_aggregated_value / Decimal(metric_found_kernel_count) - case AutoScalingMetricSource.INFERENCE_FRAMEWORK: - if not endpoint_statistics_by_id[rule.endpoint]: - continue - live_stat = json.loads(endpoint_statistics_by_id[rule.endpoint]) - if rule.metric_name not in live_stat: - continue - current_value = Decimal(live_stat[rule.metric_name]["current"]) - case _: - raise AssertionError( - "Should not reach here" - ) # FIXME: Replace with named error - - match rule.comparator: - case AutoScalingMetricComparator.LESS_THAN: - should_trigger = current_value < Decimal(rule.threshold) - case AutoScalingMetricComparator.LESS_THAN_OR_EQUAL: - should_trigger = current_value <= Decimal(rule.threshold) - case AutoScalingMetricComparator.GREATHER_THAN: - should_trigger = current_value > Decimal(rule.threshold) - case AutoScalingMetricComparator.GREATHER_THAN_OR_EQUAL: - should_trigger = current_value >= Decimal(rule.threshold) - - if should_trigger and rule.last_triggered_at < ( - current_datetime - timedelta(seconds=rule.cooldown_seconds) - ): - rule.endpoint += rule.step - def _pipeline(r: Redis) -> RedisPipeline: pipe = r.pipeline() pipe.delete(redis_key) @@ -1501,6 +1513,12 @@ def _pipeline(r: Redis) -> RedisPipeline: ) return pipe + async def _autoscale_txn() -> None: + async with self.db.begin_sssion(commit_on_end=True) as session: + await self._autoscale_endpoints(session) + + await execute_with_retry(_autoscale_txn) + await redis_helper.execute( self.redis_live, _pipeline, From 
edb16dca4289daf7dd8cbdd9a8eda21901a49a4b Mon Sep 17 00:00:00 2001 From: Kyujin Cho Date: Fri, 20 Dec 2024 01:51:11 +0900 Subject: [PATCH 03/75] add migration script --- ...reate_endpoint_auto_scaling_rules_table.py | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 src/ai/backend/manager/models/alembic/versions/fb89f5d7817b_create_endpoint_auto_scaling_rules_table.py diff --git a/src/ai/backend/manager/models/alembic/versions/fb89f5d7817b_create_endpoint_auto_scaling_rules_table.py b/src/ai/backend/manager/models/alembic/versions/fb89f5d7817b_create_endpoint_auto_scaling_rules_table.py new file mode 100644 index 00000000000..6b0828ba4a1 --- /dev/null +++ b/src/ai/backend/manager/models/alembic/versions/fb89f5d7817b_create_endpoint_auto_scaling_rules_table.py @@ -0,0 +1,51 @@ +"""create endpoint_auto_scaling_rules table + +Revision ID: fb89f5d7817b +Revises: 0bb88d5a46bf +Create Date: 2024-12-20 01:48:21.009056 + +""" + +import sqlalchemy as sa +from alembic import op + +from ai.backend.manager.models.base import GUID, IDColumn + +# revision identifiers, used by Alembic. +revision = "fb89f5d7817b" +down_revision = "0bb88d5a46bf" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + op.create_table( + "endpoint_auto_scaling_rules", + IDColumn(), + sa.Column("metric_source", sa.VARCHAR(64), nullable=False), + sa.Column("metric_name", sa.Text(), nullable=False), + sa.Column("threshold", sa.Text(), nullable=False), + sa.Column("comparator", sa.VARCHAR(64), nullable=False), + sa.Column("step_size", sa.Integer(), nullable=False), + sa.Column("cooldown_seconds", sa.Integer(), nullable=False), + sa.Column( + "created_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=True + ), + sa.Column("last_triggered_at", sa.DateTime(timezone=True), nullable=True), + sa.Column("endpoint", GUID(), nullable=False), + sa.ForeignKeyConstraint( + ["endpoint"], + ["endpoints.id"], + name=op.f("fk_endpoint_auto_scaling_rules_endpoint_endpoints"), + ondelete="CASCADE", + ), + sa.PrimaryKeyConstraint("id", name=op.f("pk_endpoint_auto_scaling_rules")), + ) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_table("endpoint_auto_scaling_rules") + # ### end Alembic commands ### From 79ad37e9d0d54629e9cb5933e559c3e886b113b9 Mon Sep 17 00:00:00 2001 From: Kyujin Cho Date: Fri, 20 Dec 2024 02:07:07 +0900 Subject: [PATCH 04/75] add missing file --- .../manager/models/gql_models/endpoint.py | 405 ++++++++++++++++++ 1 file changed, 405 insertions(+) create mode 100644 src/ai/backend/manager/models/gql_models/endpoint.py diff --git a/src/ai/backend/manager/models/gql_models/endpoint.py b/src/ai/backend/manager/models/gql_models/endpoint.py new file mode 100644 index 00000000000..51e0eeade6a --- /dev/null +++ b/src/ai/backend/manager/models/gql_models/endpoint.py @@ -0,0 +1,405 @@ +import decimal +import uuid +from typing import TYPE_CHECKING, Mapping, Self + +import graphene +import sqlalchemy as sa +from dateutil.parser import parse as dtparse +from graphene.types.datetime import DateTime as GQLDateTime +from graphql import Undefined +from sqlalchemy.orm.exc import NoResultFound + +from ai.backend.manager.api.exceptions import ( + GenericForbidden, + InvalidAPIParameters, + ObjectNotFound, +) + +from ..base import ( + FilterExprArg, + OrderExprArg, + generate_sql_info_for_gql_connection, + gql_mutation_wrapper, + set_if_set, +) +from ..endpoint import ( + AutoScalingMetricComparator, + AutoScalingMetricSource, + EndpointAutoScalingRuleRow, + EndpointRow, +) +from ..gql_relay import AsyncNode, Connection, ConnectionResolverResult +from ..minilang.ordering import OrderSpecItem, QueryOrderParser +from ..minilang.queryfilter import FieldSpecItem, QueryFilterParser +from ..user import UserRole + +if TYPE_CHECKING: + from ..gql import GraphQueryContext + + +_queryfilter_fieldspec: Mapping[str, FieldSpecItem] = { + "id": ("id", None), + "metric_source": ("metric_source", None), + "metric_name": ("metric_name", None), + "threshold": ("threshold", None), + "comparator": ("comparator", None), + "step_size": ("step_size", None), + "cooldown_seconds": ("cooldown_seconds", 
None), + "created_at": ("created_at", dtparse), + "last_triggered_at": ("last_triggered_at", dtparse), + "endpoint": ("endpoint", None), +} + +_queryorder_colmap: Mapping[str, OrderSpecItem] = { + "id": ("id", None), + "metric_source": ("metric_source", None), + "metric_name": ("metric_name", None), + "threshold": ("threshold", None), + "comparator": ("comparator", None), + "step_size": ("step_size", None), + "cooldown_seconds": ("cooldown_seconds", None), + "created_at": ("created_at", None), + "last_triggered_at": ("last_triggered_at", None), + "endpoint": ("endpoint", None), +} + + +class EndpointAutoScalingRuleNode(graphene.ObjectType): + class Meta: + interfaces = (AsyncNode,) + description = "Added in 24.12.0." + + row_id = graphene.UUID(required=True, description="Added in 24.12.0.") + + metric_source = graphene.String(required=True, description="Added in 24.12.0.") + metric_name = graphene.String(required=True, description="Added in 24.12.0.") + threshold = graphene.String(required=True, description="Added in 24.12.0.") + comparator = graphene.String(required=True, description="Added in 24.12.0.") + step_size = graphene.Int(required=True, description="Added in 24.12.0.") + cooldown_seconds = graphene.Int(required=True, description="Added in 24.12.0.") + + created_at = GQLDateTime(required=True, description="Added in 24.12.0.") + last_triggered_at = GQLDateTime(description="Added in 24.12.0.") + + endpoint = graphene.UUID(required=True, description="Added in 24.12.0.") + + @classmethod + def from_row( + cls, graph_ctx: GraphQueryContext, row: EndpointAutoScalingRuleRow + ) -> "EndpointAutoScalingRuleNode": + return EndpointAutoScalingRuleNode( + id=row.id, + row_id=row.id, + metric_source=row.metric_source.name, + metric_name=row.metric_name, + threshold=row.threshold, + comparator=row.comparator.name, + step_size=row.step_size, + cooldown_seconds=row.cooldown_seconds, + created_at=row.created_at, + last_triggered_at=row.last_triggered_at, + 
endpoint=row.endpoint, + ) + + @classmethod + async def get_node(cls, info: graphene.ResolveInfo, id: str) -> "EndpointAutoScalingRuleNode": + graph_ctx: GraphQueryContext = info.context + + _, rule = AsyncNode.resolve_global_id(info, id) + query = sa.select(EndpointAutoScalingRuleRow).where(EndpointAutoScalingRuleRow.id == rule) + async with graph_ctx.db.begin_readonly_session() as db_session: + rule_row = await db_session.scalar(query) + if rule_row is None: + raise ValueError(f"Rule not found (id: {rule})") + return cls.from_row(graph_ctx, rule_row) + + @classmethod + async def get_connection( + cls, + info: graphene.ResolveInfo, + *, + filter_expr: str | None = None, + order_expr: str | None = None, + offset: int | None = None, + after: str | None = None, + first: int | None = None, + before: str | None = None, + last: int | None = None, + ) -> ConnectionResolverResult[Self]: + graph_ctx: GraphQueryContext = info.context + _filter_arg = ( + FilterExprArg(filter_expr, QueryFilterParser(_queryfilter_fieldspec)) + if filter_expr is not None + else None + ) + _order_expr = ( + OrderExprArg(order_expr, QueryOrderParser(_queryorder_colmap)) + if order_expr is not None + else None + ) + ( + query, + cnt_query, + _, + cursor, + pagination_order, + page_size, + ) = generate_sql_info_for_gql_connection( + info, + EndpointAutoScalingRuleRow, + EndpointAutoScalingRuleRow.id, + _filter_arg, + _order_expr, + offset, + after=after, + first=first, + before=before, + last=last, + ) + async with graph_ctx.db.begin_readonly_session() as db_session: + group_rows = (await db_session.scalars(query)).all() + result = [cls.from_row(graph_ctx, row) for row in group_rows] + total_cnt = await db_session.scalar(cnt_query) + return ConnectionResolverResult(result, cursor, pagination_order, page_size, total_cnt) + + +class EndpointAutoScalingRuleConnection(Connection): + class Meta: + node = EndpointAutoScalingRuleNode + description = "Added in 24.12.0." 
+ + +class EndpointAutoScalingRuleInput(graphene.InputObjectType): + metric_source = graphene.String( + required=True, + description=( + f"Added in 24.12.0. Available values: {", ".join([p.name for p in AutoScalingMetricSource])}" + ), + ) + metric_name = graphene.String(required=True, description="Added in 24.12.0.") + threshold = graphene.String(required=True, description="Added in 24.12.0.") + comparator = graphene.String( + required=True, + description=( + f"Added in 24.12.0. Available values: {", ".join([p.name for p in AutoScalingMetricComparator])}" + ), + ) + step_size = graphene.Int(required=True, description="Added in 24.12.0.") + cooldown_seconds = graphene.Int(required=True, description="Added in 24.12.0.") + + +class ModifyEndpointAutoScalingRuleInput(graphene.InputObjectType): + metric_source = graphene.String( + description=( + f"Added in 24.12.0. Available values: {", ".join([p.name for p in AutoScalingMetricSource])}" + ) + ) + metric_name = graphene.String(description="Added in 24.12.0.") + threshold = graphene.String(description="Added in 24.12.0.") + comparator = graphene.String( + description=( + f"Added in 24.12.0. 
Available values: {", ".join([p.name for p in AutoScalingMetricComparator])}" + ) + ) + step_size = graphene.Int(description="Added in 24.12.0.") + cooldown_seconds = graphene.Int(description="Added in 24.12.0.") + + +class CreateEndpointAutoScalingRuleNode(graphene.Mutation): + allowed_roles = (UserRole.USER, UserRole.ADMIN, UserRole.SUPERADMIN) + + class Arguments: + endpoint_id = graphene.String(required=True) + props = EndpointAutoScalingRuleInput(required=True) + + @classmethod + async def mutate( + cls, + root, + info: graphene.ResolveInfo, + endpoint_id: str, + props: EndpointAutoScalingRuleInput, + ) -> "CreateEndpointAutoScalingRuleNode": + _, raw_endpoint_id = AsyncNode.resolve_global_id(info, endpoint_id) + if not raw_endpoint_id: + raw_endpoint_id = endpoint_id + + try: + _endpoint_id = uuid.UUID(raw_endpoint_id) + except ValueError: + raise ObjectNotFound("endpoint") + + graph_ctx: GraphQueryContext = info.context + async with graph_ctx.db.begin_session(commit_on_end=True) as db_session: + try: + row = await EndpointRow.get(db_session, _endpoint_id) + except NoResultFound: + raise ObjectNotFound(object_name="endpoint") + + match graph_ctx.user["role"]: + case UserRole.SUPERADMIN: + pass + case UserRole.ADMIN: + if row.domain != graph_ctx.user["domain_name"]: + raise GenericForbidden + case UserRole.USER: + if row.created_user != graph_ctx.user["uuid"]: + raise GenericForbidden + + try: + _source = AutoScalingMetricSource[props.metric_source] + except ValueError: + raise InvalidAPIParameters( + f"Unsupported AutoScalingMetricSource {props.metric_source}" + ) + try: + _comparator = AutoScalingMetricComparator[props.comparator] + except ValueError: + raise InvalidAPIParameters( + f"Unsupported AutoScalingMetricComparator {props.comparator}" + ) + try: + _threshold = decimal.Decimal(props.threshold) + except decimal.InvalidOperation: + raise InvalidAPIParameters(f"Cannot convert {props.threshold} to Decimal") + + async def _do_mutate() -> 
CreateEndpointAutoScalingRuleNode: + created_rule = await row.create_auto_scaling_rule( + db_session, + _source, + props.name, + _threshold, + _comparator, + props.step_size, + props.cooldown_seconds, + ) + return CreateEndpointAutoScalingRuleNode( + ok=True, + msg="Auto scaling rule created", + network=EndpointAutoScalingRuleNode.from_row(info.context, created_rule), + ) + + return await gql_mutation_wrapper(CreateEndpointAutoScalingRuleNode, _do_mutate) + + +class ModifyEndpointAutoScalingRuleNode(graphene.Mutation): + allowed_roles = (UserRole.USER, UserRole.ADMIN, UserRole.SUPERADMIN) + + class Arguments: + id = graphene.String(required=True) + props = ModifyEndpointAutoScalingRuleInput(required=True) + + @classmethod + async def mutate( + cls, + root, + info: graphene.ResolveInfo, + id: str, + props: ModifyEndpointAutoScalingRuleInput, + ) -> "ModifyEndpointAutoScalingRuleNode": + _, rule_id = AsyncNode.resolve_global_id(info, id) + if not rule_id: + rule_id = id + + try: + _rule_id = uuid.UUID(rule_id) + except ValueError: + raise ObjectNotFound("auto_scaling_rule") + + graph_ctx: GraphQueryContext = info.context + async with graph_ctx.db.begin_session(commit_on_end=True) as db_session: + try: + row = await EndpointAutoScalingRuleRow.get(db_session, _rule_id, load_endpoint=True) + except NoResultFound: + raise ObjectNotFound(object_name="auto_scaling_rule") + + match graph_ctx.user["role"]: + case UserRole.SUPERADMIN: + pass + case UserRole.ADMIN: + if row.endpoint_row.domain != graph_ctx.user["domain_name"]: + raise GenericForbidden + case UserRole.USER: + if row.endpoint_row.created_user != graph_ctx.user["uuid"]: + raise GenericForbidden + + async def _do_mutate() -> CreateEndpointAutoScalingRuleNode: + if (_newval := props.metric_source) and _newval is not Undefined: + try: + row.metric_source = AutoScalingMetricSource[_newval] + except ValueError: + raise InvalidAPIParameters(f"Unsupported AutoScalingMetricSource {_newval}") + if (_newval := 
props.comparator) and _newval is not Undefined: + try: + row.comparator = AutoScalingMetricComparator[_newval] + except ValueError: + raise InvalidAPIParameters( + f"Unsupported AutoScalingMetricComparator {_newval}" + ) + if (_newval := props.threshold) and _newval is not Undefined: + try: + row.threshold = decimal.Decimal(_newval) + except decimal.InvalidOperation: + raise InvalidAPIParameters(f"Cannot convert {_newval} to Decimal") + + set_if_set(props, row, "metric_name") + set_if_set(props, row, "step_size") + set_if_set(props, row, "cooldown_seconds") + + return ModifyEndpointAutoScalingRuleNode( + ok=True, + msg="Auto scaling rule updated", + network=EndpointAutoScalingRuleNode.from_row(info.context, row), + ) + + return await gql_mutation_wrapper(ModifyEndpointAutoScalingRuleNode, _do_mutate) + + +class DeleteEndpointAutoScalingRuleNode(graphene.Mutation): + allowed_roles = (UserRole.USER, UserRole.ADMIN, UserRole.SUPERADMIN) + + class Arguments: + id = graphene.String(required=True) + + @classmethod + async def mutate( + cls, + root, + info: graphene.ResolveInfo, + id: str, + ) -> "DeleteEndpointAutoScalingRuleNode": + _, rule_id = AsyncNode.resolve_global_id(info, id) + if not rule_id: + rule_id = id + + try: + _rule_id = uuid.UUID(rule_id) + except ValueError: + raise ObjectNotFound("auto_scaling_rule") + + graph_ctx: GraphQueryContext = info.context + async with graph_ctx.db.begin_session(commit_on_end=True) as db_session: + try: + row = await EndpointAutoScalingRuleRow.get(db_session, _rule_id, load_endpoint=True) + except NoResultFound: + raise ObjectNotFound(object_name="auto_scaling_rule") + + match graph_ctx.user["role"]: + case UserRole.SUPERADMIN: + pass + case UserRole.ADMIN: + if row.endpoint_row.domain != graph_ctx.user["domain_name"]: + raise GenericForbidden + case UserRole.USER: + if row.endpoint_row.created_user != graph_ctx.user["uuid"]: + raise GenericForbidden + + async def _do_mutate() -> DeleteEndpointAutoScalingRuleNode: + 
await db_session.delete(row)
+
+            return DeleteEndpointAutoScalingRuleNode(
+                ok=True,
+                msg="Auto scaling rule removed",
+            )
+
+        return await gql_mutation_wrapper(DeleteEndpointAutoScalingRuleNode, _do_mutate)

From 9bc0661dd7dcd20265a4837f0d662c5163c6b166 Mon Sep 17 00:00:00 2001
From: Kyujin Cho
Date: Fri, 20 Dec 2024 02:07:56 +0900
Subject: [PATCH 05/75] add news fragment

---
 changes/3277.feature.md                       |  1 +
 src/ai/backend/manager/models/base.py         | 28 ++++++
 src/ai/backend/manager/models/endpoint.py     | 24 +++--
 src/ai/backend/manager/models/gql.py          |  6 +-
 .../manager/models/gql_models/endpoint.py     | 91 +++++++++++++++----
 src/ai/backend/manager/models/kernel.py       |  2 +-
 .../backend/manager/scheduler/dispatcher.py   | 75 +++++++++++----
 7 files changed, 182 insertions(+), 45 deletions(-)
 create mode 100644 changes/3277.feature.md

diff --git a/changes/3277.feature.md b/changes/3277.feature.md
new file mode 100644
index 00000000000..c3e45b09110
--- /dev/null
+++ b/changes/3277.feature.md
@@ -0,0 +1 @@
+Support model service auto scaling
diff --git a/src/ai/backend/manager/models/base.py b/src/ai/backend/manager/models/base.py
index b9e8100887e..36730e00eb8 100644
--- a/src/ai/backend/manager/models/base.py
+++ b/src/ai/backend/manager/models/base.py
@@ -14,6 +14,7 @@
     MutableMapping,
     Sequence,
 )
+from decimal import Decimal
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -1656,3 +1657,30 @@ def generate_sql_info_for_gql_connection(
                 "Set 'first' or 'last' to a smaller integer."
)
     return ret
+
+
+class DecimalType(TypeDecorator, Decimal):
+    """
+    Database type adaptor for Decimal
+    """
+
+    impl = sa.VARCHAR
+    cache_ok = True
+
+    def process_bind_param(
+        self,
+        value: Optional[Decimal],
+        dialect: Dialect,
+    ) -> Optional[str]:
+        return str(value) if value is not None else None
+
+    def process_result_value(
+        self,
+        value: str,
+        dialect: Dialect,
+    ) -> Optional[Decimal]:
+        return Decimal(value) if value else None
+
+    @property
+    def python_type(self) -> type[Decimal]:
+        return Decimal
diff --git a/src/ai/backend/manager/models/endpoint.py b/src/ai/backend/manager/models/endpoint.py
index 89075ab17a9..c9b4154b576 100644
--- a/src/ai/backend/manager/models/endpoint.py
+++ b/src/ai/backend/manager/models/endpoint.py
@@ -54,6 +54,7 @@
 from .base import (
     GUID,
     Base,
+    DecimalType,
     EndpointIDColumn,
     EnumValueType,
     ForeignKeyIDColumn,
@@ -119,8 +120,8 @@ class AutoScalingMetricSource(StrEnum):
 class AutoScalingMetricComparator(StrEnum):
     LESS_THAN = "lt"
     LESS_THAN_OR_EQUAL = "le"
-    GREATHER_THAN = "gt"
-    GREATHER_THAN_OR_EQUAL = "ge"
+    GREATER_THAN = "gt"
+    GREATER_THAN_OR_EQUAL = "ge"
 
 
 class EndpointRow(Base):
@@ -584,9 +585,7 @@ class EndpointAutoScalingRuleRow(Base):
     id = IDColumn()
     metric_source = sa.Column("metric_source", StrEnumType(AutoScalingMetricSource), nullable=False)
     metric_name = sa.Column("metric_name", sa.Text(), nullable=False)
-    threshold = sa.Column(
-        "threshold", sa.Text(), nullable=False
-    )  # FIXME: How can I put Decimal here? 
+ threshold = sa.Column("threshold", DecimalType(), nullable=False) comparator = sa.Column("comparator", StrEnumType(AutoScalingMetricComparator), nullable=False) step_size = sa.Column("step_size", sa.Integer(), nullable=False) cooldown_seconds = sa.Column("cooldown_seconds", sa.Integer(), nullable=False, default=300) @@ -618,10 +617,23 @@ async def list( ) -> list["EndpointAutoScalingRuleRow"]: query = sa.select(EndpointAutoScalingRuleRow) if load_endpoint: - query = query.options(selectinload(EndpointAutoScalingRuleRow.tokens)) + query = query.options(selectinload(EndpointAutoScalingRuleRow.endpoint_row)) result = await session.execute(query) return result.scalars().all() + @classmethod + async def get( + cls, session: AsyncSession, id: uuid.UUID, load_endpoint=False + ) -> "EndpointAutoScalingRuleRow": + query = sa.select(EndpointAutoScalingRuleRow).filter(EndpointAutoScalingRuleRow.id == id) + if load_endpoint: + query = query.options(selectinload(EndpointAutoScalingRuleRow.endpoint_row)) + result = await session.execute(query) + row = result.scalar() + if not row: + raise ObjectNotFound("endpoint_auto_scaling_rule") + return row + def __init__( self, id: uuid.UUID, diff --git a/src/ai/backend/manager/models/gql.py b/src/ai/backend/manager/models/gql.py index 7027e745309..2e5f5c24476 100644 --- a/src/ai/backend/manager/models/gql.py +++ b/src/ai/backend/manager/models/gql.py @@ -923,7 +923,9 @@ class Queries(graphene.ObjectType): ) endpoint_auto_scaling_rule_nodes = PaginatedConnectionField( - EndpointAutoScalingRuleConnection, description="Added in 24.12.0." 
+ EndpointAutoScalingRuleConnection, + endpoint=graphene.String(required=True), + description="Added in 24.12.0.", ) @staticmethod @@ -2657,6 +2659,7 @@ async def resolve_endpoint_auto_scaling_rule_node( async def resolve_endpoint_auto_scaling_rule_nodes( root: Any, info: graphene.ResolveInfo, + endpoint: str, *, filter: str | None = None, order: str | None = None, @@ -2668,6 +2671,7 @@ async def resolve_endpoint_auto_scaling_rule_nodes( ) -> ConnectionResolverResult: return await EndpointAutoScalingRuleNode.get_connection( info, + endpoint, filter_expr=filter, order_expr=order, offset=offset, diff --git a/src/ai/backend/manager/models/gql_models/endpoint.py b/src/ai/backend/manager/models/gql_models/endpoint.py index 51e0eeade6a..8ccebc2af82 100644 --- a/src/ai/backend/manager/models/gql_models/endpoint.py +++ b/src/ai/backend/manager/models/gql_models/endpoint.py @@ -3,7 +3,6 @@ from typing import TYPE_CHECKING, Mapping, Self import graphene -import sqlalchemy as sa from dateutil.parser import parse as dtparse from graphene.types.datetime import DateTime as GQLDateTime from graphql import Undefined @@ -85,7 +84,7 @@ class Meta: @classmethod def from_row( - cls, graph_ctx: GraphQueryContext, row: EndpointAutoScalingRuleRow + cls, graph_ctx: "GraphQueryContext", row: EndpointAutoScalingRuleRow ) -> "EndpointAutoScalingRuleNode": return EndpointAutoScalingRuleNode( id=row.id, @@ -102,21 +101,40 @@ def from_row( ) @classmethod - async def get_node(cls, info: graphene.ResolveInfo, id: str) -> "EndpointAutoScalingRuleNode": + async def get_node( + cls, info: graphene.ResolveInfo, rule_id: str + ) -> "EndpointAutoScalingRuleNode": graph_ctx: GraphQueryContext = info.context - _, rule = AsyncNode.resolve_global_id(info, id) - query = sa.select(EndpointAutoScalingRuleRow).where(EndpointAutoScalingRuleRow.id == rule) + _, raw_rule_id = AsyncNode.resolve_global_id(info, rule_id) + if not raw_rule_id: + raw_rule_id = rule_id + try: + _rule_id = uuid.UUID(raw_rule_id) + 
except ValueError:
+            raise ObjectNotFound("endpoint_auto_scaling_rule")
+
         async with graph_ctx.db.begin_readonly_session() as db_session:
-            rule_row = await db_session.scalar(query)
-            if rule_row is None:
-                raise ValueError(f"Rule not found (id: {rule})")
+            rule_row = await EndpointAutoScalingRuleRow.get(
+                db_session, _rule_id, load_endpoint=True
+            )
+            match graph_ctx.user["role"]:
+                case UserRole.SUPERADMIN:
+                    pass
+                case UserRole.ADMIN:
+                    if rule_row.endpoint_row.domain != graph_ctx.user["domain_name"]:
+                        raise GenericForbidden
+                case UserRole.USER:
+                    if rule_row.endpoint_row.created_user != graph_ctx.user["uuid"]:
+                        raise GenericForbidden
+
             return cls.from_row(graph_ctx, rule_row)
 
     @classmethod
     async def get_connection(
         cls,
         info: graphene.ResolveInfo,
+        endpoint: str,
         *,
         filter_expr: str | None = None,
         order_expr: str | None = None,
@@ -156,7 +174,31 @@ async def get_connection(
             before=before,
             last=last,
         )
+
         async with graph_ctx.db.begin_readonly_session() as db_session:
+            _, raw_endpoint_id = AsyncNode.resolve_global_id(info, endpoint)
+            if not raw_endpoint_id:
+                raw_endpoint_id = endpoint
+            try:
+                _endpoint_id = uuid.UUID(raw_endpoint_id)
+            except ValueError:
+                raise ObjectNotFound("endpoint")
+            try:
+                row = await EndpointRow.get(db_session, _endpoint_id)
+            except NoResultFound:
+                raise ObjectNotFound(object_name="endpoint")
+
+            match graph_ctx.user["role"]:
+                case UserRole.SUPERADMIN:
+                    pass
+                case UserRole.ADMIN:
+                    if row.domain != graph_ctx.user["domain_name"]:
+                        raise GenericForbidden
+                case UserRole.USER:
+                    if row.created_user != graph_ctx.user["uuid"]:
+                        raise GenericForbidden
+
+            query = query.filter(EndpointAutoScalingRuleRow.endpoint == _endpoint_id)
             group_rows = (await db_session.scalars(query)).all()
             result = [cls.from_row(graph_ctx, row) for row in group_rows]
             total_cnt = await db_session.scalar(cnt_query)
@@ -209,20 +251,24 @@ class CreateEndpointAutoScalingRuleNode(graphene.Mutation):
     allowed_roles = (UserRole.USER, 
UserRole.ADMIN, UserRole.SUPERADMIN) class Arguments: - endpoint_id = graphene.String(required=True) + endpoint = graphene.String(required=True) props = EndpointAutoScalingRuleInput(required=True) + ok = graphene.Boolean() + msg = graphene.String() + rule = graphene.Field(lambda: EndpointAutoScalingRuleNode, required=False) + @classmethod async def mutate( cls, root, info: graphene.ResolveInfo, - endpoint_id: str, + endpoint: str, props: EndpointAutoScalingRuleInput, ) -> "CreateEndpointAutoScalingRuleNode": - _, raw_endpoint_id = AsyncNode.resolve_global_id(info, endpoint_id) + _, raw_endpoint_id = AsyncNode.resolve_global_id(info, endpoint) if not raw_endpoint_id: - raw_endpoint_id = endpoint_id + raw_endpoint_id = endpoint try: _endpoint_id = uuid.UUID(raw_endpoint_id) @@ -248,13 +294,13 @@ async def mutate( try: _source = AutoScalingMetricSource[props.metric_source] - except ValueError: + except (KeyError, ValueError): raise InvalidAPIParameters( f"Unsupported AutoScalingMetricSource {props.metric_source}" ) try: _comparator = AutoScalingMetricComparator[props.comparator] - except ValueError: + except (KeyError, ValueError): raise InvalidAPIParameters( f"Unsupported AutoScalingMetricComparator {props.comparator}" ) @@ -267,7 +313,7 @@ async def _do_mutate() -> CreateEndpointAutoScalingRuleNode: created_rule = await row.create_auto_scaling_rule( db_session, _source, - props.name, + props.metric_name, _threshold, _comparator, props.step_size, @@ -276,7 +322,7 @@ async def _do_mutate() -> CreateEndpointAutoScalingRuleNode: return CreateEndpointAutoScalingRuleNode( ok=True, msg="Auto scaling rule created", - network=EndpointAutoScalingRuleNode.from_row(info.context, created_rule), + rule=EndpointAutoScalingRuleNode.from_row(info.context, created_rule), ) return await gql_mutation_wrapper(CreateEndpointAutoScalingRuleNode, _do_mutate) @@ -289,6 +335,10 @@ class Arguments: id = graphene.String(required=True) props = ModifyEndpointAutoScalingRuleInput(required=True) + 
ok = graphene.Boolean() + msg = graphene.String() + rule = graphene.Field(lambda: EndpointAutoScalingRuleNode, required=False) + @classmethod async def mutate( cls, @@ -327,12 +377,12 @@ async def _do_mutate() -> CreateEndpointAutoScalingRuleNode: if (_newval := props.metric_source) and _newval is not Undefined: try: row.metric_source = AutoScalingMetricSource[_newval] - except ValueError: + except (KeyError, ValueError): raise InvalidAPIParameters(f"Unsupported AutoScalingMetricSource {_newval}") if (_newval := props.comparator) and _newval is not Undefined: try: row.comparator = AutoScalingMetricComparator[_newval] - except ValueError: + except (KeyError, ValueError): raise InvalidAPIParameters( f"Unsupported AutoScalingMetricComparator {_newval}" ) @@ -349,7 +399,7 @@ async def _do_mutate() -> CreateEndpointAutoScalingRuleNode: return ModifyEndpointAutoScalingRuleNode( ok=True, msg="Auto scaling rule updated", - network=EndpointAutoScalingRuleNode.from_row(info.context, row), + rule=EndpointAutoScalingRuleNode.from_row(info.context, row), ) return await gql_mutation_wrapper(ModifyEndpointAutoScalingRuleNode, _do_mutate) @@ -361,6 +411,9 @@ class DeleteEndpointAutoScalingRuleNode(graphene.Mutation): class Arguments: id = graphene.String(required=True) + ok = graphene.Boolean() + msg = graphene.String() + @classmethod async def mutate( cls, diff --git a/src/ai/backend/manager/models/kernel.py b/src/ai/backend/manager/models/kernel.py index 1565a4f9e67..3e34be10bc2 100644 --- a/src/ai/backend/manager/models/kernel.py +++ b/src/ai/backend/manager/models/kernel.py @@ -591,7 +591,7 @@ def get_used_days(self, local_tz: tzfile) -> Optional[int]: async def bulk_load_by_session_id( session: SASession, session_ids: list[uuid.UUID] ) -> list["KernelRow"]: - query = sa.select(KernelRow).where(KernelRow.session.in_(session_ids)) + query = sa.select(KernelRow).where(KernelRow.session_id.in_(session_ids)) return (await session.execute(query)).scalars().all() @staticmethod diff 
--git a/src/ai/backend/manager/scheduler/dispatcher.py b/src/ai/backend/manager/scheduler/dispatcher.py index 61891bea3f4..37c9fa296e4 100644 --- a/src/ai/backend/manager/scheduler/dispatcher.py +++ b/src/ai/backend/manager/scheduler/dispatcher.py @@ -1442,6 +1442,13 @@ async def _autoscale_endpoints( for rule in rules: should_trigger = False + if len(endpoint_by_id[rule.endpoint].routings) == 0: + log.debug( + "_autoscale_endpoints(e: {}, r: {}): endpoint does not have any replicas, skipping", + rule.endpoint, + rule.id, + ) + continue match rule.metric_source: # kernel metrics should be evaluated by the average of the metric across every kernels @@ -1452,7 +1459,7 @@ async def _autoscale_endpoints( for kernel in kernels_by_session_id[route.session]: if not kernel_statistics_by_id[kernel.id]: continue - live_stat = json.loads(kernel_statistics_by_id[kernel.id]) + live_stat = kernel_statistics_by_id[kernel.id] if rule.metric_name not in live_stat: continue metric_found_kernel_count += 1 @@ -1465,30 +1472,62 @@ async def _autoscale_endpoints( case AutoScalingMetricSource.INFERENCE_FRAMEWORK: if not endpoint_statistics_by_id[rule.endpoint]: continue - live_stat = json.loads(endpoint_statistics_by_id[rule.endpoint]) + live_stat = endpoint_statistics_by_id[rule.endpoint] if rule.metric_name not in live_stat: + log.debug( + "_autoscale_endpoints(e: {}, r: {}): metric {} does not exist, skipping", + rule.endpoint, + rule.id, + rule.metric_name, + ) continue - current_value = Decimal(live_stat[rule.metric_name]["current"]) + current_value = Decimal(live_stat[rule.metric_name]["current"]) / len( + endpoint_by_id[rule.endpoint].routings + ) case _: raise AssertionError("Should not reach here") # FIXME: Replace with named error match rule.comparator: case AutoScalingMetricComparator.LESS_THAN: - should_trigger = current_value < Decimal(rule.threshold) + should_trigger = current_value < rule.threshold case AutoScalingMetricComparator.LESS_THAN_OR_EQUAL: - should_trigger = 
current_value <= Decimal(rule.threshold) - case AutoScalingMetricComparator.GREATHER_THAN: - should_trigger = current_value > Decimal(rule.threshold) - case AutoScalingMetricComparator.GREATHER_THAN_OR_EQUAL: - should_trigger = current_value >= Decimal(rule.threshold) - - # changes applied here will be reflected at consequent queries (at `scale_services()`) - # so we do not have to propagate the changes on the function level - if should_trigger and rule.last_triggered_at < ( - current_datetime - timedelta(seconds=rule.cooldown_seconds) - ): - rule.endpoint_row.replicas += rule.step - rule.last_triggered_at = current_datetime + should_trigger = current_value <= rule.threshold + case AutoScalingMetricComparator.GREATER_THAN: + should_trigger = current_value > rule.threshold + case AutoScalingMetricComparator.GREATER_THAN_OR_EQUAL: + should_trigger = current_value >= rule.threshold + + log.debug( + "_autoscale_endpoints(e: {}, r: {}): {} {} {}: {}", + rule.endpoint, + rule.id, + current_value, + rule.comparator.value, + rule.threshold, + should_trigger, + ) + if should_trigger: + if rule.last_triggered_at is None or rule.last_triggered_at.replace(tzinfo=None) < ( + current_datetime - timedelta(seconds=rule.cooldown_seconds) + ): + # changes applied here will be reflected at consequent queries (at `scale_services()`) + # so we do not have to propagate the changes on the function level + rule.endpoint_row.replicas += rule.step_size + if rule.endpoint_row.replicas < 0: + rule.endpoint_row.replicas = 0 + rule.last_triggered_at = current_datetime + log.debug( + "_autoscale_endpoints(e: {}, r: {}): added {} to replica count", + rule.endpoint, + rule.id, + rule.step_size, + ) + else: + log.debug( + "_autoscale_endpoints(e: {}, r: {}): rule on cooldown period; deferring execution", + rule.endpoint, + rule.id, + ) async def scale_services( self, @@ -1514,7 +1553,7 @@ def _pipeline(r: Redis) -> RedisPipeline: return pipe async def _autoscale_txn() -> None: - async with 
self.db.begin_sssion(commit_on_end=True) as session: + async with self.db.begin_session(commit_on_end=True) as session: await self._autoscale_endpoints(session) await execute_with_retry(_autoscale_txn) From 66f28e7c14e4bf1f0e76e52a18a97debfbdde61d Mon Sep 17 00:00:00 2001 From: Kyujin Cho Date: Fri, 20 Dec 2024 12:14:30 +0000 Subject: [PATCH 06/75] chore: update GraphQL schema dump Co-authored-by: octodog --- src/ai/backend/manager/api/schema.graphql | 135 ++++++++++++++++++++++ 1 file changed, 135 insertions(+) diff --git a/src/ai/backend/manager/api/schema.graphql b/src/ai/backend/manager/api/schema.graphql index cfdf9a29eae..363daba176a 100644 --- a/src/ai/backend/manager/api/schema.graphql +++ b/src/ai/backend/manager/api/schema.graphql @@ -220,6 +220,12 @@ type Queries { """Added in 24.12.0.""" networks(filter: String, order: String, offset: Int, before: String, after: String, first: Int, last: Int): NetworkConnection + + """Added in 24.12.0.""" + endpoint_auto_scaling_rule_node(id: String!): EndpointAutoScalingRuleNode + + """Added in 24.12.0.""" + endpoint_auto_scaling_rule_nodes(endpoint: String!, filter: String, order: String, offset: Int, before: String, after: String, first: Int, last: Int): EndpointAutoScalingRuleConnection } """ @@ -1651,6 +1657,65 @@ type NetworkEdge { cursor: String! } +"""Added in 24.12.0.""" +type EndpointAutoScalingRuleNode implements Node { + """The ID of the object""" + id: ID! + + """Added in 24.12.0.""" + row_id: UUID! + + """Added in 24.12.0.""" + metric_source: String! + + """Added in 24.12.0.""" + metric_name: String! + + """Added in 24.12.0.""" + threshold: String! + + """Added in 24.12.0.""" + comparator: String! + + """Added in 24.12.0.""" + step_size: Int! + + """Added in 24.12.0.""" + cooldown_seconds: Int! + + """Added in 24.12.0.""" + created_at: DateTime! + + """Added in 24.12.0.""" + last_triggered_at: DateTime + + """Added in 24.12.0.""" + endpoint: UUID! 
+} + +"""Added in 24.12.0.""" +type EndpointAutoScalingRuleConnection { + """Pagination data for this connection.""" + pageInfo: PageInfo! + + """Contains the nodes in this connection.""" + edges: [EndpointAutoScalingRuleEdge]! + + """Total count of the GQL nodes of the query.""" + count: Int +} + +""" +Added in 24.12.0. A Relay edge containing a `EndpointAutoScalingRule` and its cursor. +""" +type EndpointAutoScalingRuleEdge { + """The item at the end of the edge""" + node: EndpointAutoScalingRuleNode + + """A cursor for use in pagination""" + cursor: String! +} + """All available GraphQL mutations.""" type Mutations { modify_agent(id: String!, props: ModifyAgentInput!): ModifyAgent @@ -1848,6 +1913,15 @@ type Mutations { """Object id. Can be either global id or object id. Added in 24.09.0.""" id: String! ): DeleteContainerRegistryNode + + """Added in 24.12.0.""" + create_endpoint_auto_scaling_rule_node(endpoint: String!, props: EndpointAutoScalingRuleInput!): CreateEndpointAutoScalingRuleNode + + """Added in 24.12.0.""" + modify_endpoint_auto_scaling_rule_node(id: String!, props: ModifyEndpointAutoScalingRuleInput!): ModifyEndpointAutoScalingRuleNode + + """Added in 24.12.0.""" + delete_endpoint_auto_scaling_rule_node(id: String!): DeleteEndpointAutoScalingRuleNode create_container_registry(hostname: String!, props: CreateContainerRegistryInput!): CreateContainerRegistry modify_container_registry(hostname: String!, props: ModifyContainerRegistryInput!): ModifyContainerRegistry delete_container_registry(hostname: String!): DeleteContainerRegistry @@ -2581,6 +2655,67 @@ type DeleteContainerRegistryNode { container_registry: ContainerRegistryNode } +type CreateEndpointAutoScalingRuleNode { + ok: Boolean + msg: String + rule: EndpointAutoScalingRuleNode +} + +input EndpointAutoScalingRuleInput { + """Added in 24.12.0. Available values: KERNEL, INFERENCE_FRAMEWORK""" + metric_source: String! + + """Added in 24.12.0.""" + metric_name: String! 
+ + """Added in 24.12.0.""" + threshold: String! + + """ + Added in 24.12.0. Available values: LESS_THAN, LESS_THAN_OR_EQUAL, GREATER_THAN, GREATER_THAN_OR_EQUAL + """ + comparator: String! + + """Added in 24.12.0.""" + step_size: Int! + + """Added in 24.12.0.""" + cooldown_seconds: Int! +} + +type ModifyEndpointAutoScalingRuleNode { + ok: Boolean + msg: String + rule: EndpointAutoScalingRuleNode +} + +input ModifyEndpointAutoScalingRuleInput { + """Added in 24.12.0. Available values: KERNEL, INFERENCE_FRAMEWORK""" + metric_source: String + + """Added in 24.12.0.""" + metric_name: String + + """Added in 24.12.0.""" + threshold: String + + """ + Added in 24.12.0. Available values: LESS_THAN, LESS_THAN_OR_EQUAL, GREATER_THAN, GREATER_THAN_OR_EQUAL + """ + comparator: String + + """Added in 24.12.0.""" + step_size: Int + + """Added in 24.12.0.""" + cooldown_seconds: Int +} + +type DeleteEndpointAutoScalingRuleNode { + ok: Boolean + msg: String +} + type CreateContainerRegistry { container_registry: ContainerRegistry } From d494709e484bf5ef69adbc21f71f695316357f52 Mon Sep 17 00:00:00 2001 From: Kyujin Cho Date: Fri, 20 Dec 2024 21:29:57 +0900 Subject: [PATCH 07/75] add min/max replicas --- ...7b_create_endpoint_auto_scaling_rules_table.py | 2 ++ src/ai/backend/manager/models/endpoint.py | 11 +++++++++++ .../backend/manager/models/gql_models/endpoint.py | 15 ++++++++++++++- 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/src/ai/backend/manager/models/alembic/versions/fb89f5d7817b_create_endpoint_auto_scaling_rules_table.py b/src/ai/backend/manager/models/alembic/versions/fb89f5d7817b_create_endpoint_auto_scaling_rules_table.py index 6b0828ba4a1..245d7dbdb2c 100644 --- a/src/ai/backend/manager/models/alembic/versions/fb89f5d7817b_create_endpoint_auto_scaling_rules_table.py +++ b/src/ai/backend/manager/models/alembic/versions/fb89f5d7817b_create_endpoint_auto_scaling_rules_table.py @@ -29,6 +29,8 @@ def upgrade() -> None: sa.Column("comparator", 
sa.VARCHAR(64), nullable=False),
         sa.Column("step_size", sa.Integer(), nullable=False),
         sa.Column("cooldown_seconds", sa.Integer(), nullable=False),
+        sa.Column("min_replicas", sa.Integer(), nullable=True),
+        sa.Column("max_replicas", sa.Integer(), nullable=True),
         sa.Column(
             "created_at", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=True
         ),
diff --git a/src/ai/backend/manager/models/endpoint.py b/src/ai/backend/manager/models/endpoint.py
index c9b4154b576..015317ca3f7 100644
--- a/src/ai/backend/manager/models/endpoint.py
+++ b/src/ai/backend/manager/models/endpoint.py
@@ -467,6 +467,8 @@ async def create_auto_scaling_rule(
         comparator: AutoScalingMetricComparator,
         step_size: int,
         cooldown_seconds: int = 300,
+        min_replicas: int | None = None,
+        max_replicas: int | None = None,
     ) -> "EndpointAutoScalingRuleRow":
         row = EndpointAutoScalingRuleRow(
             uuid.uuid4(),
@@ -477,6 +479,8 @@ async def create_auto_scaling_rule(
             comparator,
             step_size,
             cooldown_seconds=cooldown_seconds,
+            min_replicas=min_replicas,
+            max_replicas=max_replicas,
         )
         session.add(row)
         return row
@@ -590,6 +594,9 @@ class EndpointAutoScalingRuleRow(Base):
     step_size = sa.Column("step_size", sa.Integer(), nullable=False)
     cooldown_seconds = sa.Column("cooldown_seconds", sa.Integer(), nullable=False, default=300)
 
+    min_replicas = sa.Column("min_replicas", sa.Integer(), nullable=True)
+    max_replicas = sa.Column("max_replicas", sa.Integer(), nullable=True)
+
     created_at = sa.Column(
         "created_at",
         sa.DateTime(timezone=True),
@@ -644,6 +651,8 @@ def __init__(
         comparator: AutoScalingMetricComparator,
         step_size: int,
         cooldown_seconds: int = 300,
+        min_replicas: int | None = None,
+        max_replicas: int | None = None,
     ) -> None:
         self.id = id
         self.endpoint = endpoint
@@ -653,6 +662,8 @@ def __init__(
         self.comparator = comparator
         self.step_size = step_size
         self.cooldown_seconds = cooldown_seconds
+        self.min_replicas = min_replicas
+        self.max_replicas = max_replicas
 
     async def remove_rule(
         self,
diff 
--git a/src/ai/backend/manager/models/gql_models/endpoint.py b/src/ai/backend/manager/models/gql_models/endpoint.py index 8ccebc2af82..17ea6ec73cd 100644 --- a/src/ai/backend/manager/models/gql_models/endpoint.py +++ b/src/ai/backend/manager/models/gql_models/endpoint.py @@ -77,6 +77,9 @@ class Meta: step_size = graphene.Int(required=True, description="Added in 24.12.0.") cooldown_seconds = graphene.Int(required=True, description="Added in 24.12.0.") + min_replicas = graphene.Int(description="Added in 24.12.0.") + max_replicas = graphene.Int(description="Added in 24.12.0.") + created_at = GQLDateTime(required=True, description="Added in 24.12.0.") last_triggered_at = GQLDateTime(description="Added in 24.12.0.") @@ -95,6 +98,8 @@ def from_row( comparator=row.comparator.name, step_size=row.step_size, cooldown_seconds=row.cooldown_seconds, + min_replicas=row.min_replicas, + max_replicas=row.max_replicas, created_at=row.created_at, last_triggered_at=row.last_triggered_at, endpoint=row.endpoint, @@ -228,6 +233,8 @@ class EndpointAutoScalingRuleInput(graphene.InputObjectType): ) step_size = graphene.Int(required=True, description="Added in 24.12.0.") cooldown_seconds = graphene.Int(required=True, description="Added in 24.12.0.") + min_replicas = graphene.Int(description="Added in 24.12.0.") + max_replicas = graphene.Int(description="Added in 24.12.0.") class ModifyEndpointAutoScalingRuleInput(graphene.InputObjectType): @@ -245,6 +252,8 @@ class ModifyEndpointAutoScalingRuleInput(graphene.InputObjectType): ) step_size = graphene.Int(description="Added in 24.12.0.") cooldown_seconds = graphene.Int(description="Added in 24.12.0.") + min_replicas = graphene.Int(description="Added in 24.12.0.") + max_replicas = graphene.Int(description="Added in 24.12.0.") class CreateEndpointAutoScalingRuleNode(graphene.Mutation): @@ -317,7 +326,9 @@ async def _do_mutate() -> CreateEndpointAutoScalingRuleNode: _threshold, _comparator, props.step_size, - props.cooldown_seconds, + 
cooldown_seconds=props.cooldown_seconds, + min_replicas=props.min_replicas, + max_replicas=props.max_replicas, ) return CreateEndpointAutoScalingRuleNode( ok=True, @@ -395,6 +406,8 @@ async def _do_mutate() -> CreateEndpointAutoScalingRuleNode: set_if_set(props, row, "metric_name") set_if_set(props, row, "step_size") set_if_set(props, row, "cooldown_seconds") + set_if_set(props, row, "min_replicas") + set_if_set(props, row, "max_replicas") return ModifyEndpointAutoScalingRuleNode( ok=True, From d1b02316f9e5ee6218cf019a17e1bf05d991e9a1 Mon Sep 17 00:00:00 2001 From: Kyujin Cho Date: Fri, 20 Dec 2024 12:32:13 +0000 Subject: [PATCH 08/75] chore: update GraphQL schema dump Co-authored-by: octodog --- src/ai/backend/manager/api/schema.graphql | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/ai/backend/manager/api/schema.graphql b/src/ai/backend/manager/api/schema.graphql index 363daba176a..0f52747a1c6 100644 --- a/src/ai/backend/manager/api/schema.graphql +++ b/src/ai/backend/manager/api/schema.graphql @@ -1683,6 +1683,12 @@ type EndpointAutoScalingRuleNode implements Node { """Added in 24.12.0.""" cooldown_seconds: Int! + """Added in 24.12.0.""" + min_replicas: Int + + """Added in 24.12.0.""" + max_replicas: Int + """Added in 24.12.0.""" created_at: DateTime! @@ -2681,6 +2687,12 @@ input EndpointAutoScalingRuleInput { """Added in 24.12.0.""" cooldown_seconds: Int! 
+ + """Added in 24.12.0.""" + min_replicas: Int + + """Added in 24.12.0.""" + max_replicas: Int } type ModifyEndpointAutoScalingRuleNode { ok: Boolean msg: String rule: EndpointAutoScalingRuleNode } input ModifyEndpointAutoScalingRuleInput { """Added in 24.12.0. Available values: KERNEL, INFERENCE_FRAMEWORK""" metric_source: String @@ -2709,6 +2721,12 @@ input ModifyEndpointAutoScalingRuleInput { """Added in 24.12.0.""" cooldown_seconds: Int + + """Added in 24.12.0.""" + min_replicas: Int + + """Added in 24.12.0.""" + max_replicas: Int } type DeleteEndpointAutoScalingRuleNode { From 11f57e08c386d3219bb8cc5bbd77bf08b8cf001a Mon Sep 17 00:00:00 2001 From: Kyujin Cho Date: Fri, 20 Dec 2024 21:40:37 +0900 Subject: [PATCH 09/75] add min/max replicas --- src/ai/backend/manager/scheduler/dispatcher.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/ai/backend/manager/scheduler/dispatcher.py b/src/ai/backend/manager/scheduler/dispatcher.py index 37c9fa296e4..169bd788672 100644 --- a/src/ai/backend/manager/scheduler/dispatcher.py +++ b/src/ai/backend/manager/scheduler/dispatcher.py @@ -1507,6 +1507,19 @@ async def _autoscale_endpoints( should_trigger, ) if should_trigger: + new_replicas = rule.endpoint_row.replicas + rule.step_size + if (rule.min_replicas is not None and new_replicas < rule.min_replicas) or ( + rule.max_replicas is not None and new_replicas > rule.max_replicas + ): + log.debug( + "_autoscale_endpoints(e: {}, r: {}): new replica count {} violates min ({}) / max ({}) replica restriction; skipping", + rule.endpoint, + rule.id, + new_replicas, + rule.min_replicas, + rule.max_replicas, + ) + continue if rule.last_triggered_at is None or rule.last_triggered_at.replace(tzinfo=None) < ( current_datetime - timedelta(seconds=rule.cooldown_seconds) ): From d533ba0c2bccdfc18e2bf2976e3eacf8ca032b98 Mon Sep 17 00:00:00 2001 From: Kyujin Cho Date: Fri, 20 Dec 2024 22:09:40 +0900 Subject: [PATCH 10/75] add missing annotations --- .../backend/manager/models/gql_models/endpoint.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/ai/backend/manager/models/gql_models/endpoint.py b/src/ai/backend/manager/models/gql_models/endpoint.py
index 17ea6ec73cd..4cd3038a31f 100644 --- a/src/ai/backend/manager/models/gql_models/endpoint.py +++ b/src/ai/backend/manager/models/gql_models/endpoint.py @@ -217,6 +217,9 @@ class Meta: class EndpointAutoScalingRuleInput(graphene.InputObjectType): + class Meta: + description = "Added in 24.12.0." + metric_source = graphene.String( required=True, description=( @@ -238,6 +241,9 @@ class EndpointAutoScalingRuleInput(graphene.InputObjectType): class ModifyEndpointAutoScalingRuleInput(graphene.InputObjectType): + class Meta: + description = "Added in 24.12.0." + metric_source = graphene.String( description=( f"Added in 24.12.0. Available values: {", ".join([p.name for p in AutoScalingMetricSource])}" @@ -263,6 +269,9 @@ class Arguments: endpoint = graphene.String(required=True) props = EndpointAutoScalingRuleInput(required=True) + class Meta: + description = "Added in 24.12.0." + ok = graphene.Boolean() msg = graphene.String() rule = graphene.Field(lambda: EndpointAutoScalingRuleNode, required=False) @@ -346,6 +355,9 @@ class Arguments: id = graphene.String(required=True) props = ModifyEndpointAutoScalingRuleInput(required=True) + class Meta: + description = "Added in 24.12.0." + ok = graphene.Boolean() msg = graphene.String() rule = graphene.Field(lambda: EndpointAutoScalingRuleNode, required=False) @@ -424,6 +436,9 @@ class DeleteEndpointAutoScalingRuleNode(graphene.Mutation): class Arguments: id = graphene.String(required=True) + class Meta: + description = "Added in 24.12.0." 
+ ok = graphene.Boolean() msg = graphene.String() From 9de5bf6b7dcf5edf1456b4927f4cec016bb8bbbe Mon Sep 17 00:00:00 2001 From: Kyujin Cho Date: Fri, 20 Dec 2024 13:12:08 +0000 Subject: [PATCH 11/75] chore: update GraphQL schema dump Co-authored-by: octodog --- src/ai/backend/manager/api/schema.graphql | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/ai/backend/manager/api/schema.graphql b/src/ai/backend/manager/api/schema.graphql index 0f52747a1c6..9e4aaa9a969 100644 --- a/src/ai/backend/manager/api/schema.graphql +++ b/src/ai/backend/manager/api/schema.graphql @@ -2661,12 +2661,14 @@ type DeleteContainerRegistryNode { container_registry: ContainerRegistryNode } +"""Added in 24.12.0.""" type CreateEndpointAutoScalingRuleNode { ok: Boolean msg: String rule: EndpointAutoScalingRuleNode } +"""Added in 24.12.0.""" input EndpointAutoScalingRuleInput { """Added in 24.12.0. Available values: KERNEL, INFERENCE_FRAMEWORK""" metric_source: String! @@ -2695,12 +2697,14 @@ input EndpointAutoScalingRuleInput { max_replicas: Int } +"""Added in 24.12.0.""" type ModifyEndpointAutoScalingRuleNode { ok: Boolean msg: String rule: EndpointAutoScalingRuleNode } +"""Added in 24.12.0.""" input ModifyEndpointAutoScalingRuleInput { """Added in 24.12.0. 
Available values: KERNEL, INFERENCE_FRAMEWORK""" metric_source: String @@ -2729,6 +2733,7 @@ input ModifyEndpointAutoScalingRuleInput { max_replicas: Int } +"""Added in 24.12.0.""" type DeleteEndpointAutoScalingRuleNode { ok: Boolean msg: String From e6209d463066b9074a5e162ac561ce98d52e79ef Mon Sep 17 00:00:00 2001 From: Kyujin Cho Date: Fri, 27 Dec 2024 00:20:36 +0900 Subject: [PATCH 12/75] replace bulk with batch --- src/ai/backend/manager/models/endpoint.py | 6 +++--- src/ai/backend/manager/models/kernel.py | 8 ++++---- src/ai/backend/manager/scheduler/dispatcher.py | 8 ++++---- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/ai/backend/manager/models/endpoint.py b/src/ai/backend/manager/models/endpoint.py index 015317ca3f7..7b1e903a3ae 100644 --- a/src/ai/backend/manager/models/endpoint.py +++ b/src/ai/backend/manager/models/endpoint.py @@ -377,7 +377,7 @@ async def list( return result.scalars().all() @classmethod - async def bulk_load( + async def batch_load( cls, session: AsyncSession, endpoint_ids: List[uuid.UUID], @@ -893,7 +893,7 @@ def from_enum(cls, enum: RuntimeVariant) -> "RuntimeVariantInfo": class EndpointStatistics: @classmethod - async def bulk_load_endpoint_metrics( + async def batch_load_by_endpoint_impl( cls, redis_stat: RedisConnectionInfo, endpoint_ids: Sequence[uuid.UUID], @@ -919,7 +919,7 @@ async def batch_load_by_endpoint( ctx: "GraphQueryContext", endpoint_ids: Sequence[uuid.UUID], ) -> Sequence[Optional[Mapping[str, Any]]]: - return await cls.bulk_load_endpoint_metrics(ctx.redis_stat, endpoint_ids) + return await cls.batch_load_by_endpoint_impl(ctx.redis_stat, endpoint_ids) @classmethod async def batch_load_by_replica( diff --git a/src/ai/backend/manager/models/kernel.py b/src/ai/backend/manager/models/kernel.py index 3e34be10bc2..c0dac4aee46 100644 --- a/src/ai/backend/manager/models/kernel.py +++ b/src/ai/backend/manager/models/kernel.py @@ -588,7 +588,7 @@ def get_used_days(self, local_tz: tzfile) -> Optional[int]: 
return None @staticmethod - async def bulk_load_by_session_id( + async def batch_load_by_session_id( session: SASession, session_ids: list[uuid.UUID] ) -> list["KernelRow"]: query = sa.select(KernelRow).where(KernelRow.session_id.in_(session_ids)) @@ -810,7 +810,7 @@ class SessionInfo(TypedDict): class KernelStatistics: @classmethod - async def bulk_load_kernel_metrics( + async def batch_load_by_kernel_impl( cls, redis_stat: RedisConnectionInfo, session_ids: Sequence[SessionId], @@ -838,8 +838,8 @@ async def batch_load_by_kernel( ctx: GraphQueryContext, session_ids: Sequence[SessionId], ) -> Sequence[Optional[Mapping[str, Any]]]: - """wrapper of `KernelStatistics.bulk_load_kernel_metrics()` for aiodataloader""" - return await cls.bulk_load_kernel_metrics(ctx.redis_stat, session_ids) + """wrapper of `KernelStatistics.batch_load_by_kernel_impl()` for aiodataloader""" + return await cls.batch_load_by_kernel_impl(ctx.redis_stat, session_ids) @classmethod async def batch_load_inference_metrics_by_kernel( diff --git a/src/ai/backend/manager/scheduler/dispatcher.py b/src/ai/backend/manager/scheduler/dispatcher.py index 169bd788672..19c359b5c0a 100644 --- a/src/ai/backend/manager/scheduler/dispatcher.py +++ b/src/ai/backend/manager/scheduler/dispatcher.py @@ -1390,7 +1390,7 @@ async def _autoscale_endpoints( # to fetch aggregated kernel metrics among every kernels managed by a single endpoint # we first need to collect every routings, and then the sessions tied to each routing, # and finally the child kernels of each session - endpoints = await EndpointRow.bulk_load( + endpoints = await EndpointRow.batch_load( session, [rule.endpoint for rule in rules], load_routes=True ) endpoint_by_id: dict[uuid.UUID, EndpointRow] = { @@ -1413,7 +1413,7 @@ async def _autoscale_endpoints( case AutoScalingMetricSource.INFERENCE_FRAMEWORK: metric_requested_endpoints.append(rule.endpoint) - kernel_rows = await KernelRow.bulk_load_by_session_id( + kernel_rows = await 
KernelRow.batch_load_by_session_id( session, list(metric_requested_sessions) ) for kernel in kernel_rows: @@ -1422,11 +1422,11 @@ async def _autoscale_endpoints( # to speed up and lower the pressure to the redis we must load every metrics # in bulk, not querying each key at once - kernel_live_stats = await KernelStatistics.bulk_load_kernel_metrics( + kernel_live_stats = await KernelStatistics.batch_load_by_kernel_impl( self.redis_stat, cast(list[SessionId], list(metric_requested_kernels)), ) - endpoint_live_stats = await EndpointStatistics.bulk_load_endpoint_metrics( + endpoint_live_stats = await EndpointStatistics.batch_load_by_endpoint_impl( self.redis_stat, cast(list[SessionId], list(metric_requested_endpoints)), ) From d0a0af83d12637c9981450452e210e8f42b89221 Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Mon, 30 Dec 2024 13:46:48 +0900 Subject: [PATCH 13/75] fix: Remove unnecessary version-added notes in new GQL object type's field descriptions --- .../manager/models/gql_models/endpoint.py | 60 +++++++++---------- 1 file changed, 28 insertions(+), 32 deletions(-) diff --git a/src/ai/backend/manager/models/gql_models/endpoint.py b/src/ai/backend/manager/models/gql_models/endpoint.py index 4cd3038a31f..b7e627c457c 100644 --- a/src/ai/backend/manager/models/gql_models/endpoint.py +++ b/src/ai/backend/manager/models/gql_models/endpoint.py @@ -68,22 +68,22 @@ class Meta: interfaces = (AsyncNode,) description = "Added in 24.12.0." 
- row_id = graphene.UUID(required=True, description="Added in 24.12.0.") + row_id = graphene.UUID(required=True) - metric_source = graphene.String(required=True, description="Added in 24.12.0.") - metric_name = graphene.String(required=True, description="Added in 24.12.0.") - threshold = graphene.String(required=True, description="Added in 24.12.0.") - comparator = graphene.String(required=True, description="Added in 24.12.0.") - step_size = graphene.Int(required=True, description="Added in 24.12.0.") - cooldown_seconds = graphene.Int(required=True, description="Added in 24.12.0.") + metric_source = graphene.String(required=True) + metric_name = graphene.String(required=True) + threshold = graphene.String(required=True) + comparator = graphene.String(required=True) + step_size = graphene.Int(required=True) + cooldown_seconds = graphene.Int(required=True) - min_replicas = graphene.Int(description="Added in 24.12.0.") - max_replicas = graphene.Int(description="Added in 24.12.0.") + min_replicas = graphene.Int() + max_replicas = graphene.Int() - created_at = GQLDateTime(required=True, description="Added in 24.12.0.") - last_triggered_at = GQLDateTime(description="Added in 24.12.0.") + created_at = GQLDateTime(required=True) + last_triggered_at = GQLDateTime() - endpoint = graphene.UUID(required=True, description="Added in 24.12.0.") + endpoint = graphene.UUID(required=True) @classmethod def from_row( @@ -222,22 +222,20 @@ class Meta: metric_source = graphene.String( required=True, - description=( - f"Added in 24.12.0. 
Available values: {", ".join([p.name for p in AutoScalingMetricSource])}" - ), + description=(f"Available values: {", ".join([p.name for p in AutoScalingMetricSource])}"), ) - metric_name = graphene.String(required=True, description="Added in 24.12.0.") - threshold = graphene.String(required=True, description="Added in 24.12.0.") + metric_name = graphene.String(required=True) + threshold = graphene.String(required=True) comparator = graphene.String( required=True, description=( - f"Added in 24.12.0. Available values: {", ".join([p.name for p in AutoScalingMetricComparator])}" + f"Available values: {", ".join([p.name for p in AutoScalingMetricComparator])}" ), ) - step_size = graphene.Int(required=True, description="Added in 24.12.0.") - cooldown_seconds = graphene.Int(required=True, description="Added in 24.12.0.") - min_replicas = graphene.Int(description="Added in 24.12.0.") - max_replicas = graphene.Int(description="Added in 24.12.0.") + step_size = graphene.Int(required=True) + cooldown_seconds = graphene.Int(required=True) + min_replicas = graphene.Int() + max_replicas = graphene.Int() class ModifyEndpointAutoScalingRuleInput(graphene.InputObjectType): @@ -245,21 +243,19 @@ class Meta: description = "Added in 24.12.0." metric_source = graphene.String( - description=( - f"Added in 24.12.0. Available values: {", ".join([p.name for p in AutoScalingMetricSource])}" - ) + description=(f"Available values: {", ".join([p.name for p in AutoScalingMetricSource])}") ) - metric_name = graphene.String(description="Added in 24.12.0.") - threshold = graphene.String(description="Added in 24.12.0.") + metric_name = graphene.String() + threshold = graphene.String() comparator = graphene.String( description=( - f"Added in 24.12.0. 
Available values: {", ".join([p.name for p in AutoScalingMetricComparator])}" + f"Available values: {", ".join([p.name for p in AutoScalingMetricComparator])}" ) ) - step_size = graphene.Int(description="Added in 24.12.0.") - cooldown_seconds = graphene.Int(description="Added in 24.12.0.") - min_replicas = graphene.Int(description="Added in 24.12.0.") - max_replicas = graphene.Int(description="Added in 24.12.0.") + step_size = graphene.Int() + cooldown_seconds = graphene.Int() + min_replicas = graphene.Int() + max_replicas = graphene.Int() class CreateEndpointAutoScalingRuleNode(graphene.Mutation): From bb7db5ec2a8a3be87db7dfb0d6a528efd2540a89 Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Mon, 30 Dec 2024 04:49:40 +0000 Subject: [PATCH 14/75] chore: update GraphQL schema dump Co-authored-by: octodog --- src/ai/backend/manager/api/schema.graphql | 56 ++--------------------- 1 file changed, 4 insertions(+), 52 deletions(-) diff --git a/src/ai/backend/manager/api/schema.graphql b/src/ai/backend/manager/api/schema.graphql index 9e4aaa9a969..fad6c657b0d 100644 --- a/src/ai/backend/manager/api/schema.graphql +++ b/src/ai/backend/manager/api/schema.graphql @@ -1661,41 +1661,17 @@ type NetworkEdge { type EndpointAutoScalingRuleNode implements Node { """The ID of the object""" id: ID! - - """Added in 24.12.0.""" row_id: UUID! - - """Added in 24.12.0.""" metric_source: String! - - """Added in 24.12.0.""" metric_name: String! - - """Added in 24.12.0.""" threshold: String! - - """Added in 24.12.0.""" comparator: String! - - """Added in 24.12.0.""" step_size: Int! - - """Added in 24.12.0.""" cooldown_seconds: Int! - - """Added in 24.12.0.""" min_replicas: Int - - """Added in 24.12.0.""" max_replicas: Int - - """Added in 24.12.0.""" created_at: DateTime! - - """Added in 24.12.0.""" last_triggered_at: DateTime - - """Added in 24.12.0.""" endpoint: UUID! 
} @@ -2670,30 +2646,18 @@ type CreateEndpointAutoScalingRuleNode { """Added in 24.12.0.""" input EndpointAutoScalingRuleInput { - """Added in 24.12.0. Available values: KERNEL, INFERENCE_FRAMEWORK""" + """Available values: KERNEL, INFERENCE_FRAMEWORK""" metric_source: String! - - """Added in 24.12.0.""" metric_name: String! - - """Added in 24.12.0.""" threshold: String! """ - Added in 24.12.0. Available values: LESS_THAN, LESS_THAN_OR_EQUAL, GREATER_THAN, GREATER_THAN_OR_EQUAL + Available values: LESS_THAN, LESS_THAN_OR_EQUAL, GREATER_THAN, GREATER_THAN_OR_EQUAL """ comparator: String! - - """Added in 24.12.0.""" step_size: Int! - - """Added in 24.12.0.""" cooldown_seconds: Int! - - """Added in 24.12.0.""" min_replicas: Int - - """Added in 24.12.0.""" max_replicas: Int } @@ -2706,30 +2670,18 @@ type ModifyEndpointAutoScalingRuleNode { """Added in 24.12.0.""" input ModifyEndpointAutoScalingRuleInput { - """Added in 24.12.0. Available values: KERNEL, INFERENCE_FRAMEWORK""" + """Available values: KERNEL, INFERENCE_FRAMEWORK""" metric_source: String - - """Added in 24.12.0.""" metric_name: String - - """Added in 24.12.0.""" threshold: String """ - Added in 24.12.0. 
Available values: LESS_THAN, LESS_THAN_OR_EQUAL, GREATER_THAN, GREATER_THAN_OR_EQUAL + Available values: LESS_THAN, LESS_THAN_OR_EQUAL, GREATER_THAN, GREATER_THAN_OR_EQUAL """ comparator: String - - """Added in 24.12.0.""" step_size: Int - - """Added in 24.12.0.""" cooldown_seconds: Int - - """Added in 24.12.0.""" min_replicas: Int - - """Added in 24.12.0.""" max_replicas: Int } From cf9f1ee75ca48a9a336c0f9df4b454d4c1dfd349 Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Mon, 30 Dec 2024 13:49:55 +0900 Subject: [PATCH 15/75] fix: Ensure floating-point format when stringifying decimal column values --- src/ai/backend/manager/models/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ai/backend/manager/models/base.py b/src/ai/backend/manager/models/base.py index 36730e00eb8..510e9df2d6f 100644 --- a/src/ai/backend/manager/models/base.py +++ b/src/ai/backend/manager/models/base.py @@ -1672,7 +1672,7 @@ def process_bind_param( value: Optional[Decimal], dialect: Dialect, ) -> Optional[str]: - return str(value) if value else None + return f"{value:f}" if value else None def process_result_value( self, From dc341d82a5d8faabbc210307baa7debcb73b6c2a Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Mon, 30 Dec 2024 14:34:50 +0900 Subject: [PATCH 16/75] fix: Typo in the database column --- src/ai/backend/manager/models/endpoint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ai/backend/manager/models/endpoint.py b/src/ai/backend/manager/models/endpoint.py index 7b1e903a3ae..7d2d1583f6b 100644 --- a/src/ai/backend/manager/models/endpoint.py +++ b/src/ai/backend/manager/models/endpoint.py @@ -595,7 +595,7 @@ class EndpointAutoScalingRuleRow(Base): cooldown_seconds = sa.Column("cooldown_seconds", sa.Integer(), nullable=False, default=300) min_replicas = sa.Column("min_replicas", sa.Integer(), nullable=True) - max_replicas = sa.Column("min_replicas", sa.Integer(), nullable=True) + max_replicas = sa.Column("max_replicas", 
sa.Integer(), nullable=True) created_at = sa.Column( "created_at", From 846f13d2b40d235c04bc04cc2871f7f274ad55f2 Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Mon, 30 Dec 2024 14:46:15 +0900 Subject: [PATCH 17/75] fix: Missing await --- src/ai/backend/manager/models/endpoint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ai/backend/manager/models/endpoint.py b/src/ai/backend/manager/models/endpoint.py index 7d2d1583f6b..269a8505ee6 100644 --- a/src/ai/backend/manager/models/endpoint.py +++ b/src/ai/backend/manager/models/endpoint.py @@ -669,7 +669,7 @@ async def remove_rule( self, session: AsyncSession, ) -> None: - session.delete(self) + await session.delete(self) class ModelServicePredicateChecker: From 580fb8eb0bd9c7e03bdf86b6d096370e0d4ceb43 Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Mon, 30 Dec 2024 15:05:41 +0900 Subject: [PATCH 18/75] fix: Remove boilerplate constructor --- src/ai/backend/manager/models/endpoint.py | 24 ----------------------- 1 file changed, 24 deletions(-) diff --git a/src/ai/backend/manager/models/endpoint.py b/src/ai/backend/manager/models/endpoint.py index 269a8505ee6..32a427abf7b 100644 --- a/src/ai/backend/manager/models/endpoint.py +++ b/src/ai/backend/manager/models/endpoint.py @@ -641,30 +641,6 @@ async def get( raise ObjectNotFound("endpoint_auto_scaling_rule") return row - def __init__( - self, - id: uuid.UUID, - endpoint: uuid.UUID, - metric_source: AutoScalingMetricSource, - metric_name: str, - threshold: Decimal, - comparator: AutoScalingMetricComparator, - step_size: int, - cooldown_seconds: int = 300, - min_replicas: int | None = None, - max_replicas: int | None = None, - ) -> None: - self.id = id - self.endpoint = endpoint - self.metric_source = metric_source - self.metric_name = metric_name - self.threshold = threshold - self.comparator = comparator - self.step_size = step_size - self.cooldown_seconds = cooldown_seconds - self.min_replicas = min_replicas - self.max_replicas = max_replicas - 
async def remove_rule( self, session: AsyncSession, From 913569e64edf0a157deaf8cfdaaff12188d3012d Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Mon, 30 Dec 2024 17:43:10 +0900 Subject: [PATCH 19/75] fix: Let StrEnum-based fields accept values, not capitalized names --- src/ai/backend/manager/models/gql_models/endpoint.py | 12 ++++++------ src/ai/backend/manager/models/utils.py | 8 ++++++++ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/ai/backend/manager/models/gql_models/endpoint.py b/src/ai/backend/manager/models/gql_models/endpoint.py index b7e627c457c..1b1ef0a61fc 100644 --- a/src/ai/backend/manager/models/gql_models/endpoint.py +++ b/src/ai/backend/manager/models/gql_models/endpoint.py @@ -8,12 +8,11 @@ from graphql import Undefined from sqlalchemy.orm.exc import NoResultFound -from ai.backend.manager.api.exceptions import ( +from ...api.exceptions import ( GenericForbidden, InvalidAPIParameters, ObjectNotFound, ) - from ..base import ( FilterExprArg, OrderExprArg, @@ -31,6 +30,7 @@ from ..minilang.ordering import OrderSpecItem, QueryOrderParser from ..minilang.queryfilter import FieldSpecItem, QueryFilterParser from ..user import UserRole +from ..utils import generate_desc_for_enum_kvlist if TYPE_CHECKING: from ..gql import GraphQueryContext @@ -222,14 +222,14 @@ class Meta: metric_source = graphene.String( required=True, - description=(f"Available values: {", ".join([p.name for p in AutoScalingMetricSource])}"), + description=(f"Available values: {generate_desc_for_enum_kvlist(AutoScalingMetricSource)}"), ) metric_name = graphene.String(required=True) threshold = graphene.String(required=True) comparator = graphene.String( required=True, description=( - f"Available values: {", ".join([p.name for p in AutoScalingMetricComparator])}" + f"Available values: {generate_desc_for_enum_kvlist(AutoScalingMetricComparator)}" ), ) step_size = graphene.Int(required=True) @@ -307,13 +307,13 @@ async def mutate( raise GenericForbidden try: - _source = 
AutoScalingMetricSource[props.metric_source] + _source = AutoScalingMetricSource(props.metric_source) except (KeyError, ValueError): raise InvalidAPIParameters( f"Unsupported AutoScalingMetricSource {props.metric_source}" ) try: - _comparator = AutoScalingMetricComparator[props.comparator] + _comparator = AutoScalingMetricComparator(props.comparator) except (KeyError, ValueError): raise InvalidAPIParameters( f"Unsupported AutoScalingMetricComparator {props.comparator}" diff --git a/src/ai/backend/manager/models/utils.py b/src/ai/backend/manager/models/utils.py index 2731b9fce4d..3d66f1a998c 100644 --- a/src/ai/backend/manager/models/utils.py +++ b/src/ai/backend/manager/models/utils.py @@ -1,6 +1,7 @@ from __future__ import annotations import asyncio +import enum import functools import json import logging @@ -550,3 +551,10 @@ async def vacuum_db( vacuum_sql = "VACUUM FULL" if vacuum_full else "VACUUM" log.info(f"Perfoming {vacuum_sql} operation...") await conn.exec_driver_sql(vacuum_sql) + + +def generate_desc_for_enum_kvlist(e: type[enum.StrEnum]) -> str: + items = [] + for name, value in e.__members__.items(): + items.append(f"{str(value)!r} ({name})") + return ", ".join(items) From 0ddf2e8c5f21bfe0f0a0d22dc596ad3d60707b9c Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Mon, 30 Dec 2024 08:45:30 +0000 Subject: [PATCH 20/75] chore: update GraphQL schema dump Co-authored-by: octodog --- src/ai/backend/manager/api/schema.graphql | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/ai/backend/manager/api/schema.graphql b/src/ai/backend/manager/api/schema.graphql index fad6c657b0d..21c47fad61a 100644 --- a/src/ai/backend/manager/api/schema.graphql +++ b/src/ai/backend/manager/api/schema.graphql @@ -2646,13 +2646,15 @@ type CreateEndpointAutoScalingRuleNode { """Added in 24.12.0.""" input EndpointAutoScalingRuleInput { - """Available values: KERNEL, INFERENCE_FRAMEWORK""" + """ + Available values: 'kernel' (KERNEL), 'inference-framework' 
(INFERENCE_FRAMEWORK) + """ metric_source: String! metric_name: String! threshold: String! """ - Available values: LESS_THAN, LESS_THAN_OR_EQUAL, GREATER_THAN, GREATER_THAN_OR_EQUAL + Available values: 'lt' (LESS_THAN), 'le' (LESS_THAN_OR_EQUAL), 'gt' (GREATER_THAN), 'ge' (GREATER_THAN_OR_EQUAL) """ comparator: String! step_size: Int! From 3d4563ad401fcf17c0b2b436c4184033f8b69c7f Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Mon, 30 Dec 2024 17:49:52 +0900 Subject: [PATCH 21/75] refactor: We have typing.Self --- .../manager/models/gql_models/endpoint.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/ai/backend/manager/models/gql_models/endpoint.py b/src/ai/backend/manager/models/gql_models/endpoint.py index 1b1ef0a61fc..ce2dbb7356a 100644 --- a/src/ai/backend/manager/models/gql_models/endpoint.py +++ b/src/ai/backend/manager/models/gql_models/endpoint.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import decimal import uuid from typing import TYPE_CHECKING, Mapping, Self @@ -86,9 +88,7 @@ class Meta: endpoint = graphene.UUID(required=True) @classmethod - def from_row( - cls, graph_ctx: "GraphQueryContext", row: EndpointAutoScalingRuleRow - ) -> "EndpointAutoScalingRuleNode": + def from_row(cls, graph_ctx: GraphQueryContext, row: EndpointAutoScalingRuleRow) -> Self: return EndpointAutoScalingRuleNode( id=row.id, row_id=row.id, @@ -106,9 +106,7 @@ def from_row( ) @classmethod - async def get_node( - cls, info: graphene.ResolveInfo, rule_id: str - ) -> "EndpointAutoScalingRuleNode": + async def get_node(cls, info: graphene.ResolveInfo, rule_id: str) -> Self: graph_ctx: GraphQueryContext = info.context _, raw_rule_id = AsyncNode.resolve_global_id(info, rule_id) @@ -279,7 +277,7 @@ async def mutate( info: graphene.ResolveInfo, endpoint: str, props: EndpointAutoScalingRuleInput, - ) -> "CreateEndpointAutoScalingRuleNode": + ) -> Self: _, raw_endpoint_id = AsyncNode.resolve_global_id(info, endpoint) if not 
raw_endpoint_id: raw_endpoint_id = endpoint @@ -365,7 +363,7 @@ async def mutate( info: graphene.ResolveInfo, id: str, props: ModifyEndpointAutoScalingRuleInput, - ) -> "ModifyEndpointAutoScalingRuleNode": + ) -> Self: _, rule_id = AsyncNode.resolve_global_id(info, id) if not rule_id: rule_id = id @@ -444,7 +442,7 @@ async def mutate( root, info: graphene.ResolveInfo, id: str, - ) -> "DeleteEndpointAutoScalingRuleNode": + ) -> Self: _, rule_id = AsyncNode.resolve_global_id(info, id) if not rule_id: rule_id = id From 3a6022541f2f7d88c7c7c0a7b6c45cb3d78e6f1d Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Mon, 30 Dec 2024 18:00:04 +0900 Subject: [PATCH 22/75] fix,refactor: Use simpler types and fix missing await in session.delete() --- .../manager/models/gql_models/endpoint.py | 27 +++++++++---------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/src/ai/backend/manager/models/gql_models/endpoint.py b/src/ai/backend/manager/models/gql_models/endpoint.py index ce2dbb7356a..f749a3bb99c 100644 --- a/src/ai/backend/manager/models/gql_models/endpoint.py +++ b/src/ai/backend/manager/models/gql_models/endpoint.py @@ -89,7 +89,7 @@ class Meta: @classmethod def from_row(cls, graph_ctx: GraphQueryContext, row: EndpointAutoScalingRuleRow) -> Self: - return EndpointAutoScalingRuleNode( + return cls( id=row.id, row_id=row.id, metric_source=row.metric_source.name, @@ -321,7 +321,7 @@ async def mutate( except decimal.InvalidOperation: raise InvalidAPIParameters(f"Cannot convert {props.threshold} to Decimal") - async def _do_mutate() -> CreateEndpointAutoScalingRuleNode: + async def _do_mutate() -> Self: created_rule = await row.create_auto_scaling_rule( db_session, _source, @@ -333,13 +333,13 @@ async def _do_mutate() -> CreateEndpointAutoScalingRuleNode: min_replicas=props.min_replicas, max_replicas=props.max_replicas, ) - return CreateEndpointAutoScalingRuleNode( + return cls( ok=True, msg="Auto scaling rule created", 
rule=EndpointAutoScalingRuleNode.from_row(info.context, created_rule), ) - return await gql_mutation_wrapper(CreateEndpointAutoScalingRuleNode, _do_mutate) + return await gql_mutation_wrapper(cls, _do_mutate) class ModifyEndpointAutoScalingRuleNode(graphene.Mutation): @@ -390,15 +390,15 @@ async def mutate( if row.endpoint_row.created_user != graph_ctx.user["uuid"]: raise GenericForbidden - async def _do_mutate() -> CreateEndpointAutoScalingRuleNode: + async def _do_mutate() -> Self: if (_newval := props.metric_source) and _newval is not Undefined: try: - row.metric_source = AutoScalingMetricSource[_newval] + row.metric_source = AutoScalingMetricSource(_newval) except (KeyError, ValueError): raise InvalidAPIParameters(f"Unsupported AutoScalingMetricSource {_newval}") if (_newval := props.comparator) and _newval is not Undefined: try: - row.comparator = AutoScalingMetricComparator[_newval] + row.comparator = AutoScalingMetricComparator(_newval) except (KeyError, ValueError): raise InvalidAPIParameters( f"Unsupported AutoScalingMetricComparator {_newval}" @@ -415,13 +415,13 @@ async def _do_mutate() -> CreateEndpointAutoScalingRuleNode: set_if_set(props, row, "min_replicas") set_if_set(props, row, "max_replicas") - return ModifyEndpointAutoScalingRuleNode( + return cls( ok=True, msg="Auto scaling rule updated", rule=EndpointAutoScalingRuleNode.from_row(info.context, row), ) - return await gql_mutation_wrapper(ModifyEndpointAutoScalingRuleNode, _do_mutate) + return await gql_mutation_wrapper(cls, _do_mutate) class DeleteEndpointAutoScalingRuleNode(graphene.Mutation): @@ -469,12 +469,11 @@ async def mutate( if row.endpoint_row.created_user != graph_ctx.user["uuid"]: raise GenericForbidden - async def _do_mutate() -> DeleteEndpointAutoScalingRuleNode: - db_session.delete(row) - - return DeleteEndpointAutoScalingRuleNode( + async def _do_mutate() -> Self: + await db_session.delete(row) + return cls( ok=True, msg="Auto scaling rule removed", ) - return await 
gql_mutation_wrapper(DeleteEndpointAutoScalingRuleNode, _do_mutate) + return await gql_mutation_wrapper(cls, _do_mutate) From 58dbb9d6ebef3773640aff3bbefe0b1a962e383c Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Mon, 30 Dec 2024 18:04:20 +0900 Subject: [PATCH 23/75] fix: We don't need __init__() boilerplate! --- src/ai/backend/manager/models/endpoint.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/ai/backend/manager/models/endpoint.py b/src/ai/backend/manager/models/endpoint.py index 32a427abf7b..1dc3f601939 100644 --- a/src/ai/backend/manager/models/endpoint.py +++ b/src/ai/backend/manager/models/endpoint.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import datetime import logging import uuid @@ -469,15 +471,15 @@ async def create_auto_scaling_rule( cooldown_seconds: int = 300, min_replicas: int | None = None, max_replicas: int | None = None, - ) -> "EndpointAutoScalingRuleRow": + ) -> EndpointAutoScalingRuleRow: row = EndpointAutoScalingRuleRow( - uuid.uuid4(), - self.id, - metric_source, - metric_name, - threshold, - comparator, - step_size, + id=uuid.uuid4(), + endpoint=self.id, + metric_source=metric_source, + metric_name=metric_name, + threshold=threshold, + comparator=comparator, + step_size=step_size, cooldown_seconds=cooldown_seconds, min_replicas=min_replicas, max_replicas=max_replicas, From a3dddf6eab5491e535bf665ee773de2f7e44ee6f Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Mon, 30 Dec 2024 18:24:12 +0900 Subject: [PATCH 24/75] refactor: Use simpler types --- src/ai/backend/manager/models/endpoint.py | 72 ++++++++++++++--------- 1 file changed, 43 insertions(+), 29 deletions(-) diff --git a/src/ai/backend/manager/models/endpoint.py b/src/ai/backend/manager/models/endpoint.py index 1dc3f601939..8216ed30f31 100644 --- a/src/ai/backend/manager/models/endpoint.py +++ b/src/ai/backend/manager/models/endpoint.py @@ -3,10 +3,20 @@ import datetime import logging import uuid +from collections.abc 
import ( + Mapping, + Sequence, +) from decimal import Decimal from enum import Enum, StrEnum from pathlib import Path -from typing import TYPE_CHECKING, Any, Iterable, List, Mapping, Optional, Sequence, cast +from typing import ( + TYPE_CHECKING, + Any, + Optional, + Self, + cast, +) import graphene import jwt @@ -309,7 +319,7 @@ async def get( load_created_user=False, load_session_owner=False, load_model=False, - ) -> "EndpointRow": + ) -> Self: """ :raises: sqlalchemy.orm.exc.NoResultFound """ @@ -353,7 +363,7 @@ async def list( load_created_user=False, load_session_owner=False, status_filter=[EndpointLifecycle.CREATED], - ) -> List["EndpointRow"]: + ) -> list[Self]: query = ( sa.select(EndpointRow) .order_by(sa.desc(EndpointRow.created_at)) @@ -382,7 +392,7 @@ async def list( async def batch_load( cls, session: AsyncSession, - endpoint_ids: List[uuid.UUID], + endpoint_ids: Sequence[uuid.UUID], domain: Optional[str] = None, project: Optional[uuid.UUID] = None, user_uuid: Optional[uuid.UUID] = None, @@ -392,7 +402,7 @@ async def batch_load( load_created_user=False, load_session_owner=False, status_filter=[EndpointLifecycle.CREATED], - ) -> List["EndpointRow"]: + ) -> Sequence[Self]: query = ( sa.select(EndpointRow) .order_by(sa.desc(EndpointRow.created_at)) @@ -433,7 +443,7 @@ async def list_by_model( load_created_user=False, load_session_owner=False, status_filter=[EndpointLifecycle.CREATED], - ) -> List["EndpointRow"]: + ) -> Sequence[Self]: query = ( sa.select(EndpointRow) .order_by(sa.desc(EndpointRow.created_at)) @@ -541,7 +551,7 @@ async def list( project: Optional[uuid.UUID] = None, user_uuid: Optional[uuid.UUID] = None, load_endpoint=False, - ) -> Iterable["EndpointTokenRow"]: + ) -> Sequence[Self]: query = ( sa.select(EndpointTokenRow) .filter(EndpointTokenRow.endpoint == endpoint_id) @@ -568,7 +578,7 @@ async def get( project: Optional[uuid.UUID] = None, user_uuid: Optional[uuid.UUID] = None, load_endpoint=False, - ) -> "EndpointTokenRow": + ) -> Self: 
query = sa.select(EndpointTokenRow).filter(EndpointTokenRow.token == token) if load_endpoint: query = query.options(selectinload(EndpointTokenRow.tokens)) @@ -589,10 +599,14 @@ class EndpointAutoScalingRuleRow(Base): __tablename__ = "endpoint_auto_scaling_rules" id = IDColumn() - metric_source = sa.Column("metric_source", StrEnumType(AutoScalingMetricSource), nullable=False) + metric_source = sa.Column( + "metric_source", StrEnumType(AutoScalingMetricSource, use_name=False), nullable=False + ) metric_name = sa.Column("metric_name", sa.Text(), nullable=False) threshold = sa.Column("threshold", DecimalType(), nullable=False) - comparator = sa.Column("comparator", StrEnumType(AutoScalingMetricComparator), nullable=False) + comparator = sa.Column( + "comparator", StrEnumType(AutoScalingMetricComparator, use_name=False), nullable=False + ) step_size = sa.Column("step_size", sa.Integer(), nullable=False) cooldown_seconds = sa.Column("cooldown_seconds", sa.Integer(), nullable=False, default=300) @@ -621,9 +635,7 @@ class EndpointAutoScalingRuleRow(Base): endpoint_row = relationship("EndpointRow", back_populates="endpoint_auto_scaling_rules") @classmethod - async def list( - cls, session: AsyncSession, load_endpoint=False - ) -> list["EndpointAutoScalingRuleRow"]: + async def list(cls, session: AsyncSession, load_endpoint=False) -> Sequence[Self]: query = sa.select(EndpointAutoScalingRuleRow) if load_endpoint: query = query.options(selectinload(EndpointAutoScalingRuleRow.endpoint_row)) @@ -865,7 +877,7 @@ class RuntimeVariantInfo(graphene.ObjectType): human_readable_name = graphene.String() @classmethod - def from_enum(cls, enum: RuntimeVariant) -> "RuntimeVariantInfo": + def from_enum(cls, enum: RuntimeVariant) -> Self: return cls(name=enum.value, human_readable_name=MODEL_SERVICE_RUNTIME_PROFILES[enum].name) @@ -902,7 +914,7 @@ async def batch_load_by_endpoint( @classmethod async def batch_load_by_replica( cls, - ctx: "GraphQueryContext", + ctx: GraphQueryContext, 
endpoint_replica_ids: Sequence[tuple[uuid.UUID, uuid.UUID]], ) -> Sequence[Optional[Mapping[str, Any]]]: async def _build_pipeline(redis: Redis) -> Pipeline: @@ -1006,7 +1018,7 @@ async def from_row( cls, ctx, # ctx: GraphQueryContext, row: EndpointRow, - ) -> "Endpoint": + ) -> Self: return cls( endpoint_id=row.id, # image="", # deprecated, row.image_object.name, @@ -1090,7 +1102,7 @@ async def load_slice( project: Optional[uuid.UUID] = None, filter: Optional[str] = None, order: Optional[str] = None, - ) -> Sequence["Endpoint"]: + ) -> Sequence[Self]: query = ( sa.select(EndpointRow) .select_from( @@ -1157,7 +1169,7 @@ async def load_item( domain_name: Optional[str] = None, user_uuid: Optional[uuid.UUID] = None, project: uuid.UUID | None = None, - ) -> "Endpoint": + ) -> Self: """ :raises: ai.backend.manager.api.exceptions.EndpointNotFound """ @@ -1324,10 +1336,10 @@ async def mutate( info: graphene.ResolveInfo, endpoint_id: uuid.UUID, props: ModifyEndpointInput, - ) -> "ModifyEndpoint": + ) -> Self: graph_ctx: GraphQueryContext = info.context - async def _do_mutate() -> ModifyEndpoint: + async def _do_mutate() -> Self: async with graph_ctx.db.begin_session() as db_session: try: endpoint_row = await EndpointRow.get( @@ -1528,8 +1540,10 @@ def _get_vfolder_id(id_input: str) -> uuid.UUID: await db_session.commit() - return ModifyEndpoint( - True, "success", await Endpoint.from_row(graph_ctx, endpoint_row) + return cls( + True, + "success", + await Endpoint.from_row(graph_ctx, endpoint_row), ) return await gql_mutation_wrapper( @@ -1556,7 +1570,7 @@ async def from_row( cls, ctx, # ctx: GraphQueryContext, row: EndpointTokenRow, - ) -> "EndpointToken": + ) -> Self: return cls( token=row.token, endpoint_id=row.endpoint, @@ -1602,7 +1616,7 @@ async def load_slice( project: Optional[uuid.UUID] = None, domain_name: Optional[str] = None, user_uuid: Optional[uuid.UUID] = None, - ) -> Sequence["EndpointToken"]: + ) -> Sequence[Self]: query = ( sa.select(EndpointTokenRow) 
.limit(limit) @@ -1632,13 +1646,13 @@ async def load_slice( @classmethod async def load_all( cls, - ctx, # ctx: GraphQueryContext + ctx: GraphQueryContext, endpoint_id: uuid.UUID, *, project: Optional[uuid.UUID] = None, domain_name: Optional[str] = None, user_uuid: Optional[uuid.UUID] = None, - ) -> Sequence["EndpointToken"]: + ) -> Sequence[Self]: async with ctx.db.begin_readonly_session() as session: rows = await EndpointTokenRow.list( session, @@ -1647,7 +1661,7 @@ async def load_all( domain=domain_name, user_uuid=user_uuid, ) - return [await EndpointToken.from_row(ctx, row) for row in rows] + return [await cls.from_row(ctx, row) for row in rows] @classmethod async def load_item( @@ -1658,7 +1672,7 @@ async def load_item( project: Optional[uuid.UUID] = None, domain_name: Optional[str] = None, user_uuid: Optional[uuid.UUID] = None, - ) -> "EndpointToken": + ) -> Self: try: async with ctx.db.begin_readonly_session() as session: row = await EndpointTokenRow.get( @@ -1666,7 +1680,7 @@ async def load_item( ) except NoResultFound: raise EndpointTokenNotFound - return await EndpointToken.from_row(ctx, row) + return await cls.from_row(ctx, row) async def resolve_valid_until( self, From 33dcce8a13b363256143fd71f160d838d641b248 Mon Sep 17 00:00:00 2001 From: Kyujin Cho Date: Thu, 2 Jan 2025 13:57:39 +0900 Subject: [PATCH 25/75] accept PR review --- .../manager/models/gql_models/endpoint.py | 52 ++++++------------- .../backend/manager/scheduler/dispatcher.py | 2 +- 2 files changed, 18 insertions(+), 36 deletions(-) diff --git a/src/ai/backend/manager/models/gql_models/endpoint.py b/src/ai/backend/manager/models/gql_models/endpoint.py index f749a3bb99c..0a2f5d50543 100644 --- a/src/ai/backend/manager/models/gql_models/endpoint.py +++ b/src/ai/backend/manager/models/gql_models/endpoint.py @@ -72,10 +72,10 @@ class Meta: row_id = graphene.UUID(required=True) - metric_source = graphene.String(required=True) + metric_source = graphene.Enum(AutoScalingMetricSource, 
required=True) metric_name = graphene.String(required=True) threshold = graphene.String(required=True) - comparator = graphene.String(required=True) + comparator = graphene.Enum(AutoScalingMetricComparator, required=True) step_size = graphene.Int(required=True) cooldown_seconds = graphene.Int(required=True) @@ -92,10 +92,10 @@ def from_row(cls, graph_ctx: GraphQueryContext, row: EndpointAutoScalingRuleRow) return cls( id=row.id, row_id=row.id, - metric_source=row.metric_source.name, + metric_source=row.metric_source, metric_name=row.metric_name, threshold=row.threshold, - comparator=row.comparator.name, + comparator=row.comparator, step_size=row.step_size, cooldown_seconds=row.cooldown_seconds, min_replicas=row.min_replicas, @@ -218,13 +218,15 @@ class EndpointAutoScalingRuleInput(graphene.InputObjectType): class Meta: description = "Added in 24.12.0." - metric_source = graphene.String( + metric_source = graphene.Enum( + AutoScalingMetricSource, required=True, description=(f"Available values: {generate_desc_for_enum_kvlist(AutoScalingMetricSource)}"), ) metric_name = graphene.String(required=True) threshold = graphene.String(required=True) comparator = graphene.String( + AutoScalingMetricComparator, required=True, description=( f"Available values: {generate_desc_for_enum_kvlist(AutoScalingMetricComparator)}" @@ -240,15 +242,17 @@ class ModifyEndpointAutoScalingRuleInput(graphene.InputObjectType): class Meta: description = "Added in 24.12.0." 
- metric_source = graphene.String( - description=(f"Available values: {", ".join([p.name for p in AutoScalingMetricSource])}") + metric_source = graphene.Enum( + AutoScalingMetricSource, + description=(f"Available values: {", ".join([p.name for p in AutoScalingMetricSource])}"), ) metric_name = graphene.String() threshold = graphene.String() - comparator = graphene.String( + comparator = graphene.Enum( + AutoScalingMetricComparator, description=( f"Available values: {", ".join([p.name for p in AutoScalingMetricComparator])}" - ) + ), ) step_size = graphene.Int() cooldown_seconds = graphene.Int() @@ -304,18 +308,6 @@ async def mutate( if row.created_user != graph_ctx.user["uuid"]: raise GenericForbidden - try: - _source = AutoScalingMetricSource(props.metric_source) - except (KeyError, ValueError): - raise InvalidAPIParameters( - f"Unsupported AutoScalingMetricSource {props.metric_source}" - ) - try: - _comparator = AutoScalingMetricComparator(props.comparator) - except (KeyError, ValueError): - raise InvalidAPIParameters( - f"Unsupported AutoScalingMetricComparator {props.comparator}" - ) try: _threshold = decimal.Decimal(props.threshold) except decimal.InvalidOperation: @@ -324,10 +316,10 @@ async def mutate( async def _do_mutate() -> Self: created_rule = await row.create_auto_scaling_rule( db_session, - _source, + props.metric_source, props.metric_name, _threshold, - _comparator, + props.comparator, props.step_size, cooldown_seconds=props.cooldown_seconds, min_replicas=props.min_replicas, @@ -391,25 +383,15 @@ async def mutate( raise GenericForbidden async def _do_mutate() -> Self: - if (_newval := props.metric_source) and _newval is not Undefined: - try: - row.metric_source = AutoScalingMetricSource(_newval) - except (KeyError, ValueError): - raise InvalidAPIParameters(f"Unsupported AutoScalingMetricSource {_newval}") - if (_newval := props.comparator) and _newval is not Undefined: - try: - row.comparator = AutoScalingMetricComparator(_newval) - except 
(KeyError, ValueError): - raise InvalidAPIParameters( - f"Unsupported AutoScalingMetricComparator {_newval}" - ) if (_newval := props.threshold) and _newval is not Undefined: try: row.threshold = decimal.Decimal(_newval) except decimal.InvalidOperation: raise InvalidAPIParameters(f"Cannot convert {_newval} to Decimal") + set_if_set(props, row, "metric_source") set_if_set(props, row, "metric_name") + set_if_set(props, row, "comparator") set_if_set(props, row, "step_size") set_if_set(props, row, "cooldown_seconds") set_if_set(props, row, "min_replicas") diff --git a/src/ai/backend/manager/scheduler/dispatcher.py b/src/ai/backend/manager/scheduler/dispatcher.py index 19c359b5c0a..c83429b2c0a 100644 --- a/src/ai/backend/manager/scheduler/dispatcher.py +++ b/src/ai/backend/manager/scheduler/dispatcher.py @@ -1417,7 +1417,7 @@ async def _autoscale_endpoints( session, list(metric_requested_sessions) ) for kernel in kernel_rows: - kernels_by_session_id[kernel.session].append(kernel) + kernels_by_session_id[kernel.session_id].append(kernel) metric_requested_kernels.append(kernel) # to speed up and lower the pressure to the redis we must load every metrics From 089028ab65a1f08d58c5bf0dff7f44eebb23fd33 Mon Sep 17 00:00:00 2001 From: Kyujin Cho Date: Thu, 2 Jan 2025 14:01:35 +0900 Subject: [PATCH 26/75] fix graphene Enum misuse --- src/ai/backend/manager/models/gql_models/endpoint.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/ai/backend/manager/models/gql_models/endpoint.py b/src/ai/backend/manager/models/gql_models/endpoint.py index 0a2f5d50543..b1ed50bd044 100644 --- a/src/ai/backend/manager/models/gql_models/endpoint.py +++ b/src/ai/backend/manager/models/gql_models/endpoint.py @@ -72,10 +72,10 @@ class Meta: row_id = graphene.UUID(required=True) - metric_source = graphene.Enum(AutoScalingMetricSource, required=True) + metric_source = graphene.Enum.from_enum(AutoScalingMetricSource) metric_name = graphene.String(required=True) 
threshold = graphene.String(required=True) - comparator = graphene.Enum(AutoScalingMetricComparator, required=True) + comparator = graphene.Enum.from_enum(AutoScalingMetricComparator) step_size = graphene.Int(required=True) cooldown_seconds = graphene.Int(required=True) @@ -218,16 +218,14 @@ class EndpointAutoScalingRuleInput(graphene.InputObjectType): class Meta: description = "Added in 24.12.0." - metric_source = graphene.Enum( + metric_source = graphene.Enum.from_enum( AutoScalingMetricSource, - required=True, description=(f"Available values: {generate_desc_for_enum_kvlist(AutoScalingMetricSource)}"), ) metric_name = graphene.String(required=True) threshold = graphene.String(required=True) comparator = graphene.String( AutoScalingMetricComparator, - required=True, description=( f"Available values: {generate_desc_for_enum_kvlist(AutoScalingMetricComparator)}" ), @@ -242,13 +240,13 @@ class ModifyEndpointAutoScalingRuleInput(graphene.InputObjectType): class Meta: description = "Added in 24.12.0." 
- metric_source = graphene.Enum( + metric_source = graphene.Enum.from_enum( AutoScalingMetricSource, description=(f"Available values: {", ".join([p.name for p in AutoScalingMetricSource])}"), ) metric_name = graphene.String() threshold = graphene.String() - comparator = graphene.Enum( + comparator = graphene.Enum.from_enum( AutoScalingMetricComparator, description=( f"Available values: {", ".join([p.name for p in AutoScalingMetricComparator])}" From 9b478118a766a16feed7f2a95a5d8db0b1e0b3ee Mon Sep 17 00:00:00 2001 From: Kyujin Cho Date: Thu, 2 Jan 2025 15:11:57 +0900 Subject: [PATCH 27/75] implement CLI function --- .../client/cli/service_auto_scaling_rule.py | 178 ++++++++++++++ .../client/func/service_auto_scaling_rule.py | 225 ++++++++++++++++++ src/ai/backend/client/output/fields.py | 16 ++ src/ai/backend/client/session.py | 5 +- src/ai/backend/common/types.py | 14 ++ src/ai/backend/manager/models/endpoint.py | 18 +- .../manager/models/gql_models/endpoint.py | 11 +- .../backend/manager/scheduler/dispatcher.py | 4 +- 8 files changed, 451 insertions(+), 20 deletions(-) create mode 100644 src/ai/backend/client/cli/service_auto_scaling_rule.py create mode 100644 src/ai/backend/client/func/service_auto_scaling_rule.py diff --git a/src/ai/backend/client/cli/service_auto_scaling_rule.py b/src/ai/backend/client/cli/service_auto_scaling_rule.py new file mode 100644 index 00000000000..268cfd3bdc6 --- /dev/null +++ b/src/ai/backend/client/cli/service_auto_scaling_rule.py @@ -0,0 +1,178 @@ +import decimal +import sys +import uuid +from typing import Any, Iterable, Optional + +import click + +from ai.backend.cli.main import main +from ai.backend.cli.types import ExitCode +from ai.backend.client.cli.extensions import pass_ctx_obj +from ai.backend.client.cli.service import get_service_id +from ai.backend.client.cli.types import CLIContext +from ai.backend.client.exceptions import BackendAPIError +from ai.backend.client.session import Session +from ai.backend.common.types 
import AutoScalingMetricComparator, AutoScalingMetricSource + +from ..output.fields import service_auto_scaling_rule_fields +from .pretty import print_done + +_default_list_fields = ( + service_auto_scaling_rule_fields["id"], + service_auto_scaling_rule_fields["metric_source"], + service_auto_scaling_rule_fields["metric_name"], + service_auto_scaling_rule_fields["comparator"], + service_auto_scaling_rule_fields["threshold"], +) + + +@main.group() +def network(): + """Set of model service auto scaling rule operations""" + + +@network.command() +@pass_ctx_obj +@click.argument("service", type=str, metavar="SERVICE_NAME_OR_ID") +@click.option("--metric-source", type=click.Choice(AutoScalingMetricSource), required=True) +@click.option("--metric-name", type=str, required=True) +@click.option("--threshold", type=str, required=True) +@click.option("--comparator", type=click.Choice(AutoScalingMetricComparator), required=True) +@click.option("--step-size", type=int, required=True) +@click.option("--cooldown-seconds", type=int, required=True) +@click.option("--min-replicas", type=int) +@click.option("--max-replicas", type=int) +def create( + ctx: CLIContext, + service: str, + *, + metric_source: AutoScalingMetricSource, + metric_name: str, + threshold: str, + comparator: AutoScalingMetricComparator, + step_size: int, + cooldown_seconds: int, + min_replicas: Optional[int] = None, + max_replicas: Optional[int] = None, +) -> None: + """Create a new auto scaling rule.""" + + with Session() as session: + try: + _threshold = decimal.Decimal(threshold) + except decimal.InvalidOperation: + ctx.output.print_fail(f"{threshold} is not a valid Decimal") + sys.exit(ExitCode.FAILURE) + + try: + service_id = uuid.UUID(get_service_id(session, service)) + rule = session.ServiceAutoScalingRule.create( + service_id, + metric_source, + metric_name, + _threshold, + comparator, + step_size, + cooldown_seconds, + min_replicas=min_replicas, + max_replicas=max_replicas, + ) + print_done(f"Auto 
Scaling Rule (ID {rule.rule_id}) created.") + except Exception as e: + ctx.output.print_error(e) + sys.exit(ExitCode.FAILURE) + + +@network.command() +@pass_ctx_obj +@click.argument("service", type=str, metavar="SERVICE_NAME_OR_ID") +@click.option( + "-f", + "--format", + default=None, + help="Display only specified fields. When specifying multiple fields separate them with comma (,).", +) +@click.option("--filter", "filter_", default=None, help="Set the query filter expression.") +@click.option("--order", default=None, help="Set the query ordering expression.") +@click.option("--offset", default=0, help="The index of the current page start for pagination.") +@click.option("--limit", type=int, default=None, help="The page size for pagination.") +def list(ctx: CLIContext, service: str, format, filter_, order, offset, limit): + """List all set auto scaling rules for given model service.""" + + if format: + try: + fields = [service_auto_scaling_rule_fields[f.strip()] for f in format.split(",")] + except KeyError as e: + ctx.output.print_fail(f"Field {str(e)} not found") + sys.exit(ExitCode.FAILURE) + else: + fields = None + with Session() as session: + service_id = uuid.UUID(get_service_id(session, service)) + + try: + fetch_func = lambda pg_offset, pg_size: session.ServiceAutoScalingRule.paginated_list( + service_id, + page_offset=pg_offset, + page_size=pg_size, + filter=filter_, + order=order, + fields=fields, + ) + ctx.output.print_paginated_list( + fetch_func, + initial_page_offset=offset, + page_size=limit, + ) + except Exception as e: + ctx.output.print_error(e) + sys.exit(ExitCode.FAILURE) + + +@network.command() +@pass_ctx_obj +@click.argument("rule", type=str, metavar="RULE_ID") +@click.option( + "-f", + "--format", + default=None, + help="Display only specified fields. 
When specifying multiple fields separate them with comma (,).", +) +def get(ctx: CLIContext, rule, format): + fields: Iterable[Any] + if format: + try: + fields = [service_auto_scaling_rule_fields[f.strip()] for f in format.split(",")] + except KeyError as e: + ctx.output.print_fail(f"Field {str(e)} not found") + sys.exit(ExitCode.FAILURE) + else: + fields = _default_list_fields + + with Session() as session: + try: + rule_info = session.ServiceAutoScalingRule(uuid.UUID(rule)).get(fields=fields) + except (ValueError, BackendAPIError): + rules = session.Network.paginated_list(filter=f'name == "{rule}"', fields=fields) + if rules.total_count == 0: + ctx.output.print_fail(f"Network {rule} not found.") + sys.exit(ExitCode.FAILURE) + rule_info = rules.items[0] + + ctx.output.print_item(rule_info, fields) + + +@network.command() +@pass_ctx_obj +@click.argument("rule", type=str, metavar="NETWORK_ID_OR_NAME") +def delete(ctx: CLIContext, rule): + with Session() as session: + rule = session.ServiceAutoScalingRule(uuid.UUID(rule)) + try: + rule.get(fields=[service_auto_scaling_rule_fields["id"]]) + rule.delete() + print_done(f"Network {rule} has been deleted.") + except BackendAPIError as e: + ctx.output.print_fail(f"Failed to delete rule {rule}:") + ctx.output.print_error(e) + sys.exit(ExitCode.FAILURE) diff --git a/src/ai/backend/client/func/service_auto_scaling_rule.py b/src/ai/backend/client/func/service_auto_scaling_rule.py new file mode 100644 index 00000000000..01c2103d616 --- /dev/null +++ b/src/ai/backend/client/func/service_auto_scaling_rule.py @@ -0,0 +1,225 @@ +import textwrap +from decimal import Decimal +from typing import Optional, Sequence +from uuid import UUID + +from ai.backend.client.func.base import BaseFunction, api_function +from ai.backend.client.output.types import FieldSpec, RelayPaginatedResult +from ai.backend.client.pagination import execute_paginated_relay_query +from ai.backend.client.session import api_session +from ai.backend.common.types 
import AutoScalingMetricComparator, AutoScalingMetricSource + +from ..output.fields import service_auto_scaling_rule_fields + +_default_fields: Sequence[FieldSpec] = ( + service_auto_scaling_rule_fields["id"], + service_auto_scaling_rule_fields["metric_source"], + service_auto_scaling_rule_fields["metric_name"], + service_auto_scaling_rule_fields["comparator"], + service_auto_scaling_rule_fields["threshold"], + service_auto_scaling_rule_fields["endpoint"], + service_auto_scaling_rule_fields["comparator"], + service_auto_scaling_rule_fields["step_size"], + service_auto_scaling_rule_fields["cooldown_seconds"], + service_auto_scaling_rule_fields["min_replicas"], + service_auto_scaling_rule_fields["max_replicas"], + service_auto_scaling_rule_fields["created_at"], + service_auto_scaling_rule_fields["last_triggered_at"], +) + + +class ServiceAutoScalingRule(BaseFunction): + rule_id: UUID + + @api_function + @classmethod + async def paginated_list( + cls, + endpoint_id: UUID, + *, + fields: Sequence[FieldSpec] | None = None, + page_offset: int = 0, + page_size: int = 20, + filter: Optional[str] = None, + order: Optional[str] = None, + ) -> RelayPaginatedResult[dict]: + return await execute_paginated_relay_query( + "endpoint_auto_scaling_rule_nodes", + { + "endpoint": (str(endpoint_id), "String"), + "filter": (filter, "String"), + "order": (order, "String"), + }, + fields or _default_fields, + limit=page_size, + offset=page_offset, + ) + + @api_function + @classmethod + async def create( + cls, + service: UUID, + metric_source: AutoScalingMetricSource, + metric_name: str, + threshold: Decimal, + comparator: AutoScalingMetricComparator, + step_size: int, + cooldown_seconds: int, + *, + min_replicas: Optional[int] = None, + max_replicas: Optional[int] = None, + ) -> "ServiceAutoScalingRule": + q = textwrap.dedent( + """ + mutation( + $endpoint: String!, + $metric_source: String!, + $metric_name: String!, + $threshold: String!, + $comparator: String!, + $step_size: Int!, + 
$cooldown_seconds: Int!, + $min_replicas: Int, + $max_replicas: Int + ) { + create_endpoint_auto_scaling_rule( + endpoint: $endpoint, + props: { + metric_source: $metric_source, + metric_name: $metric_name, + threshold: $threshold, + comparator: $comparator, + step_size: $step_size, + cooldown_seconds: $cooldown_seconds, + min_replicas: $min_replicas, + max_replicas: $max_replicas + } + ) { + rule { + row_id + } + } + } + """ + ) + data = await api_session.get().Admin._query( + q, + { + "endpoint": str(service), + "metric_source": metric_source, + "metric_name": metric_name, + "threshold": threshold, + "comparator": comparator, + "step_size": step_size, + "cooldown_seconds": cooldown_seconds, + "min_replicas": min_replicas, + "max_replicas": max_replicas, + }, + ) + + return cls(rule_id=UUID(data["create_endpoint_auto_scaling_rule"]["rule"]["row_id"])) + + def __init__(self, rule_id: UUID) -> None: + super().__init__() + self.rule_id = rule_id + + @api_function + async def get( + self, + fields: Sequence[FieldSpec] | None = None, + ) -> Sequence[dict]: + query = textwrap.dedent( + """\ + query($rule_id: UUID!) 
{ + endpoint_auto_scaling_rule_node(rule_id: $rule_id) {$fields} + } + """ + ) + query = query.replace("$fields", " ".join(f.field_ref for f in (fields or _default_fields))) + variables = {"rule_id": self.rule_id} + data = await api_session.get().Admin._query(query, variables) + return data["endpoint"] + + @api_function + async def update( + self, + *, + metric_source: Optional[AutoScalingMetricSource] = None, + metric_name: Optional[str] = None, + threshold: Optional[Decimal] = None, + comparator: Optional[AutoScalingMetricComparator] = None, + step_size: Optional[int] = None, + cooldown_seconds: Optional[int] = None, + min_replicas: Optional[int] = None, + max_replicas: Optional[int] = None, + ) -> "ServiceAutoScalingRule": + q = textwrap.dedent( + """ + mutation( + $rule_id: String!, + $metric_source: String, + $metric_name: String, + $threshold: String, + $comparator: String, + $step_size: Int, + $cooldown_seconds: Int, + $min_replicas: Int, + $max_replicas: Int + ) { + modify_endpoint_auto_scaling_rule( + id: $rule_id, + props: { + metric_source: $metric_source, + metric_name: $metric_name, + threshold: $threshold, + comparator: $comparator, + step_size: $step_size, + cooldown_seconds: $cooldown_seconds, + min_replicas: $min_replicas, + max_replicas: $max_replicas + } + ) { + rule { + ok + msg + } + } + } + """ + ) + data = await api_session.get().Admin._query( + q, + { + "rule_id": str(self.rule_id), + "metric_source": metric_source, + "metric_name": metric_name, + "threshold": threshold, + "comparator": comparator, + "step_size": step_size, + "cooldown_seconds": cooldown_seconds, + "min_replicas": min_replicas, + "max_replicas": max_replicas, + }, + ) + + return data["modify_endpoint_auto_scaling_rule"] + + @api_function + async def delete(self) -> None: + q = textwrap.dedent( + """ + mutation($rule_id: String!) 
{ + delete_endpoint_auto_scaling_rule(id: $rule_id) { + ok + msg + } + } + """ + ) + + variables = { + "rule_id": str(self.rule_id), + } + data = await api_session.get().Admin._query(q, variables) + return data["delete_endpoint_auto_scaling_rule"] diff --git a/src/ai/backend/client/output/fields.py b/src/ai/backend/client/output/fields.py index ab45df52e25..bf67b547196 100644 --- a/src/ai/backend/client/output/fields.py +++ b/src/ai/backend/client/output/fields.py @@ -356,3 +356,19 @@ FieldSpec("created_at"), FieldSpec("updated_at", "Last Updated"), ]) + + +service_auto_scaling_rule_fields = FieldSet([ + FieldSpec(field_ref="row_id", field_name="id", alt_name="id"), + FieldSpec("endpoint"), + FieldSpec("metric_source"), + FieldSpec("metric_name"), + FieldSpec("threshold"), + FieldSpec("comparator"), + FieldSpec("step_size"), + FieldSpec("cooldown_seconds"), + FieldSpec("min_replicas"), + FieldSpec("max_replicas"), + FieldSpec("created_at"), + FieldSpec("last_triggered_at", "Last Triggered"), +]) diff --git a/src/ai/backend/client/session.py b/src/ai/backend/client/session.py index 7723a62650b..68a9cdb0bd6 100644 --- a/src/ai/backend/client/session.py +++ b/src/ai/backend/client/session.py @@ -23,6 +23,8 @@ import aiohttp from multidict import CIMultiDict +from ai.backend.client.func.service_auto_scaling_rule import ServiceAutoScalingRule + from .config import MIN_API_VERSION, APIConfig, get_config, parse_api_version from .exceptions import APIVersionWarning, BackendAPIError, BackendClientError from .types import Sentinel, sentinel @@ -270,7 +272,7 @@ class BaseSession(metaclass=abc.ABCMeta): "ServerLog", "Permission", "Service", - "Model", + "ServiceAutoScalingRule," "Model", "QuotaScope", "Network", ) @@ -344,6 +346,7 @@ def __init__( self.ServerLog = ServerLog self.Permission = Permission self.Service = Service + self.ServiceAutoScalingRule = ServiceAutoScalingRule self.Model = Model self.QuotaScope = QuotaScope self.Network = Network diff --git 
a/src/ai/backend/common/types.py b/src/ai/backend/common/types.py index 71e388f24aa..c802cc3886b 100644 --- a/src/ai/backend/common/types.py +++ b/src/ai/backend/common/types.py @@ -94,6 +94,8 @@ "EtcdRedisConfig", "RedisConnectionInfo", "RuntimeVariant", + "AutoScalingMetricSource", + "AutoScalingMetricComparator", "MODEL_SERVICE_RUNTIME_PROFILES", ) @@ -1316,3 +1318,15 @@ def metric_string(self) -> str: val = metric.metric_value_string(self.metric_name, self.metric_primitive) result += f"{val}\n" return result + + +class AutoScalingMetricSource(enum.StrEnum): + KERNEL = "kernel" + INFERENCE_FRAMEWORK = "inference-framework" + + +class AutoScalingMetricComparator(enum.StrEnum): + LESS_THAN = "lt" + LESS_THAN_OR_EQUAL = "le" + GREATER_THAN = "gt" + GREATER_THAN_OR_EQUAL = "ge" diff --git a/src/ai/backend/manager/models/endpoint.py b/src/ai/backend/manager/models/endpoint.py index 8216ed30f31..bfed6196182 100644 --- a/src/ai/backend/manager/models/endpoint.py +++ b/src/ai/backend/manager/models/endpoint.py @@ -8,7 +8,7 @@ Sequence, ) from decimal import Decimal -from enum import Enum, StrEnum +from enum import Enum from pathlib import Path from typing import ( TYPE_CHECKING, @@ -38,6 +38,8 @@ from ai.backend.common.types import ( MODEL_SERVICE_RUNTIME_PROFILES, AccessKey, + AutoScalingMetricComparator, + AutoScalingMetricSource, ClusterMode, ImageAlias, MountPermission, @@ -99,8 +101,6 @@ from .gql import GraphQueryContext __all__ = ( - "AutoScalingMetricSource", - "AutoScalingMetricComparator", "EndpointRow", "Endpoint", "EndpointLifecycle", @@ -124,18 +124,6 @@ class EndpointLifecycle(Enum): DESTROYED = "destroyed" -class AutoScalingMetricSource(StrEnum): - KERNEL = "kernel" - INFERENCE_FRAMEWORK = "inference-framework" - - -class AutoScalingMetricComparator(StrEnum): - LESS_THAN = "lt" - LESS_THAN_OR_EQUAL = "le" - GREATER_THAN = "gt" - GREATER_THAN_OR_EQUAL = "ge" - - class EndpointRow(Base): __tablename__ = "endpoints" diff --git 
a/src/ai/backend/manager/models/gql_models/endpoint.py b/src/ai/backend/manager/models/gql_models/endpoint.py index b1ed50bd044..5e08d1dedfc 100644 --- a/src/ai/backend/manager/models/gql_models/endpoint.py +++ b/src/ai/backend/manager/models/gql_models/endpoint.py @@ -10,6 +10,11 @@ from graphql import Undefined from sqlalchemy.orm.exc import NoResultFound +from ai.backend.common.types import ( + AutoScalingMetricComparator, + AutoScalingMetricSource, +) + from ...api.exceptions import ( GenericForbidden, InvalidAPIParameters, @@ -23,8 +28,6 @@ set_if_set, ) from ..endpoint import ( - AutoScalingMetricComparator, - AutoScalingMetricSource, EndpointAutoScalingRuleRow, EndpointRow, ) @@ -283,6 +286,10 @@ async def mutate( _, raw_endpoint_id = AsyncNode.resolve_global_id(info, endpoint) if not raw_endpoint_id: raw_endpoint_id = endpoint + if not props.metric_source: + raise InvalidAPIParameters("metric_source is a required field") + if not props.comparator: + raise InvalidAPIParameters("comparator is a required field") try: _endpoint_id = uuid.UUID(raw_endpoint_id) diff --git a/src/ai/backend/manager/scheduler/dispatcher.py b/src/ai/backend/manager/scheduler/dispatcher.py index c83429b2c0a..523dd00b1b9 100644 --- a/src/ai/backend/manager/scheduler/dispatcher.py +++ b/src/ai/backend/manager/scheduler/dispatcher.py @@ -60,6 +60,8 @@ from ai.backend.common.types import ( AgentId, AgentSelectionStrategy, + AutoScalingMetricComparator, + AutoScalingMetricSource, ClusterMode, RedisConnectionInfo, ResourceSlot, @@ -82,8 +84,6 @@ from ..models import ( AgentRow, AgentStatus, - AutoScalingMetricComparator, - AutoScalingMetricSource, EndpointAutoScalingRuleRow, EndpointLifecycle, EndpointRow, From 0baf316258ab98da037419cee0cb1db62e4fdb72 Mon Sep 17 00:00:00 2001 From: Kyujin Cho Date: Thu, 2 Jan 2025 15:21:32 +0900 Subject: [PATCH 28/75] implement CLI function --- src/ai/backend/client/cli/__init__.py | 1 + src/ai/backend/client/cli/service_auto_scaling_rule.py | 4 ++-- 
src/ai/backend/client/session.py | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/ai/backend/client/cli/__init__.py b/src/ai/backend/client/cli/__init__.py index f6453d6ea17..655d8e1946e 100644 --- a/src/ai/backend/client/cli/__init__.py +++ b/src/ai/backend/client/cli/__init__.py @@ -6,6 +6,7 @@ from . import model # noqa # type: ignore from . import server_log # noqa # type: ignore from . import service # noqa # type: ignore +from . import service_auto_scaling_rule # noqa # type: ignore from . import session # noqa # type: ignore from . import session_template # noqa # type: ignore from . import vfolder # noqa # type: ignore diff --git a/src/ai/backend/client/cli/service_auto_scaling_rule.py b/src/ai/backend/client/cli/service_auto_scaling_rule.py index 268cfd3bdc6..ee5c0200294 100644 --- a/src/ai/backend/client/cli/service_auto_scaling_rule.py +++ b/src/ai/backend/client/cli/service_auto_scaling_rule.py @@ -34,10 +34,10 @@ def network(): @network.command() @pass_ctx_obj @click.argument("service", type=str, metavar="SERVICE_NAME_OR_ID") -@click.option("--metric-source", type=click.Choice(AutoScalingMetricSource), required=True) +@click.option("--metric-source", type=click.Choice([*AutoScalingMetricSource]), required=True) @click.option("--metric-name", type=str, required=True) @click.option("--threshold", type=str, required=True) -@click.option("--comparator", type=click.Choice(AutoScalingMetricComparator), required=True) +@click.option("--comparator", type=click.Choice([*AutoScalingMetricComparator]), required=True) @click.option("--step-size", type=int, required=True) @click.option("--cooldown-seconds", type=int, required=True) @click.option("--min-replicas", type=int) diff --git a/src/ai/backend/client/session.py b/src/ai/backend/client/session.py index 68a9cdb0bd6..ab8bbea8d79 100644 --- a/src/ai/backend/client/session.py +++ b/src/ai/backend/client/session.py @@ -272,7 +272,8 @@ class BaseSession(metaclass=abc.ABCMeta): "ServerLog", 
"Permission", "Service", - "ServiceAutoScalingRule," "Model", + "ServiceAutoScalingRule", + "Model", "QuotaScope", "Network", ) From 56798a4657c1318c7ebc713213718bbc29c5d805 Mon Sep 17 00:00:00 2001 From: Kyujin Cho Date: Thu, 2 Jan 2025 15:25:47 +0900 Subject: [PATCH 29/75] fix invalid import --- src/ai/backend/client/session.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/ai/backend/client/session.py b/src/ai/backend/client/session.py index ab8bbea8d79..a28c788dfb9 100644 --- a/src/ai/backend/client/session.py +++ b/src/ai/backend/client/session.py @@ -23,8 +23,6 @@ import aiohttp from multidict import CIMultiDict -from ai.backend.client.func.service_auto_scaling_rule import ServiceAutoScalingRule - from .config import MIN_API_VERSION, APIConfig, get_config, parse_api_version from .exceptions import APIVersionWarning, BackendAPIError, BackendClientError from .types import Sentinel, sentinel @@ -316,6 +314,7 @@ def __init__( from .func.scaling_group import ScalingGroup from .func.server_log import ServerLog from .func.service import Service + from .func.service_auto_scaling_rule import ServiceAutoScalingRule from .func.session import ComputeSession from .func.session_template import SessionTemplate from .func.storage import Storage From 310bb44a0897a8cee704d289f1716300336d77ca Mon Sep 17 00:00:00 2001 From: Kyujin Cho Date: Thu, 2 Jan 2025 15:31:18 +0900 Subject: [PATCH 30/75] fix invalid GQL definitioN --- .../manager/models/gql_models/endpoint.py | 54 +++++++++++++------ 1 file changed, 37 insertions(+), 17 deletions(-) diff --git a/src/ai/backend/manager/models/gql_models/endpoint.py b/src/ai/backend/manager/models/gql_models/endpoint.py index 5e08d1dedfc..f1f30093544 100644 --- a/src/ai/backend/manager/models/gql_models/endpoint.py +++ b/src/ai/backend/manager/models/gql_models/endpoint.py @@ -75,10 +75,16 @@ class Meta: row_id = graphene.UUID(required=True) - metric_source = graphene.Enum.from_enum(AutoScalingMetricSource) + 
metric_source = graphene.Field( + graphene.Enum.from_enum(AutoScalingMetricSource), + required=True, + ) metric_name = graphene.String(required=True) threshold = graphene.String(required=True) - comparator = graphene.Enum.from_enum(AutoScalingMetricComparator) + comparator = graphene.Field( + graphene.Enum.from_enum(AutoScalingMetricComparator), + required=True, + ) step_size = graphene.Int(required=True) cooldown_seconds = graphene.Int(required=True) @@ -221,17 +227,25 @@ class EndpointAutoScalingRuleInput(graphene.InputObjectType): class Meta: description = "Added in 24.12.0." - metric_source = graphene.Enum.from_enum( - AutoScalingMetricSource, - description=(f"Available values: {generate_desc_for_enum_kvlist(AutoScalingMetricSource)}"), + metric_source = graphene.Field( + graphene.Enum.from_enum( + AutoScalingMetricSource, + description=( + f"Available values: {generate_desc_for_enum_kvlist(AutoScalingMetricSource)}" + ), + ), + required=True, ) metric_name = graphene.String(required=True) threshold = graphene.String(required=True) - comparator = graphene.String( - AutoScalingMetricComparator, - description=( - f"Available values: {generate_desc_for_enum_kvlist(AutoScalingMetricComparator)}" + comparator = graphene.Field( + graphene.String( + AutoScalingMetricComparator, + description=( + f"Available values: {generate_desc_for_enum_kvlist(AutoScalingMetricComparator)}" + ), ), + required=True, ) step_size = graphene.Int(required=True) cooldown_seconds = graphene.Int(required=True) @@ -243,17 +257,23 @@ class ModifyEndpointAutoScalingRuleInput(graphene.InputObjectType): class Meta: description = "Added in 24.12.0." 
- metric_source = graphene.Enum.from_enum( - AutoScalingMetricSource, - description=(f"Available values: {", ".join([p.name for p in AutoScalingMetricSource])}"), + metric_source = graphene.Field( + graphene.Enum.from_enum( + AutoScalingMetricSource, + description=( + f"Available values: {", ".join([p.name for p in AutoScalingMetricSource])}" + ), + ), ) metric_name = graphene.String() threshold = graphene.String() - comparator = graphene.Enum.from_enum( - AutoScalingMetricComparator, - description=( - f"Available values: {", ".join([p.name for p in AutoScalingMetricComparator])}" - ), + comparator = graphene.Field( + graphene.Enum.from_enum( + AutoScalingMetricComparator, + description=( + f"Available values: {", ".join([p.name for p in AutoScalingMetricComparator])}" + ), + ) ) step_size = graphene.Int() cooldown_seconds = graphene.Int() From 353e8885bf39e82ef2653b59fe433cdcd70f721b Mon Sep 17 00:00:00 2001 From: Kyujin Cho Date: Thu, 2 Jan 2025 15:35:11 +0900 Subject: [PATCH 31/75] fix typo --- src/ai/backend/manager/models/gql_models/endpoint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ai/backend/manager/models/gql_models/endpoint.py b/src/ai/backend/manager/models/gql_models/endpoint.py index f1f30093544..3a92d891192 100644 --- a/src/ai/backend/manager/models/gql_models/endpoint.py +++ b/src/ai/backend/manager/models/gql_models/endpoint.py @@ -239,7 +239,7 @@ class Meta: metric_name = graphene.String(required=True) threshold = graphene.String(required=True) comparator = graphene.Field( - graphene.String( + graphene.Enum.from_enum( AutoScalingMetricComparator, description=( f"Available values: {generate_desc_for_enum_kvlist(AutoScalingMetricComparator)}" From fd0477126a4a9e70545cd476ed21250d47d7229a Mon Sep 17 00:00:00 2001 From: Kyujin Cho Date: Thu, 2 Jan 2025 06:37:35 +0000 Subject: [PATCH 32/75] chore: update GraphQL schema dump Co-authored-by: octodog --- src/ai/backend/manager/api/schema.graphql | 38 ++++++++++++----------- 1 
file changed, 20 insertions(+), 18 deletions(-) diff --git a/src/ai/backend/manager/api/schema.graphql b/src/ai/backend/manager/api/schema.graphql index 21c47fad61a..4839fe6451b 100644 --- a/src/ai/backend/manager/api/schema.graphql +++ b/src/ai/backend/manager/api/schema.graphql @@ -1662,10 +1662,10 @@ type EndpointAutoScalingRuleNode implements Node { """The ID of the object""" id: ID! row_id: UUID! - metric_source: String! + metric_source: AutoScalingMetricSource! metric_name: String! threshold: String! - comparator: String! + comparator: AutoScalingMetricComparator! step_size: Int! cooldown_seconds: Int! min_replicas: Int @@ -1675,6 +1675,20 @@ type EndpointAutoScalingRuleNode implements Node { endpoint: UUID! } +"""An enumeration.""" +enum AutoScalingMetricSource { + KERNEL + INFERENCE_FRAMEWORK +} + +"""An enumeration.""" +enum AutoScalingMetricComparator { + LESS_THAN + LESS_THAN_OR_EQUAL + GREATER_THAN + GREATER_THAN_OR_EQUAL +} + """Added in 24.12.0.""" type EndpointAutoScalingRuleConnection { """Pagination data for this connection.""" @@ -2646,17 +2660,10 @@ type CreateEndpointAutoScalingRuleNode { """Added in 24.12.0.""" input EndpointAutoScalingRuleInput { - """ - Available values: 'kernel' (KERNEL), 'inference-framework' (INFERENCE_FRAMEWORK) - """ - metric_source: String! + metric_source: AutoScalingMetricSource! metric_name: String! threshold: String! - - """ - Available values: 'lt' (LESS_THAN), 'le' (LESS_THAN_OR_EQUAL), 'gt' (GREATER_THAN), 'ge' (GREATER_THAN_OR_EQUAL) - """ - comparator: String! + comparator: AutoScalingMetricComparator! step_size: Int! cooldown_seconds: Int! 
min_replicas: Int @@ -2672,15 +2679,10 @@ type ModifyEndpointAutoScalingRuleNode { """Added in 24.12.0.""" input ModifyEndpointAutoScalingRuleInput { - """Available values: KERNEL, INFERENCE_FRAMEWORK""" - metric_source: String + metric_source: AutoScalingMetricSource = null metric_name: String threshold: String - - """ - Available values: LESS_THAN, LESS_THAN_OR_EQUAL, GREATER_THAN, GREATER_THAN_OR_EQUAL - """ - comparator: String + comparator: AutoScalingMetricComparator = null step_size: Int cooldown_seconds: Int min_replicas: Int From daa053086f4ec3ef28a457c4212e6ed4e0f7d2e0 Mon Sep 17 00:00:00 2001 From: Kyujin Cho Date: Thu, 2 Jan 2025 15:42:10 +0900 Subject: [PATCH 33/75] update annotation --- .../manager/models/gql_models/endpoint.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/ai/backend/manager/models/gql_models/endpoint.py b/src/ai/backend/manager/models/gql_models/endpoint.py index 3a92d891192..a97fb3cfeee 100644 --- a/src/ai/backend/manager/models/gql_models/endpoint.py +++ b/src/ai/backend/manager/models/gql_models/endpoint.py @@ -71,18 +71,18 @@ class EndpointAutoScalingRuleNode(graphene.ObjectType): class Meta: interfaces = (AsyncNode,) - description = "Added in 24.12.0." + description = "Added in 25.01.0." 
row_id = graphene.UUID(required=True) metric_source = graphene.Field( - graphene.Enum.from_enum(AutoScalingMetricSource), + graphene.Enum.from_enum(AutoScalingMetricSource, description="Added in 25.01.0."), required=True, ) metric_name = graphene.String(required=True) threshold = graphene.String(required=True) comparator = graphene.Field( - graphene.Enum.from_enum(AutoScalingMetricComparator), + graphene.Enum.from_enum(AutoScalingMetricComparator, description="Added in 25.01.0."), required=True, ) step_size = graphene.Int(required=True) @@ -220,12 +220,12 @@ async def get_connection( class EndpointAutoScalingRuleConnection(Connection): class Meta: node = EndpointAutoScalingRuleNode - description = "Added in 24.12.0." + description = "Added in 25.01.0." class EndpointAutoScalingRuleInput(graphene.InputObjectType): class Meta: - description = "Added in 24.12.0." + description = "Added in 25.01.0." metric_source = graphene.Field( graphene.Enum.from_enum( @@ -255,7 +255,7 @@ class Meta: class ModifyEndpointAutoScalingRuleInput(graphene.InputObjectType): class Meta: - description = "Added in 24.12.0." + description = "Added in 25.01.0." metric_source = graphene.Field( graphene.Enum.from_enum( @@ -289,7 +289,7 @@ class Arguments: props = EndpointAutoScalingRuleInput(required=True) class Meta: - description = "Added in 24.12.0." + description = "Added in 25.01.0." ok = graphene.Boolean() msg = graphene.String() @@ -367,7 +367,7 @@ class Arguments: props = ModifyEndpointAutoScalingRuleInput(required=True) class Meta: - description = "Added in 24.12.0." + description = "Added in 25.01.0." ok = graphene.Boolean() msg = graphene.String() @@ -438,7 +438,7 @@ class Arguments: id = graphene.String(required=True) class Meta: - description = "Added in 24.12.0." + description = "Added in 25.01.0." 
ok = graphene.Boolean() msg = graphene.String() From f11f7a68518845a02a4981fa364ef6f1d1dd7957 Mon Sep 17 00:00:00 2001 From: Kyujin Cho Date: Thu, 2 Jan 2025 06:44:20 +0000 Subject: [PATCH 34/75] chore: update GraphQL schema dump Co-authored-by: octodog --- src/ai/backend/manager/api/schema.graphql | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/ai/backend/manager/api/schema.graphql b/src/ai/backend/manager/api/schema.graphql index 4839fe6451b..938b70c7148 100644 --- a/src/ai/backend/manager/api/schema.graphql +++ b/src/ai/backend/manager/api/schema.graphql @@ -1657,7 +1657,7 @@ type NetworkEdge { cursor: String! } -"""Added in 24.12.0.""" +"""Added in 25.01.0.""" type EndpointAutoScalingRuleNode implements Node { """The ID of the object""" id: ID! @@ -1675,13 +1675,13 @@ type EndpointAutoScalingRuleNode implements Node { endpoint: UUID! } -"""An enumeration.""" +"""Added in 25.01.0.""" enum AutoScalingMetricSource { KERNEL INFERENCE_FRAMEWORK } -"""An enumeration.""" +"""Added in 25.01.0.""" enum AutoScalingMetricComparator { LESS_THAN LESS_THAN_OR_EQUAL @@ -1689,7 +1689,7 @@ enum AutoScalingMetricComparator { GREATER_THAN_OR_EQUAL } -"""Added in 24.12.0.""" +"""Added in 25.01.0.""" type EndpointAutoScalingRuleConnection { """Pagination data for this connection.""" pageInfo: PageInfo! @@ -1702,7 +1702,7 @@ type EndpointAutoScalingRuleConnection { } """ -Added in 24.12.0. A Relay edge containing a `EndpointAutoScalingRule` and its cursor. +Added in 25.01.0. A Relay edge containing a `EndpointAutoScalingRule` and its cursor. 
""" type EndpointAutoScalingRuleEdge { """The item at the end of the edge""" @@ -2651,14 +2651,14 @@ type DeleteContainerRegistryNode { container_registry: ContainerRegistryNode } -"""Added in 24.12.0.""" +"""Added in 25.01.0.""" type CreateEndpointAutoScalingRuleNode { ok: Boolean msg: String rule: EndpointAutoScalingRuleNode } -"""Added in 24.12.0.""" +"""Added in 25.01.0.""" input EndpointAutoScalingRuleInput { metric_source: AutoScalingMetricSource! metric_name: String! @@ -2670,14 +2670,14 @@ input EndpointAutoScalingRuleInput { max_replicas: Int } -"""Added in 24.12.0.""" +"""Added in 25.01.0.""" type ModifyEndpointAutoScalingRuleNode { ok: Boolean msg: String rule: EndpointAutoScalingRuleNode } -"""Added in 24.12.0.""" +"""Added in 25.01.0.""" input ModifyEndpointAutoScalingRuleInput { metric_source: AutoScalingMetricSource = null metric_name: String @@ -2689,7 +2689,7 @@ input ModifyEndpointAutoScalingRuleInput { max_replicas: Int } -"""Added in 24.12.0.""" +"""Added in 25.01.0.""" type DeleteEndpointAutoScalingRuleNode { ok: Boolean msg: String From 43428697942ff87850346c208bbc0f0824652136 Mon Sep 17 00:00:00 2001 From: Kyujin Cho Date: Thu, 2 Jan 2025 16:40:51 +0900 Subject: [PATCH 35/75] restructure CLI --- .../client/cli/service_auto_scaling_rule.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/ai/backend/client/cli/service_auto_scaling_rule.py b/src/ai/backend/client/cli/service_auto_scaling_rule.py index ee5c0200294..b0d5b0f68f9 100644 --- a/src/ai/backend/client/cli/service_auto_scaling_rule.py +++ b/src/ai/backend/client/cli/service_auto_scaling_rule.py @@ -5,7 +5,6 @@ import click -from ai.backend.cli.main import main from ai.backend.cli.types import ExitCode from ai.backend.client.cli.extensions import pass_ctx_obj from ai.backend.client.cli.service import get_service_id @@ -16,6 +15,7 @@ from ..output.fields import service_auto_scaling_rule_fields from .pretty import print_done +from .service import service 
_default_list_fields = ( service_auto_scaling_rule_fields["id"], @@ -26,12 +26,12 @@ ) -@main.group() -def network(): +@service.group() +def auto_scaling_rule(): """Set of model service auto scaling rule operations""" -@network.command() +@auto_scaling_rule.command() @pass_ctx_obj @click.argument("service", type=str, metavar="SERVICE_NAME_OR_ID") @click.option("--metric-source", type=click.Choice([*AutoScalingMetricSource]), required=True) @@ -83,7 +83,7 @@ def create( sys.exit(ExitCode.FAILURE) -@network.command() +@auto_scaling_rule.command() @pass_ctx_obj @click.argument("service", type=str, metavar="SERVICE_NAME_OR_ID") @click.option( @@ -129,7 +129,7 @@ def list(ctx: CLIContext, service: str, format, filter_, order, offset, limit): sys.exit(ExitCode.FAILURE) -@network.command() +@auto_scaling_rule.command() @pass_ctx_obj @click.argument("rule", type=str, metavar="RULE_ID") @click.option( @@ -162,7 +162,7 @@ def get(ctx: CLIContext, rule, format): ctx.output.print_item(rule_info, fields) -@network.command() +@auto_scaling_rule.command() @pass_ctx_obj @click.argument("rule", type=str, metavar="NETWORK_ID_OR_NAME") def delete(ctx: CLIContext, rule): From e6bf90397ff1b7c3067b3ae9912e494dcee7989d Mon Sep 17 00:00:00 2001 From: Kyujin Cho Date: Fri, 20 Dec 2024 12:38:37 +0000 Subject: [PATCH 36/75] fix typo --- .../manager/models/gql_models/endpoint.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/ai/backend/manager/models/gql_models/endpoint.py b/src/ai/backend/manager/models/gql_models/endpoint.py index a97fb3cfeee..719d967e3ec 100644 --- a/src/ai/backend/manager/models/gql_models/endpoint.py +++ b/src/ai/backend/manager/models/gql_models/endpoint.py @@ -25,7 +25,7 @@ OrderExprArg, generate_sql_info_for_gql_connection, gql_mutation_wrapper, - set_if_set, + orm_set_if_set, ) from ..endpoint import ( EndpointAutoScalingRuleRow, @@ -414,13 +414,13 @@ async def _do_mutate() -> Self: except decimal.InvalidOperation: raise 
InvalidAPIParameters(f"Cannot convert {_newval} to Decimal") - set_if_set(props, row, "metric_source") - set_if_set(props, row, "metric_name") - set_if_set(props, row, "comparator") - set_if_set(props, row, "step_size") - set_if_set(props, row, "cooldown_seconds") - set_if_set(props, row, "min_replicas") - set_if_set(props, row, "max_replicas") + orm_set_if_set(props, row, "metric_source") + orm_set_if_set(props, row, "metric_name") + orm_set_if_set(props, row, "comparator") + orm_set_if_set(props, row, "step_size") + orm_set_if_set(props, row, "cooldown_seconds") + orm_set_if_set(props, row, "min_replicas") + orm_set_if_set(props, row, "max_replicas") return cls( ok=True, From de6e6e71022e5921d1256bb352cfcde0b0c852f9 Mon Sep 17 00:00:00 2001 From: Kyujin Cho Date: Fri, 20 Dec 2024 12:47:46 +0000 Subject: [PATCH 37/75] fix typo --- src/ai/backend/manager/scheduler/dispatcher.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ai/backend/manager/scheduler/dispatcher.py b/src/ai/backend/manager/scheduler/dispatcher.py index 523dd00b1b9..71debf09cb1 100644 --- a/src/ai/backend/manager/scheduler/dispatcher.py +++ b/src/ai/backend/manager/scheduler/dispatcher.py @@ -1508,11 +1508,11 @@ async def _autoscale_endpoints( ) if should_trigger: new_replicas = rule.endpoint_row.replicas + rule.step_size - if (rule.min_replicas is not None and new_replicas < rule.min_replias) or ( - rule.max_replicas is not None and new_replicas > rule.max_replias + if (rule.min_replicas is not None and new_replicas < rule.min_replicas) or ( + rule.max_replicas is not None and new_replicas > rule.max_replicas ): log.debug( - "_autoscale_endpoints(e: {}, r: {}): new replica count {} violates min() / max () replica restriction; skipping", + "_autoscale_endpoints(e: {}, r: {}): new replica count {} violates min ({}) / max ({}) replica restriction; skipping", rule.endpoint, rule.id, new_replicas, From 274b390b0d62ffa4146df68d520f2982c161d83d Mon Sep 17 00:00:00 2001 From: 
Kyujin Cho Date: Thu, 2 Jan 2025 10:29:55 +0000 Subject: [PATCH 38/75] fix cli not working --- .../client/cli/service_auto_scaling_rule.py | 83 ++++++++++++-- .../client/func/service_auto_scaling_rule.py | 94 +++++++--------- src/ai/backend/common/types.py | 103 ++++++++++++++++-- .../manager/models/gql_models/endpoint.py | 4 +- 4 files changed, 210 insertions(+), 74 deletions(-) diff --git a/src/ai/backend/client/cli/service_auto_scaling_rule.py b/src/ai/backend/client/cli/service_auto_scaling_rule.py index b0d5b0f68f9..43da1d0657f 100644 --- a/src/ai/backend/client/cli/service_auto_scaling_rule.py +++ b/src/ai/backend/client/cli/service_auto_scaling_rule.py @@ -5,7 +5,8 @@ import click -from ai.backend.cli.types import ExitCode +from ai.backend.cli.params import OptionalType +from ai.backend.cli.types import ExitCode, Undefined, undefined from ai.backend.client.cli.extensions import pass_ctx_obj from ai.backend.client.cli.service import get_service_id from ai.backend.client.cli.types import CLIContext @@ -13,6 +14,7 @@ from ai.backend.client.session import Session from ai.backend.common.types import AutoScalingMetricComparator, AutoScalingMetricSource +from ..func.service_auto_scaling_rule import _default_fields as _default_get_fields from ..output.fields import service_auto_scaling_rule_fields from .pretty import print_done from .service import service @@ -139,6 +141,7 @@ def list(ctx: CLIContext, service: str, format, filter_, order, offset, limit): help="Display only specified fields. 
When specifying multiple fields separate them with comma (,).", ) def get(ctx: CLIContext, rule, format): + """Prints attributes of given auto scaling rule.""" fields: Iterable[Any] if format: try: @@ -147,21 +150,83 @@ def get(ctx: CLIContext, rule, format): ctx.output.print_fail(f"Field {str(e)} not found") sys.exit(ExitCode.FAILURE) else: - fields = _default_list_fields + fields = _default_get_fields with Session() as session: try: rule_info = session.ServiceAutoScalingRule(uuid.UUID(rule)).get(fields=fields) except (ValueError, BackendAPIError): - rules = session.Network.paginated_list(filter=f'name == "{rule}"', fields=fields) - if rules.total_count == 0: - ctx.output.print_fail(f"Network {rule} not found.") - sys.exit(ExitCode.FAILURE) - rule_info = rules.items[0] + ctx.output.print_fail(f"Network {rule} not found.") + sys.exit(ExitCode.FAILURE) ctx.output.print_item(rule_info, fields) +@auto_scaling_rule.command() +@pass_ctx_obj +@click.argument("rule", type=str, metavar="RULE_ID") +@click.option("--metric-source", type=OptionalType(AutoScalingMetricSource), default=undefined) +@click.option("--metric-name", type=OptionalType(str), default=undefined) +@click.option("--threshold", type=OptionalType(str), default=undefined) +@click.option("--comparator", type=OptionalType(AutoScalingMetricComparator), default=undefined) +@click.option("--step-size", type=OptionalType(int), default=undefined) +@click.option("--cooldown-seconds", type=OptionalType(int), default=undefined) +@click.option( + "--min-replicas", + type=OptionalType(int), + help="Set as -1 to remove min_replicas restriction.", + default=undefined, +) +@click.option( + "--max-replicas", + type=OptionalType(int), + help="Set as -1 to remove max_replicas restriction.", + default=undefined, +) +def update( + ctx: CLIContext, + rule: str, + *, + metric_source: str | Undefined, + metric_name: str | Undefined, + threshold: str | Undefined, + comparator: str | Undefined, + step_size: int | Undefined, + 
cooldown_seconds: int | Undefined, + min_replicas: Optional[int] | Undefined, + max_replicas: Optional[int] | Undefined, +): + with Session() as session: + try: + _threshold = decimal.Decimal(threshold) if threshold != undefined else undefined + except decimal.InvalidOperation: + ctx.output.print_fail(f"{threshold} is not a valid Decimal") + sys.exit(ExitCode.FAILURE) + + if min_replicas == -1: + min_replicas = None + if max_replicas == -1: + max_replicas = None + + try: + _rule = session.ServiceAutoScalingRule(uuid.UUID(rule)) + _rule.get() + _rule.update( + metric_source=metric_source, + metric_name=metric_name, + threshold=_threshold, + comparator=comparator, + step_size=step_size, + cooldown_seconds=cooldown_seconds, + min_replicas=min_replicas, + max_replicas=max_replicas, + ) + print_done(f"Auto Scaling Rule (ID {_rule.rule_id}) updated.") + except BackendAPIError as e: + ctx.output.print_fail(e.data["title"]) + sys.exit(ExitCode.FAILURE) + + @auto_scaling_rule.command() @pass_ctx_obj @click.argument("rule", type=str, metavar="NETWORK_ID_OR_NAME") @@ -171,8 +236,8 @@ def delete(ctx: CLIContext, rule): try: rule.get(fields=[service_auto_scaling_rule_fields["id"]]) rule.delete() - print_done(f"Network {rule} has been deleted.") + print_done(f"Auto scaling rule {rule.rule_id} has been deleted.") except BackendAPIError as e: - ctx.output.print_fail(f"Failed to delete rule {rule}:") + ctx.output.print_fail(f"Failed to delete rule {rule.rule_id}:") ctx.output.print_error(e) sys.exit(ExitCode.FAILURE) diff --git a/src/ai/backend/client/func/service_auto_scaling_rule.py b/src/ai/backend/client/func/service_auto_scaling_rule.py index 01c2103d616..7bf05d40f81 100644 --- a/src/ai/backend/client/func/service_auto_scaling_rule.py +++ b/src/ai/backend/client/func/service_auto_scaling_rule.py @@ -1,14 +1,16 @@ import textwrap from decimal import Decimal -from typing import Optional, Sequence +from typing import Any, Optional, Sequence from uuid import UUID from 
ai.backend.client.func.base import BaseFunction, api_function from ai.backend.client.output.types import FieldSpec, RelayPaginatedResult from ai.backend.client.pagination import execute_paginated_relay_query from ai.backend.client.session import api_session +from ai.backend.client.types import set_if_set from ai.backend.common.types import AutoScalingMetricComparator, AutoScalingMetricSource +from ...cli.types import Undefined, undefined from ..output.fields import service_auto_scaling_rule_fields _default_fields: Sequence[FieldSpec] = ( @@ -18,7 +20,6 @@ service_auto_scaling_rule_fields["comparator"], service_auto_scaling_rule_fields["threshold"], service_auto_scaling_rule_fields["endpoint"], - service_auto_scaling_rule_fields["comparator"], service_auto_scaling_rule_fields["step_size"], service_auto_scaling_rule_fields["cooldown_seconds"], service_auto_scaling_rule_fields["min_replicas"], @@ -46,7 +47,7 @@ async def paginated_list( return await execute_paginated_relay_query( "endpoint_auto_scaling_rule_nodes", { - "endpoint": (str(endpoint_id), "String"), + "endpoint": (str(endpoint_id), "String!"), "filter": (filter, "String"), "order": (order, "String"), }, @@ -74,16 +75,16 @@ async def create( """ mutation( $endpoint: String!, - $metric_source: String!, + $metric_source: AutoScalingMetricSource!, $metric_name: String!, $threshold: String!, - $comparator: String!, + $comparator: AutoScalingMetricComparator!, $step_size: Int!, $cooldown_seconds: Int!, $min_replicas: Int, $max_replicas: Int ) { - create_endpoint_auto_scaling_rule( + create_endpoint_auto_scaling_rule_node( endpoint: $endpoint, props: { metric_source: $metric_source, @@ -118,7 +119,7 @@ async def create( }, ) - return cls(rule_id=UUID(data["create_endpoint_auto_scaling_rule"]["rule"]["row_id"])) + return cls(rule_id=UUID(data["create_endpoint_auto_scaling_rule_node"]["rule"]["row_id"])) def __init__(self, rule_id: UUID) -> None: super().__init__() @@ -131,86 +132,67 @@ async def get( ) -> 
Sequence[dict]: query = textwrap.dedent( """\ - query($rule_id: UUID!) { - endpoint_auto_scaling_rule_node(rule_id: $rule_id) {$fields} + query($rule_id: String!) { + endpoint_auto_scaling_rule_node(id: $rule_id) {$fields} } """ ) query = query.replace("$fields", " ".join(f.field_ref for f in (fields or _default_fields))) - variables = {"rule_id": self.rule_id} + variables = {"rule_id": str(self.rule_id)} data = await api_session.get().Admin._query(query, variables) - return data["endpoint"] + return data["endpoint_auto_scaling_rule_node"] @api_function async def update( self, *, - metric_source: Optional[AutoScalingMetricSource] = None, - metric_name: Optional[str] = None, - threshold: Optional[Decimal] = None, - comparator: Optional[AutoScalingMetricComparator] = None, - step_size: Optional[int] = None, - cooldown_seconds: Optional[int] = None, - min_replicas: Optional[int] = None, - max_replicas: Optional[int] = None, + metric_source: AutoScalingMetricSource | Undefined = undefined, + metric_name: str | Undefined = undefined, + threshold: Decimal | Undefined = undefined, + comparator: AutoScalingMetricComparator | Undefined = undefined, + step_size: int | Undefined = undefined, + cooldown_seconds: int | Undefined = undefined, + min_replicas: Optional[int] | Undefined = undefined, + max_replicas: Optional[int] | Undefined = undefined, ) -> "ServiceAutoScalingRule": q = textwrap.dedent( """ mutation( $rule_id: String!, - $metric_source: String, - $metric_name: String, - $threshold: String, - $comparator: String, - $step_size: Int, - $cooldown_seconds: Int, - $min_replicas: Int, - $max_replicas: Int + $input: ModifyEndpointAutoScalingRuleInput!, ) { - modify_endpoint_auto_scaling_rule( + modify_endpoint_auto_scaling_rule_node( id: $rule_id, - props: { - metric_source: $metric_source, - metric_name: $metric_name, - threshold: $threshold, - comparator: $comparator, - step_size: $step_size, - cooldown_seconds: $cooldown_seconds, - min_replicas: $min_replicas, - 
max_replicas: $max_replicas - } + props: $input ) { - rule { - ok - msg - } + ok + msg } } """ ) + inputs: dict[str, Any] = {} + set_if_set(inputs, "metric_source", metric_source) + set_if_set(inputs, "metric_name", metric_name) + set_if_set(inputs, "threshold", threshold) + set_if_set(inputs, "comparator", comparator) + set_if_set(inputs, "step_size", step_size) + set_if_set(inputs, "cooldown_seconds", cooldown_seconds) + set_if_set(inputs, "min_replicas", min_replicas) + set_if_set(inputs, "max_replicas", max_replicas) data = await api_session.get().Admin._query( q, - { - "rule_id": str(self.rule_id), - "metric_source": metric_source, - "metric_name": metric_name, - "threshold": threshold, - "comparator": comparator, - "step_size": step_size, - "cooldown_seconds": cooldown_seconds, - "min_replicas": min_replicas, - "max_replicas": max_replicas, - }, + {"rule_id": str(self.rule_id), "input": inputs}, ) - return data["modify_endpoint_auto_scaling_rule"] + return data["modify_endpoint_auto_scaling_rule_node"] @api_function async def delete(self) -> None: q = textwrap.dedent( """ mutation($rule_id: String!) 
{ - delete_endpoint_auto_scaling_rule(id: $rule_id) { + delete_endpoint_auto_scaling_rule_node(id: $rule_id) { ok msg } @@ -222,4 +204,4 @@ async def delete(self) -> None: "rule_id": str(self.rule_id), } data = await api_session.get().Admin._query(q, variables) - return data["delete_endpoint_auto_scaling_rule"] + return data["delete_endpoint_auto_scaling_rule_node"] diff --git a/src/ai/backend/common/types.py b/src/ai/backend/common/types.py index c802cc3886b..48c87bc650b 100644 --- a/src/ai/backend/common/types.py +++ b/src/ai/backend/common/types.py @@ -38,6 +38,7 @@ Union, cast, overload, + override, ) import attrs @@ -1320,13 +1321,99 @@ def metric_string(self) -> str: return result -class AutoScalingMetricSource(enum.StrEnum): - KERNEL = "kernel" - INFERENCE_FRAMEWORK = "inference-framework" +class CIStrEnum(enum.StrEnum): + """ + An StrEnum variant to allow case-insenstive matching of the members while the values are + lowercased. + """ + + @override + @classmethod + def _missing_(cls, value: Any) -> Self | None: + assert isinstance(value, str) # since this is an StrEnum + value = value.lower() + # To prevent infinite recursion, we don't rely on "cls(value)" but manually search the + # members as the official stdlib example suggests. + for member in cls: + if member.value == value: + return member + return None + + # The defualt behavior of `enum.auto()` is to set the value to the lowercased member name. + + @classmethod + def as_trafaret(cls) -> t.Trafaret: + return CIStrEnumTrafaret(cls) + + +class CIUpperStrEnum(CIStrEnum): + """ + An StrEnum variant to allow case-insenstive matching of the members while the values are + UPPERCASED. 
+ """ + + @override + @classmethod + def _missing_(cls, value: Any) -> Self | None: + assert isinstance(value, str) # since this is an StrEnum + value = value.upper() + for member in cls: + if member.value == value: + return member + return None + + @override + @staticmethod + def _generate_next_value_(name, start, count, last_values) -> str: + return name.upper() + + @classmethod + def as_trafaret(cls) -> t.Trafaret: + return CIUpperStrEnumTrafaret(cls) + + +T_enum = TypeVar("T_enum", bound=enum.Enum) + + +class CIStrEnumTrafaret(t.Trafaret, Generic[T_enum]): + """ + A case-insensitive version of trafaret to parse StrEnum values. + """ + + def __init__(self, enum_cls: type[T_enum]) -> None: + self.enum_cls = enum_cls + + def check_and_return(self, value: str) -> T_enum: + try: + # Assume that the enum values are lowercases. + return self.enum_cls(value.lower()) + except (KeyError, ValueError): + self._failure(f"value is not a valid member of {self.enum_cls.__name__}", value=value) + + +class CIUpperStrEnumTrafaret(t.Trafaret, Generic[T_enum]): + """ + A case-insensitive version of trafaret to parse StrEnum values. + """ + + def __init__(self, enum_cls: type[T_enum]) -> None: + self.enum_cls = enum_cls + + def check_and_return(self, value: str) -> T_enum: + try: + # Assume that the enum values are lowercases. 
+ return self.enum_cls(value.upper()) + except (KeyError, ValueError): + self._failure(f"value is not a valid member of {self.enum_cls.__name__}", value=value) + + +class AutoScalingMetricSource(CIUpperStrEnum): + KERNEL = enum.auto() + INFERENCE_FRAMEWORK = enum.auto() -class AutoScalingMetricComparator(enum.StrEnum): - LESS_THAN = "lt" - LESS_THAN_OR_EQUAL = "le" - GREATER_THAN = "gt" - GREATER_THAN_OR_EQUAL = "ge" +class AutoScalingMetricComparator(CIUpperStrEnum): + LESS_THAN = enum.auto() + LESS_THAN_OR_EQUAL = enum.auto() + GREATER_THAN = enum.auto() + GREATER_THAN_OR_EQUAL = enum.auto() diff --git a/src/ai/backend/manager/models/gql_models/endpoint.py b/src/ai/backend/manager/models/gql_models/endpoint.py index 719d967e3ec..03f8f05e34e 100644 --- a/src/ai/backend/manager/models/gql_models/endpoint.py +++ b/src/ai/backend/manager/models/gql_models/endpoint.py @@ -264,6 +264,7 @@ class Meta: f"Available values: {", ".join([p.name for p in AutoScalingMetricSource])}" ), ), + default_value=Undefined, ) metric_name = graphene.String() threshold = graphene.String() @@ -273,7 +274,8 @@ class Meta: description=( f"Available values: {", ".join([p.name for p in AutoScalingMetricComparator])}" ), - ) + ), + default_value=Undefined, ) step_size = graphene.Int() cooldown_seconds = graphene.Int() From 0fa8de858c28bd0863e8f0cc673bf1951d6c0607 Mon Sep 17 00:00:00 2001 From: Kyujin Cho Date: Thu, 2 Jan 2025 10:32:36 +0000 Subject: [PATCH 39/75] chore: update GraphQL schema dump Co-authored-by: octodog --- src/ai/backend/manager/api/schema.graphql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ai/backend/manager/api/schema.graphql b/src/ai/backend/manager/api/schema.graphql index 938b70c7148..31042a12084 100644 --- a/src/ai/backend/manager/api/schema.graphql +++ b/src/ai/backend/manager/api/schema.graphql @@ -2679,10 +2679,10 @@ type ModifyEndpointAutoScalingRuleNode { """Added in 25.01.0.""" input ModifyEndpointAutoScalingRuleInput { - 
metric_source: AutoScalingMetricSource = null + metric_source: AutoScalingMetricSource metric_name: String threshold: String - comparator: AutoScalingMetricComparator = null + comparator: AutoScalingMetricComparator step_size: Int cooldown_seconds: Int min_replicas: Int From c0301ebc0ae246da78161a082d3ba9731851f8f0 Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Mon, 13 Jan 2025 15:25:43 +0000 Subject: [PATCH 40/75] chore: update api schema dump Co-authored-by: octodog --- docs/manager/rest-reference/openapi.json | 204 +++++++++++------------ 1 file changed, 102 insertions(+), 102 deletions(-) diff --git a/docs/manager/rest-reference/openapi.json b/docs/manager/rest-reference/openapi.json index 6b888d1ea35..925a6a6d3ac 100644 --- a/docs/manager/rest-reference/openapi.json +++ b/docs/manager/rest-reference/openapi.json @@ -1445,7 +1445,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/events/session": { @@ -1515,7 +1515,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/auth": { @@ -2099,7 +2099,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "delete": { "operationId": "folders.delete_by_id", @@ -2155,7 +2155,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "delete": { "operationId": "folders.delete_by_name", @@ -2255,7 +2255,7 @@ 
"in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/folders/_/all-hosts": { @@ -2275,7 +2275,7 @@ } ], "parameters": [], - "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" } }, "/folders/_/allowed-types": { @@ -2295,7 +2295,7 @@ } ], "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/folders/_/all_hosts": { @@ -2315,7 +2315,7 @@ } ], "parameters": [], - "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" } }, "/folders/_/allowed_types": { @@ -2335,7 +2335,7 @@ } ], "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/folders/_/perf-metric": { @@ -2364,7 +2364,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" } }, "/folders/{name}/rename": { @@ -2521,7 +2521,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, 
"/folders/{name}/request-upload": { @@ -2572,7 +2572,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/folders/{name}/request-download": { @@ -2623,7 +2623,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/folders/{name}/move-file": { @@ -2674,7 +2674,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/folders/{name}/rename-file": { @@ -2729,7 +2729,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/folders/{name}/delete-files": { @@ -2783,7 +2783,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "delete": { "operationId": "folders.delete_files.2", @@ -2829,7 +2829,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/folders/{name}/rename_file": { @@ -2884,7 +2884,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, 
"/folders/{name}/delete_files": { @@ -2932,7 +2932,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/folders/{name}/files": { @@ -2969,7 +2969,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/folders/{name}/invite": { @@ -3402,7 +3402,7 @@ } ], "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/folders/invitations/list_sent": { @@ -3422,7 +3422,7 @@ } ], "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/folders/invitations/update/{inv_id}": { @@ -3494,7 +3494,7 @@ } ], "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/folders/invitations/accept": { @@ -3627,7 +3627,7 @@ "in": "query" } ], - "description": "\nList shared vfolders.\n\nNot available for group vfolders.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\nList shared vfolders.\n\nNot available for group vfolders.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "post": { "operationId": "folders.update_shared_vfolder", @@ -3743,7 +3743,7 @@ "in": "query" } ], - "description": 
"\nReturn the contents of `/etc/fstab` file.\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" + "description": "\nReturn the contents of `/etc/fstab` file.\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" } }, "/folders/_/mounts": { @@ -3763,7 +3763,7 @@ } ], "parameters": [], - "description": "\nList all mounted vfolder hosts in vfroot.\n\nAll mounted hosts from connected (ALIVE) agents are also gathered.\nGenerally, agents should be configured to have same hosts structure,\nbut newly introduced one may not.\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" + "description": "\nList all mounted vfolder hosts in vfroot.\n\nAll mounted hosts from connected (ALIVE) agents are also gathered.\nGenerally, agents should be configured to have same hosts structure,\nbut newly introduced one may not.\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" }, "post": { "operationId": "folders.mount_host", @@ -4001,7 +4001,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "post": { "operationId": "folders.update_quota", @@ -4086,7 +4086,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" } }, "/folders/_/used-bytes": { @@ -4124,7 +4124,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" } }, "//graphql": { @@ -4312,7 +4312,7 @@ } 
}, "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "post": { "operationId": "services.create", @@ -4409,7 +4409,7 @@ } ], "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/services/{service_id}": { @@ -4445,7 +4445,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "delete": { "operationId": "services.delete", @@ -4479,7 +4479,7 @@ } } ], - "description": "\nRemoves model service (and inference sessions for the service also).\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\nRemoves model service (and inference sessions for the service also).\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/services/{service_id}/errors": { @@ -4515,7 +4515,7 @@ } } ], - "description": "\nList errors raised while trying to create the inference sessions. Backend.AI will\nstop trying to create an inference session for the model service if six (6) error stacks\nup. The only way to clear the error and retry spawning session is to call\n`clear_error` (POST /services/{service_id}/errors/clear) API.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\nList errors raised while trying to create the inference sessions. Backend.AI will\nstop trying to create an inference session for the model service if six (6) error stacks\nup. 
The only way to clear the error and retry spawning session is to call\n`clear_error` (POST /services/{service_id}/errors/clear) API.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/services/{service_id}/errors/clear": { @@ -4544,7 +4544,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/services/{service_id}/scale": { @@ -4589,7 +4589,7 @@ } } ], - "description": "\nUpdates ideal inference session count manually. Based on the difference of this number,\ninference sessions will be created or removed automatically.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\nUpdates ideal inference session count manually. Based on the difference of this number,\ninference sessions will be created or removed automatically.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/services/{service_id}/sync": { @@ -4625,7 +4625,7 @@ } } ], - "description": "\nForce syncs up-to-date model service information with AppProxy.\nIn normal situations this will be automatically handled by Backend.AI schedulers,\nbut this API is left open in case of unexpected restart of AppProxy process.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\nForce syncs up-to-date model service information with AppProxy.\nIn normal situations this will be automatically handled by Backend.AI schedulers,\nbut this API is left open in case of unexpected restart of AppProxy process.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/services/{service_id}/routings/{route_id}": { @@ -4678,7 +4678,7 @@ } } ], - "description": "\nUpdates traffic bias of specific 
route.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\nUpdates traffic bias of specific route.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "delete": { "operationId": "services.delete_route", @@ -4720,7 +4720,7 @@ } } ], - "description": "\nScales down the service by removing specific inference session.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\nScales down the service by removing specific inference session.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/services/{service_id}/token": { @@ -4765,7 +4765,7 @@ } } ], - "description": "\nGenerates a token which acts as an API key to authenticate when calling model service endpoint.\nIf both duration and valid_until is not set then the AppProxy will determine appropriate lifetime of the token.\nduration and valid_until can't be both specified.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\nGenerates a token which acts as an API key to authenticate when calling model service endpoint.\nIf both duration and valid_until is not set then the AppProxy will determine appropriate lifetime of the token.\nduration and valid_until can't be both specified.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/session": { @@ -5381,7 +5381,7 @@ "in": "query" } ], - "description": "\nA quick session-ID matcher API for use with auto-completion in CLI.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\nA quick session-ID matcher API for use with auto-completion in CLI.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/session/_/sync-agent-registry": { @@ -5484,7 +5484,7 @@ } } ], - "description": 
"\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "patch": { "operationId": "session.restart", @@ -5527,7 +5527,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "delete": { "operationId": "session.destroy", @@ -5580,7 +5580,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "post": { "operationId": "session.execute", @@ -5607,7 +5607,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/session/_/logs": { @@ -5646,7 +5646,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "get": { "operationId": "session.get_task_logs.2", @@ -5674,7 +5674,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/session/{session_name}/direct-access-info": { @@ -5703,7 +5703,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/session/{session_name}/logs": { @@ 
-5741,7 +5741,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/session/{session_name}/rename": { @@ -5818,7 +5818,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/session/{session_name}/complete": { @@ -5847,7 +5847,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/session/{session_name}/shutdown-service": { @@ -5894,7 +5894,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/session/{session_name}/upload": { @@ -5923,7 +5923,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/session/{session_name}/download": { @@ -5995,7 +5995,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/session/{session_name}/download_single": { @@ -6064,7 +6064,7 @@ } } ], - "description": "\nDownload a single file from the scratch root. Only for small files.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\nDownload a single file from the scratch root. 
Only for small files.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/session/{session_name}/files": { @@ -6093,7 +6093,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/session/{session_name}/start-service": { @@ -6154,7 +6154,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/session/{session_name}/commit": { @@ -6348,7 +6348,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/stream/session/{session_name}/pty": { @@ -6377,7 +6377,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/stream/session/{session_name}/execute": { @@ -6406,7 +6406,7 @@ } } ], - "description": "\nWebSocket-version of gateway.kernel.execute().\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\nWebSocket-version of gateway.kernel.execute().\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/stream/session/{session_name}/apps": { @@ -6435,7 +6435,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/stream/session/{session_name}/httpproxy": { @@ -6498,7 +6498,7 @@ 
"in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/stream/session/{session_name}/tcpproxy": { @@ -6561,7 +6561,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/manager/status": { @@ -6884,7 +6884,7 @@ } }, "parameters": [], - "description": "\nReturns the list of all resource presets in the current scaling group,\nwith additional information including allocatability of each preset,\namount of total remaining resources, and the current keypair resource limits.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\nReturns the list of all resource presets in the current scaling group,\nwith additional information including allocatability of each preset,\namount of total remaining resources, and the current keypair resource limits.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/resource/recalculate-usage": { @@ -6904,7 +6904,7 @@ } ], "parameters": [], - "description": "\nUpdate `keypair_resource_usages` in redis and `agents.c.occupied_slots`.\n\nThose two values are sometimes out of sync. In that case, calling this API\nre-calculates the values for running containers and updates them in DB.\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" + "description": "\nUpdate `keypair_resource_usages` in redis and `agents.c.occupied_slots`.\n\nThose two values are sometimes out of sync. 
In that case, calling this API\nre-calculates the values for running containers and updates them in DB.\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" } }, "/resource/usage/month": { @@ -6945,7 +6945,7 @@ "in": "query" } ], - "description": "\nReturn usage statistics of terminated containers for a specified month.\nThe date/time comparison is done using the configured timezone.\n\n:param group_ids: If not None, query containers only in those groups.\n:param month: The year-month to query usage statistics. ex) \"202006\" to query for Jun 2020\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" + "description": "\nReturn usage statistics of terminated containers for a specified month.\nThe date/time comparison is done using the configured timezone.\n\n:param group_ids: If not None, query containers only in those groups.\n:param month: The year-month to query usage statistics. ex) \"202006\" to query for Jun 2020\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" } }, "/resource/usage/period": { @@ -6992,7 +6992,7 @@ "in": "query" } ], - "description": "\nReturn usage statistics of terminated containers belonged to the given group for a specified\nperiod in dates.\nThe date/time comparison is done using the configured timezone.\n\n:param project_id: If not None, query containers only in the project.\n:param start_date str: \"yyyymmdd\" format.\n:param end_date str: \"yyyymmdd\" format.\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" + "description": "\nReturn usage statistics of terminated containers belonged to the given group for a specified\nperiod in dates.\nThe date/time comparison is done using the configured timezone.\n\n:param project_id: If not None, query containers only in the project.\n:param start_date str: \"yyyymmdd\" format.\n:param end_date str: \"yyyymmdd\" 
format.\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" } }, "/resource/stats/user/month": { @@ -7012,7 +7012,7 @@ } ], "parameters": [], - "description": "\nReturn time-binned (15 min) stats for terminated user sessions\nover last 30 days.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\nReturn time-binned (15 min) stats for terminated user sessions\nover last 30 days.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/resource/stats/admin/month": { @@ -7032,7 +7032,7 @@ } ], "parameters": [], - "description": "\nReturn time-binned (15 min) stats for all terminated sessions\nover last 30 days.\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" + "description": "\nReturn time-binned (15 min) stats for all terminated sessions\nover last 30 days.\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" } }, "/resource/watcher": { @@ -7061,7 +7061,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" } }, "/resource/watcher/agent/start": { @@ -7099,7 +7099,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" } }, "/resource/watcher/agent/stop": { @@ -7137,7 +7137,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" } }, 
"/resource/watcher/agent/restart": { @@ -7175,7 +7175,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" } }, "/scaling-groups": { @@ -7212,7 +7212,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/scaling-groups/{scaling_group}/wsproxy-version": { @@ -7257,7 +7257,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/template/cluster": { @@ -7308,7 +7308,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "get": { "operationId": "template/cluster.list_template", @@ -7351,7 +7351,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/template/cluster/{template_id}": { @@ -7401,7 +7401,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "put": { "operationId": "template/cluster.put", @@ -7449,7 +7449,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": 
"\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "delete": { "operationId": "template/cluster.delete", @@ -7484,7 +7484,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/template/session": { @@ -7535,7 +7535,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "get": { "operationId": "template/session.list_template", @@ -7578,7 +7578,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/template/session/{template_id}": { @@ -7628,7 +7628,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "put": { "operationId": "template/session.put", @@ -7686,7 +7686,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "delete": { "operationId": "template/session.delete", @@ -7721,7 +7721,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/user-config/dotfiles": { @@ -7771,7 +7771,7 @@ } }, "parameters": [], - "description": 
"\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "get": { "operationId": "user-config.list_or_get", @@ -7806,7 +7806,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "patch": { "operationId": "user-config.update", @@ -7854,7 +7854,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "delete": { "operationId": "user-config.delete", @@ -7889,7 +7889,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/user-config/bootstrap-script": { @@ -7927,7 +7927,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "get": { "operationId": "user-config.get_bootstrap_script", @@ -7945,7 +7945,7 @@ } ], "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/domain-config/dotfiles": { @@ -7996,7 +7996,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status 
required: RUNNING\n" }, "get": { "operationId": "domain-config.list_or_get", @@ -8031,7 +8031,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "patch": { "operationId": "domain-config.update", @@ -8080,7 +8080,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: RUNNING\n" }, "delete": { "operationId": "domain-config.delete", @@ -8115,7 +8115,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: RUNNING\n" } }, "/group-config/dotfiles": { @@ -8177,7 +8177,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: RUNNING\n" }, "get": { "operationId": "group-config.list_or_get", @@ -8228,7 +8228,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "patch": { "operationId": "group-config.update", @@ -8288,7 +8288,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: RUNNING\n" }, "delete": { "operationId": "group-config.delete", @@ -8339,7 +8339,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* Admin 
privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: RUNNING\n" } }, "/logs/error": { @@ -8411,7 +8411,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "get": { "operationId": "logs/error.list_logs", @@ -8456,7 +8456,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/logs/error/{log_id}/clear": { @@ -8485,7 +8485,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } } } From 65915df56b37b7eedf639e1d89c81def34b0dde6 Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Mon, 13 Jan 2025 15:27:38 +0000 Subject: [PATCH 41/75] chore: update api schema dump Co-authored-by: octodog --- docs/manager/rest-reference/openapi.json | 204 +++++++++++------------ 1 file changed, 102 insertions(+), 102 deletions(-) diff --git a/docs/manager/rest-reference/openapi.json b/docs/manager/rest-reference/openapi.json index 925a6a6d3ac..6b888d1ea35 100644 --- a/docs/manager/rest-reference/openapi.json +++ b/docs/manager/rest-reference/openapi.json @@ -1445,7 +1445,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/events/session": { @@ -1515,7 +1515,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status 
required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/auth": { @@ -2099,7 +2099,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "delete": { "operationId": "folders.delete_by_id", @@ -2155,7 +2155,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "delete": { "operationId": "folders.delete_by_name", @@ -2255,7 +2255,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/folders/_/all-hosts": { @@ -2275,7 +2275,7 @@ } ], "parameters": [], - "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" } }, "/folders/_/allowed-types": { @@ -2295,7 +2295,7 @@ } ], "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/folders/_/all_hosts": { @@ -2315,7 +2315,7 @@ } ], "parameters": [], - "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" } }, "/folders/_/allowed_types": { @@ -2335,7 +2335,7 @@ } ], "parameters": [], - "description": 
"\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/folders/_/perf-metric": { @@ -2364,7 +2364,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" } }, "/folders/{name}/rename": { @@ -2521,7 +2521,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/folders/{name}/request-upload": { @@ -2572,7 +2572,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/folders/{name}/request-download": { @@ -2623,7 +2623,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/folders/{name}/move-file": { @@ -2674,7 +2674,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/folders/{name}/rename-file": { @@ -2729,7 +2729,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/folders/{name}/delete-files": { @@ -2783,7 +2783,7 @@ } } ], - "description": "\n**Preconditions:**\n* User 
privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "delete": { "operationId": "folders.delete_files.2", @@ -2829,7 +2829,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/folders/{name}/rename_file": { @@ -2884,7 +2884,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/folders/{name}/delete_files": { @@ -2932,7 +2932,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/folders/{name}/files": { @@ -2969,7 +2969,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/folders/{name}/invite": { @@ -3402,7 +3402,7 @@ } ], "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/folders/invitations/list_sent": { @@ -3422,7 +3422,7 @@ } ], "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/folders/invitations/update/{inv_id}": { @@ -3494,7 +3494,7 @@ } ], "parameters": 
[], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/folders/invitations/accept": { @@ -3627,7 +3627,7 @@ "in": "query" } ], - "description": "\nList shared vfolders.\n\nNot available for group vfolders.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\nList shared vfolders.\n\nNot available for group vfolders.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "post": { "operationId": "folders.update_shared_vfolder", @@ -3743,7 +3743,7 @@ "in": "query" } ], - "description": "\nReturn the contents of `/etc/fstab` file.\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" + "description": "\nReturn the contents of `/etc/fstab` file.\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" } }, "/folders/_/mounts": { @@ -3763,7 +3763,7 @@ } ], "parameters": [], - "description": "\nList all mounted vfolder hosts in vfroot.\n\nAll mounted hosts from connected (ALIVE) agents are also gathered.\nGenerally, agents should be configured to have same hosts structure,\nbut newly introduced one may not.\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" + "description": "\nList all mounted vfolder hosts in vfroot.\n\nAll mounted hosts from connected (ALIVE) agents are also gathered.\nGenerally, agents should be configured to have same hosts structure,\nbut newly introduced one may not.\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" }, "post": { "operationId": "folders.mount_host", @@ -4001,7 +4001,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + 
"description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "post": { "operationId": "folders.update_quota", @@ -4086,7 +4086,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" } }, "/folders/_/used-bytes": { @@ -4124,7 +4124,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" } }, "//graphql": { @@ -4312,7 +4312,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "post": { "operationId": "services.create", @@ -4409,7 +4409,7 @@ } ], "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/services/{service_id}": { @@ -4445,7 +4445,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "delete": { "operationId": "services.delete", @@ -4479,7 +4479,7 @@ } } ], - "description": "\nRemoves model service (and inference sessions for the service also).\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\nRemoves model service (and inference sessions for the service also).\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: 
FROZEN\n" } }, "/services/{service_id}/errors": { @@ -4515,7 +4515,7 @@ } } ], - "description": "\nList errors raised while trying to create the inference sessions. Backend.AI will\nstop trying to create an inference session for the model service if six (6) error stacks\nup. The only way to clear the error and retry spawning session is to call\n`clear_error` (POST /services/{service_id}/errors/clear) API.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\nList errors raised while trying to create the inference sessions. Backend.AI will\nstop trying to create an inference session for the model service if six (6) error stacks\nup. The only way to clear the error and retry spawning session is to call\n`clear_error` (POST /services/{service_id}/errors/clear) API.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/services/{service_id}/errors/clear": { @@ -4544,7 +4544,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/services/{service_id}/scale": { @@ -4589,7 +4589,7 @@ } } ], - "description": "\nUpdates ideal inference session count manually. Based on the difference of this number,\ninference sessions will be created or removed automatically.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\nUpdates ideal inference session count manually. 
Based on the difference of this number,\ninference sessions will be created or removed automatically.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/services/{service_id}/sync": { @@ -4625,7 +4625,7 @@ } } ], - "description": "\nForce syncs up-to-date model service information with AppProxy.\nIn normal situations this will be automatically handled by Backend.AI schedulers,\nbut this API is left open in case of unexpected restart of AppProxy process.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\nForce syncs up-to-date model service information with AppProxy.\nIn normal situations this will be automatically handled by Backend.AI schedulers,\nbut this API is left open in case of unexpected restart of AppProxy process.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/services/{service_id}/routings/{route_id}": { @@ -4678,7 +4678,7 @@ } } ], - "description": "\nUpdates traffic bias of specific route.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\nUpdates traffic bias of specific route.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "delete": { "operationId": "services.delete_route", @@ -4720,7 +4720,7 @@ } } ], - "description": "\nScales down the service by removing specific inference session.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\nScales down the service by removing specific inference session.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/services/{service_id}/token": { @@ -4765,7 +4765,7 @@ } } ], - "description": "\nGenerates a token which acts as an API key to authenticate when calling model service endpoint.\nIf both duration and valid_until is not set then the AppProxy will 
determine appropriate lifetime of the token.\nduration and valid_until can't be both specified.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\nGenerates a token which acts as an API key to authenticate when calling model service endpoint.\nIf both duration and valid_until is not set then the AppProxy will determine appropriate lifetime of the token.\nduration and valid_until can't be both specified.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/session": { @@ -5381,7 +5381,7 @@ "in": "query" } ], - "description": "\nA quick session-ID matcher API for use with auto-completion in CLI.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\nA quick session-ID matcher API for use with auto-completion in CLI.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/session/_/sync-agent-registry": { @@ -5484,7 +5484,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "patch": { "operationId": "session.restart", @@ -5527,7 +5527,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "delete": { "operationId": "session.destroy", @@ -5580,7 +5580,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "post": { "operationId": "session.execute", @@ -5607,7 +5607,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager 
status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/session/_/logs": { @@ -5646,7 +5646,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "get": { "operationId": "session.get_task_logs.2", @@ -5674,7 +5674,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/session/{session_name}/direct-access-info": { @@ -5703,7 +5703,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/session/{session_name}/logs": { @@ -5741,7 +5741,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/session/{session_name}/rename": { @@ -5818,7 +5818,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/session/{session_name}/complete": { @@ -5847,7 +5847,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/session/{session_name}/shutdown-service": { @@ -5894,7 +5894,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege 
required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/session/{session_name}/upload": { @@ -5923,7 +5923,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/session/{session_name}/download": { @@ -5995,7 +5995,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/session/{session_name}/download_single": { @@ -6064,7 +6064,7 @@ } } ], - "description": "\nDownload a single file from the scratch root. Only for small files.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\nDownload a single file from the scratch root. 
Only for small files.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/session/{session_name}/files": { @@ -6093,7 +6093,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/session/{session_name}/start-service": { @@ -6154,7 +6154,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/session/{session_name}/commit": { @@ -6348,7 +6348,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/stream/session/{session_name}/pty": { @@ -6377,7 +6377,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/stream/session/{session_name}/execute": { @@ -6406,7 +6406,7 @@ } } ], - "description": "\nWebSocket-version of gateway.kernel.execute().\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\nWebSocket-version of gateway.kernel.execute().\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/stream/session/{session_name}/apps": { @@ -6435,7 +6435,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/stream/session/{session_name}/httpproxy": { @@ -6498,7 +6498,7 @@ 
"in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/stream/session/{session_name}/tcpproxy": { @@ -6561,7 +6561,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/manager/status": { @@ -6884,7 +6884,7 @@ } }, "parameters": [], - "description": "\nReturns the list of all resource presets in the current scaling group,\nwith additional information including allocatability of each preset,\namount of total remaining resources, and the current keypair resource limits.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\nReturns the list of all resource presets in the current scaling group,\nwith additional information including allocatability of each preset,\namount of total remaining resources, and the current keypair resource limits.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/resource/recalculate-usage": { @@ -6904,7 +6904,7 @@ } ], "parameters": [], - "description": "\nUpdate `keypair_resource_usages` in redis and `agents.c.occupied_slots`.\n\nThose two values are sometimes out of sync. In that case, calling this API\nre-calculates the values for running containers and updates them in DB.\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" + "description": "\nUpdate `keypair_resource_usages` in redis and `agents.c.occupied_slots`.\n\nThose two values are sometimes out of sync. 
In that case, calling this API\nre-calculates the values for running containers and updates them in DB.\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" } }, "/resource/usage/month": { @@ -6945,7 +6945,7 @@ "in": "query" } ], - "description": "\nReturn usage statistics of terminated containers for a specified month.\nThe date/time comparison is done using the configured timezone.\n\n:param group_ids: If not None, query containers only in those groups.\n:param month: The year-month to query usage statistics. ex) \"202006\" to query for Jun 2020\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" + "description": "\nReturn usage statistics of terminated containers for a specified month.\nThe date/time comparison is done using the configured timezone.\n\n:param group_ids: If not None, query containers only in those groups.\n:param month: The year-month to query usage statistics. ex) \"202006\" to query for Jun 2020\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" } }, "/resource/usage/period": { @@ -6992,7 +6992,7 @@ "in": "query" } ], - "description": "\nReturn usage statistics of terminated containers belonged to the given group for a specified\nperiod in dates.\nThe date/time comparison is done using the configured timezone.\n\n:param project_id: If not None, query containers only in the project.\n:param start_date str: \"yyyymmdd\" format.\n:param end_date str: \"yyyymmdd\" format.\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" + "description": "\nReturn usage statistics of terminated containers belonged to the given group for a specified\nperiod in dates.\nThe date/time comparison is done using the configured timezone.\n\n:param project_id: If not None, query containers only in the project.\n:param start_date str: \"yyyymmdd\" format.\n:param end_date str: \"yyyymmdd\" 
format.\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" } }, "/resource/stats/user/month": { @@ -7012,7 +7012,7 @@ } ], "parameters": [], - "description": "\nReturn time-binned (15 min) stats for terminated user sessions\nover last 30 days.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\nReturn time-binned (15 min) stats for terminated user sessions\nover last 30 days.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/resource/stats/admin/month": { @@ -7032,7 +7032,7 @@ } ], "parameters": [], - "description": "\nReturn time-binned (15 min) stats for all terminated sessions\nover last 30 days.\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" + "description": "\nReturn time-binned (15 min) stats for all terminated sessions\nover last 30 days.\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" } }, "/resource/watcher": { @@ -7061,7 +7061,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" } }, "/resource/watcher/agent/start": { @@ -7099,7 +7099,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" } }, "/resource/watcher/agent/stop": { @@ -7137,7 +7137,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" } }, "/resource/watcher/agent/restart": 
{ @@ -7175,7 +7175,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" } }, "/scaling-groups": { @@ -7212,7 +7212,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/scaling-groups/{scaling_group}/wsproxy-version": { @@ -7257,7 +7257,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/template/cluster": { @@ -7308,7 +7308,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "get": { "operationId": "template/cluster.list_template", @@ -7351,7 +7351,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/template/cluster/{template_id}": { @@ -7401,7 +7401,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "put": { "operationId": "template/cluster.put", @@ -7449,7 +7449,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege 
required.\n* Manager status required: FROZEN\n" }, "delete": { "operationId": "template/cluster.delete", @@ -7484,7 +7484,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/template/session": { @@ -7535,7 +7535,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "get": { "operationId": "template/session.list_template", @@ -7578,7 +7578,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/template/session/{template_id}": { @@ -7628,7 +7628,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "put": { "operationId": "template/session.put", @@ -7686,7 +7686,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "delete": { "operationId": "template/session.delete", @@ -7721,7 +7721,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/user-config/dotfiles": { @@ -7771,7 +7771,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* 
Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "get": { "operationId": "user-config.list_or_get", @@ -7806,7 +7806,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "patch": { "operationId": "user-config.update", @@ -7854,7 +7854,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "delete": { "operationId": "user-config.delete", @@ -7889,7 +7889,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/user-config/bootstrap-script": { @@ -7927,7 +7927,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "get": { "operationId": "user-config.get_bootstrap_script", @@ -7945,7 +7945,7 @@ } ], "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/domain-config/dotfiles": { @@ -7996,7 +7996,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: FROZEN\n" }, "get": { "operationId": 
"domain-config.list_or_get", @@ -8031,7 +8031,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "patch": { "operationId": "domain-config.update", @@ -8080,7 +8080,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: FROZEN\n" }, "delete": { "operationId": "domain-config.delete", @@ -8115,7 +8115,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: FROZEN\n" } }, "/group-config/dotfiles": { @@ -8177,7 +8177,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: FROZEN\n" }, "get": { "operationId": "group-config.list_or_get", @@ -8228,7 +8228,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "patch": { "operationId": "group-config.update", @@ -8288,7 +8288,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: FROZEN\n" }, "delete": { "operationId": "group-config.delete", @@ -8339,7 +8339,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: 
RUNNING\n" + "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: FROZEN\n" } }, "/logs/error": { @@ -8411,7 +8411,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "get": { "operationId": "logs/error.list_logs", @@ -8456,7 +8456,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/logs/error/{log_id}/clear": { @@ -8485,7 +8485,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } } } From 8ed577187ba20e86c1094f82c7f74076a68f0d7e Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Tue, 14 Jan 2025 00:27:17 +0900 Subject: [PATCH 42/75] fix: Rebase the alembic migration history --- .../fb89f5d7817b_create_endpoint_auto_scaling_rules_table.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ai/backend/manager/models/alembic/versions/fb89f5d7817b_create_endpoint_auto_scaling_rules_table.py b/src/ai/backend/manager/models/alembic/versions/fb89f5d7817b_create_endpoint_auto_scaling_rules_table.py index 245d7dbdb2c..d4eb2895e84 100644 --- a/src/ai/backend/manager/models/alembic/versions/fb89f5d7817b_create_endpoint_auto_scaling_rules_table.py +++ b/src/ai/backend/manager/models/alembic/versions/fb89f5d7817b_create_endpoint_auto_scaling_rules_table.py @@ -1,7 +1,7 @@ """create endpoint_auto_scaling_rules table Revision ID: fb89f5d7817b -Revises: 0bb88d5a46bf +Revises: ef9a7960d234 Create Date: 2024-12-20 01:48:21.009056 """ @@ -13,7 +13,7 @@ # revision identifiers, used 
by Alembic. revision = "fb89f5d7817b" -down_revision = "0bb88d5a46bf" +down_revision = "ef9a7960d234" branch_labels = None depends_on = None From 747e5ca5ae251515a6bc4eecd36e57f0b97e89e8 Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Tue, 14 Jan 2025 00:43:12 +0900 Subject: [PATCH 43/75] fix: Some obvious missing/mistakes --- src/ai/backend/client/cli/service_auto_scaling_rule.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ai/backend/client/cli/service_auto_scaling_rule.py b/src/ai/backend/client/cli/service_auto_scaling_rule.py index 43da1d0657f..30119a5aa2a 100644 --- a/src/ai/backend/client/cli/service_auto_scaling_rule.py +++ b/src/ai/backend/client/cli/service_auto_scaling_rule.py @@ -98,7 +98,7 @@ def create( @click.option("--order", default=None, help="Set the query ordering expression.") @click.option("--offset", default=0, help="The index of the current page start for pagination.") @click.option("--limit", type=int, default=None, help="The page size for pagination.") -def list(ctx: CLIContext, service: str, format, filter_, order, offset, limit): +def list(ctx: CLIContext, service: str, format, filter_, order, offset, limit) -> None: """List all set auto scaling rules for given model service.""" if format: @@ -140,7 +140,7 @@ def list(ctx: CLIContext, service: str, format, filter_, order, offset, limit): default=None, help="Display only specified fields. 
When specifying multiple fields separate them with comma (,).", ) -def get(ctx: CLIContext, rule, format): +def get(ctx: CLIContext, rule, format) -> None: """Prints attributes of given auto scaling rule.""" fields: Iterable[Any] if format: @@ -195,7 +195,7 @@ def update( cooldown_seconds: int | Undefined, min_replicas: Optional[int] | Undefined, max_replicas: Optional[int] | Undefined, -): +) -> None: with Session() as session: try: _threshold = decimal.Decimal(threshold) if threshold != undefined else undefined @@ -229,8 +229,8 @@ def update( @auto_scaling_rule.command() @pass_ctx_obj -@click.argument("rule", type=str, metavar="NETWORK_ID_OR_NAME") -def delete(ctx: CLIContext, rule): +@click.argument("rule", type=str, metavar="RULE_ID") +def delete(ctx: CLIContext, rule) -> None: with Session() as session: rule = session.ServiceAutoScalingRule(uuid.UUID(rule)) try: From a9cfd35b9a7c9a0ec6c4fb019aec01567688b7e9 Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Tue, 14 Jan 2025 01:19:59 +0900 Subject: [PATCH 44/75] fix: Consistency of docs/comment, var naming, choice params --- src/ai/backend/cli/params.py | 22 +++++-- .../client/cli/service_auto_scaling_rule.py | 58 +++++++++++-------- 2 files changed, 51 insertions(+), 29 deletions(-) diff --git a/src/ai/backend/cli/params.py b/src/ai/backend/cli/params.py index 700edba3b9c..c24de13a04f 100644 --- a/src/ai/backend/cli/params.py +++ b/src/ai/backend/cli/params.py @@ -1,7 +1,16 @@ import json import re from decimal import Decimal -from typing import Any, Generic, Mapping, Optional, Protocol, TypeVar, Union +from typing import ( + Any, + Generic, + Mapping, + Optional, + Protocol, + Self, + TypeVar, + Union, +) import click import trafaret @@ -186,6 +195,7 @@ def convert(self, arg, param, ctx): class SingleValueConstructorType(Protocol): + def __new__(cls, value: Any) -> Self: ... def __init__(self, value: Any) -> None: ... @@ -195,7 +205,7 @@ def __init__(self, value: Any) -> None: ... 
class OptionalType(click.ParamType, Generic[TScalar]): name = "Optional Type Wrapper" - def __init__(self, type_: type[TScalar] | type[click.ParamType]) -> None: + def __init__(self, type_: type[TScalar] | type[click.ParamType] | click.ParamType) -> None: super().__init__() self.type_ = type_ @@ -203,8 +213,10 @@ def convert(self, value: Any, param, ctx) -> TScalar | Undefined: try: if value is undefined: return undefined - if issubclass(self.type_, click.ParamType): - return self.type_()(value) - return self.type_(value) + match self.type_: + case click.ParamType() | type(): + return self.type_(value) + case _: + return self.type_()(value) except ValueError: self.fail(f"{value!r} is not valid `{self.type_}` or `undefined`", param, ctx) diff --git a/src/ai/backend/client/cli/service_auto_scaling_rule.py b/src/ai/backend/client/cli/service_auto_scaling_rule.py index 30119a5aa2a..8517bf5a3c9 100644 --- a/src/ai/backend/client/cli/service_auto_scaling_rule.py +++ b/src/ai/backend/client/cli/service_auto_scaling_rule.py @@ -30,7 +30,7 @@ @service.group() def auto_scaling_rule(): - """Set of model service auto scaling rule operations""" + """Set of model service auto-scaling rule operations""" @auto_scaling_rule.command() @@ -57,7 +57,7 @@ def create( min_replicas: Optional[int] = None, max_replicas: Optional[int] = None, ) -> None: - """Create a new auto scaling rule.""" + """Create a new auto-scaling rule.""" with Session() as session: try: @@ -79,7 +79,7 @@ def create( min_replicas=min_replicas, max_replicas=max_replicas, ) - print_done(f"Auto Scaling Rule (ID {rule.rule_id}) created.") + print_done(f"Auto-scaling Rule (ID {rule.rule_id}) created.") except Exception as e: ctx.output.print_error(e) sys.exit(ExitCode.FAILURE) @@ -99,7 +99,7 @@ def create( @click.option("--offset", default=0, help="The index of the current page start for pagination.") @click.option("--limit", type=int, default=None, help="The page size for pagination.") def list(ctx: CLIContext, 
service: str, format, filter_, order, offset, limit) -> None: - """List all set auto scaling rules for given model service.""" + """List all set auto-scaling rules for given model service.""" if format: try: @@ -133,15 +133,15 @@ def list(ctx: CLIContext, service: str, format, filter_, order, offset, limit) - @auto_scaling_rule.command() @pass_ctx_obj -@click.argument("rule", type=str, metavar="RULE_ID") +@click.argument("rule", type=click.UUID, metavar="RULE_ID") @click.option( "-f", "--format", default=None, help="Display only specified fields. When specifying multiple fields separate them with comma (,).", ) -def get(ctx: CLIContext, rule, format) -> None: - """Prints attributes of given auto scaling rule.""" +def get(ctx: CLIContext, rule: uuid.UUID, format: str) -> None: + """Prints attributes of the given auto-scaling rule.""" fields: Iterable[Any] if format: try: @@ -154,21 +154,29 @@ def get(ctx: CLIContext, rule, format) -> None: with Session() as session: try: - rule_info = session.ServiceAutoScalingRule(uuid.UUID(rule)).get(fields=fields) + rule_instance = session.ServiceAutoScalingRule(rule).get(fields=fields) except (ValueError, BackendAPIError): ctx.output.print_fail(f"Network {rule} not found.") sys.exit(ExitCode.FAILURE) - ctx.output.print_item(rule_info, fields) + ctx.output.print_item(rule_instance, fields) @auto_scaling_rule.command() @pass_ctx_obj -@click.argument("rule", type=str, metavar="RULE_ID") -@click.option("--metric-source", type=OptionalType(AutoScalingMetricSource), default=undefined) +@click.argument("rule", type=click.UUID, metavar="RULE_ID") +@click.option( + "--metric-source", + type=OptionalType(click.Choice([*AutoScalingMetricSource], case_sensitive=False)), + default=undefined, +) @click.option("--metric-name", type=OptionalType(str), default=undefined) @click.option("--threshold", type=OptionalType(str), default=undefined) -@click.option("--comparator", type=OptionalType(AutoScalingMetricComparator), default=undefined) 
+@click.option( + "--comparator", + type=OptionalType(click.Choice([*AutoScalingMetricComparator], case_sensitive=False)), + default=undefined, +) @click.option("--step-size", type=OptionalType(int), default=undefined) @click.option("--cooldown-seconds", type=OptionalType(int), default=undefined) @click.option( @@ -185,7 +193,7 @@ def get(ctx: CLIContext, rule, format) -> None: ) def update( ctx: CLIContext, - rule: str, + rule: uuid.UUID, *, metric_source: str | Undefined, metric_name: str | Undefined, @@ -196,6 +204,7 @@ def update( min_replicas: Optional[int] | Undefined, max_replicas: Optional[int] | Undefined, ) -> None: + """Update attributes of the given auto-scaling rule.""" with Session() as session: try: _threshold = decimal.Decimal(threshold) if threshold != undefined else undefined @@ -209,9 +218,9 @@ def update( max_replicas = None try: - _rule = session.ServiceAutoScalingRule(uuid.UUID(rule)) - _rule.get() - _rule.update( + rule_instance = session.ServiceAutoScalingRule(rule) + rule_instance.get() + rule_instance.update( metric_source=metric_source, metric_name=metric_name, threshold=_threshold, @@ -221,7 +230,7 @@ def update( min_replicas=min_replicas, max_replicas=max_replicas, ) - print_done(f"Auto Scaling Rule (ID {_rule.rule_id}) updated.") + print_done(f"Auto-scaling Rule (ID {rule_instance.rule_id}) updated.") except BackendAPIError as e: ctx.output.print_fail(e.data["title"]) sys.exit(ExitCode.FAILURE) @@ -229,15 +238,16 @@ def update( @auto_scaling_rule.command() @pass_ctx_obj -@click.argument("rule", type=str, metavar="RULE_ID") -def delete(ctx: CLIContext, rule) -> None: +@click.argument("rule", type=click.UUID, metavar="RULE_ID") +def delete(ctx: CLIContext, rule: uuid.UUID) -> None: + """Remove the given auto-scaling rule.""" with Session() as session: - rule = session.ServiceAutoScalingRule(uuid.UUID(rule)) + rule_instance = session.ServiceAutoScalingRule(rule) try: - rule.get(fields=[service_auto_scaling_rule_fields["id"]]) - 
rule.delete() - print_done(f"Auto scaling rule {rule.rule_id} has been deleted.") + rule_instance.get(fields=[service_auto_scaling_rule_fields["id"]]) + rule_instance.delete() + print_done(f"Auto-scaling rule {rule_instance.rule_id} has been deleted.") except BackendAPIError as e: - ctx.output.print_fail(f"Failed to delete rule {rule.rule_id}:") + ctx.output.print_fail(f"Failed to delete rule {rule_instance.rule_id}:") ctx.output.print_error(e) sys.exit(ExitCode.FAILURE) From 36266635c2412238d09fa6538f369c7db9585308 Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Tue, 14 Jan 2025 02:18:36 +0900 Subject: [PATCH 45/75] fix: Update type anno --- src/ai/backend/cli/params.py | 84 +++++++++++++++++++++++++++--------- 1 file changed, 63 insertions(+), 21 deletions(-) diff --git a/src/ai/backend/cli/params.py b/src/ai/backend/cli/params.py index c24de13a04f..70f3ecad78c 100644 --- a/src/ai/backend/cli/params.py +++ b/src/ai/backend/cli/params.py @@ -1,15 +1,14 @@ import json import re +from collections.abc import Mapping, Sequence from decimal import Decimal from typing import ( Any, Generic, - Mapping, Optional, Protocol, - Self, TypeVar, - Union, + override, ) import click import trafaret @@ -21,7 +20,13 @@ class BoolExprType(click.ParamType): name = "boolean" - def convert(self, value, param, ctx): + @override + def convert( + self, + value: str, + param: Optional[click.Parameter], + ctx: Optional[click.Context], + ) -> bool: if isinstance(value, bool): return value try: @@ -43,7 +48,13 @@ class ByteSizeParamType(click.ParamType): "e": 2**60, } - def convert(self, value, param, ctx): + @override + def convert( + self, + value: str, + param: Optional[click.Parameter], + ctx: Optional[click.Context], + ) -> Any: if isinstance(value, int): return value if not isinstance(value, str): @@ -63,7 +74,13 @@ def convert(self, value, param, ctx): class ByteSizeParamCheckType(ByteSizeParamType): name = "byte-check" + @override + def convert( + self,
value: str, + param: Optional[click.Parameter], + ctx: Optional[click.Context], + ) -> str: if isinstance(value, int): return value if not isinstance(value, str): @@ -81,7 +98,13 @@ def convert(self, value, param, ctx): class CommaSeparatedKVListParamType(click.ParamType): name = "comma-seperated-KVList-check" - def convert(self, value: Union[str, Mapping[str, str]], param, ctx) -> Mapping[str, str]: + @override + def convert( + self, + value: str, + param: Optional[click.Parameter], + ctx: Optional[click.Context], + ) -> Mapping[str, str]: if isinstance(value, dict): return value if not isinstance(value, str): @@ -120,9 +143,10 @@ def __init__(self) -> None: super().__init__() self._parsed = False + @override def convert( self, - value: Optional[str], + value: str, param: Optional[click.Parameter], ctx: Optional[click.Context], ) -> Any: @@ -160,8 +184,14 @@ class RangeExprOptionType(click.ParamType): _rx_range_key = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]*$") name = "Range Expression" - def convert(self, arg, param, ctx): - key, value = arg.split("=", maxsplit=1) + @override + def convert( + self, + value: str, + param: Optional[click.Parameter], + ctx: Optional[click.Context], + ) -> Any: + key, value = value.split("=", maxsplit=1) assert self._rx_range_key.match(key), "The key must be a valid slug string." 
try: if value.startswith("case:"): @@ -181,12 +211,18 @@ def convert(self, arg, param, ctx): class CommaSeparatedListType(click.ParamType): name = "List Expression" - def convert(self, arg, param, ctx): + @override + def convert( + self, + value: str, + param: Optional[click.Parameter], + ctx: Optional[click.Context], + ) -> Sequence[str]: try: - if isinstance(arg, int): - return arg - elif isinstance(arg, str): - return arg.split(",") + if isinstance(value, int): + return value + elif isinstance(value, str): + return value.split(",") except ValueError as e: self.fail(repr(e), param, ctx) @@ -195,28 +231,34 @@ def convert(self, arg, param, ctx): class SingleValueConstructorType(Protocol): - def __new__(cls, value: Any) -> Self: ... def __init__(self, value: Any) -> None: ... -TScalar = TypeVar("TScalar", bound=SingleValueConstructorType) +TScalar = TypeVar("TScalar", bound=SingleValueConstructorType | click.ParamType) class OptionalType(click.ParamType, Generic[TScalar]): name = "Optional Type Wrapper" - def __init__(self, type_: type[TScalar] | type[click.ParamType] | click.ParamType) -> None: + def __init__(self, type_: type[TScalar] | click.ParamType) -> None: super().__init__() self.type_ = type_ - def convert(self, value: Any, param, ctx) -> TScalar | Undefined: + def convert( + self, + value: str, + param: Optional[click.Parameter], + ctx: Optional[click.Context], + ) -> TScalar | Undefined: try: if value is undefined: return undefined match self.type_: - case click.ParamType() | type(): + case click.ParamType(): return self.type_(value) - case _: + case type() if issubclass(self.type_, click.ParamType): return self.type_()(value) + case _: + return self.type_(value) except ValueError: self.fail(f"{value!r} is not valid `{self.type_}` or `undefined`", param, ctx) From 61005007584f7dc5dd3d2b0c2ccad110ee88d60a Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Mon, 13 Jan 2025 17:20:43 +0000 Subject: [PATCH 46/75] chore: update api schema dump Co-authored-by: 
octodog --- docs/manager/rest-reference/openapi.json | 204 +++++++++++------------ 1 file changed, 102 insertions(+), 102 deletions(-) diff --git a/docs/manager/rest-reference/openapi.json b/docs/manager/rest-reference/openapi.json index 6b888d1ea35..925a6a6d3ac 100644 --- a/docs/manager/rest-reference/openapi.json +++ b/docs/manager/rest-reference/openapi.json @@ -1445,7 +1445,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/events/session": { @@ -1515,7 +1515,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/auth": { @@ -2099,7 +2099,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "delete": { "operationId": "folders.delete_by_id", @@ -2155,7 +2155,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "delete": { "operationId": "folders.delete_by_name", @@ -2255,7 +2255,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/folders/_/all-hosts": { @@ -2275,7 +2275,7 @@ } ], "parameters": [], - "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* 
Superadmin privilege required.\n* Manager status required: RUNNING\n" } }, "/folders/_/allowed-types": { @@ -2295,7 +2295,7 @@ } ], "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/folders/_/all_hosts": { @@ -2315,7 +2315,7 @@ } ], "parameters": [], - "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" } }, "/folders/_/allowed_types": { @@ -2335,7 +2335,7 @@ } ], "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/folders/_/perf-metric": { @@ -2364,7 +2364,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" } }, "/folders/{name}/rename": { @@ -2521,7 +2521,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/folders/{name}/request-upload": { @@ -2572,7 +2572,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/folders/{name}/request-download": { @@ -2623,7 +2623,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": 
"\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/folders/{name}/move-file": { @@ -2674,7 +2674,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/folders/{name}/rename-file": { @@ -2729,7 +2729,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/folders/{name}/delete-files": { @@ -2783,7 +2783,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "delete": { "operationId": "folders.delete_files.2", @@ -2829,7 +2829,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/folders/{name}/rename_file": { @@ -2884,7 +2884,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/folders/{name}/delete_files": { @@ -2932,7 +2932,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/folders/{name}/files": { @@ -2969,7 +2969,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": 
"\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/folders/{name}/invite": { @@ -3402,7 +3402,7 @@ } ], "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/folders/invitations/list_sent": { @@ -3422,7 +3422,7 @@ } ], "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/folders/invitations/update/{inv_id}": { @@ -3494,7 +3494,7 @@ } ], "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/folders/invitations/accept": { @@ -3627,7 +3627,7 @@ "in": "query" } ], - "description": "\nList shared vfolders.\n\nNot available for group vfolders.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\nList shared vfolders.\n\nNot available for group vfolders.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "post": { "operationId": "folders.update_shared_vfolder", @@ -3743,7 +3743,7 @@ "in": "query" } ], - "description": "\nReturn the contents of `/etc/fstab` file.\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" + "description": "\nReturn the contents of `/etc/fstab` file.\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" } }, "/folders/_/mounts": { @@ -3763,7 +3763,7 @@ } ], "parameters": [], - "description": "\nList all mounted vfolder hosts in vfroot.\n\nAll mounted hosts from connected (ALIVE) 
agents are also gathered.\nGenerally, agents should be configured to have same hosts structure,\nbut newly introduced one may not.\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" + "description": "\nList all mounted vfolder hosts in vfroot.\n\nAll mounted hosts from connected (ALIVE) agents are also gathered.\nGenerally, agents should be configured to have same hosts structure,\nbut newly introduced one may not.\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" }, "post": { "operationId": "folders.mount_host", @@ -4001,7 +4001,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "post": { "operationId": "folders.update_quota", @@ -4086,7 +4086,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" } }, "/folders/_/used-bytes": { @@ -4124,7 +4124,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" } }, "//graphql": { @@ -4312,7 +4312,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "post": { "operationId": "services.create", @@ -4409,7 +4409,7 @@ } ], "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User 
privilege required.\n* Manager status required: RUNNING\n" } }, "/services/{service_id}": { @@ -4445,7 +4445,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "delete": { "operationId": "services.delete", @@ -4479,7 +4479,7 @@ } } ], - "description": "\nRemoves model service (and inference sessions for the service also).\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\nRemoves model service (and inference sessions for the service also).\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/services/{service_id}/errors": { @@ -4515,7 +4515,7 @@ } } ], - "description": "\nList errors raised while trying to create the inference sessions. Backend.AI will\nstop trying to create an inference session for the model service if six (6) error stacks\nup. The only way to clear the error and retry spawning session is to call\n`clear_error` (POST /services/{service_id}/errors/clear) API.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\nList errors raised while trying to create the inference sessions. Backend.AI will\nstop trying to create an inference session for the model service if six (6) error stacks\nup. 
The only way to clear the error and retry spawning session is to call\n`clear_error` (POST /services/{service_id}/errors/clear) API.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/services/{service_id}/errors/clear": { @@ -4544,7 +4544,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/services/{service_id}/scale": { @@ -4589,7 +4589,7 @@ } } ], - "description": "\nUpdates ideal inference session count manually. Based on the difference of this number,\ninference sessions will be created or removed automatically.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\nUpdates ideal inference session count manually. Based on the difference of this number,\ninference sessions will be created or removed automatically.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/services/{service_id}/sync": { @@ -4625,7 +4625,7 @@ } } ], - "description": "\nForce syncs up-to-date model service information with AppProxy.\nIn normal situations this will be automatically handled by Backend.AI schedulers,\nbut this API is left open in case of unexpected restart of AppProxy process.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\nForce syncs up-to-date model service information with AppProxy.\nIn normal situations this will be automatically handled by Backend.AI schedulers,\nbut this API is left open in case of unexpected restart of AppProxy process.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/services/{service_id}/routings/{route_id}": { @@ -4678,7 +4678,7 @@ } } ], - "description": "\nUpdates traffic bias of specific 
route.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\nUpdates traffic bias of specific route.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "delete": { "operationId": "services.delete_route", @@ -4720,7 +4720,7 @@ } } ], - "description": "\nScales down the service by removing specific inference session.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\nScales down the service by removing specific inference session.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/services/{service_id}/token": { @@ -4765,7 +4765,7 @@ } } ], - "description": "\nGenerates a token which acts as an API key to authenticate when calling model service endpoint.\nIf both duration and valid_until is not set then the AppProxy will determine appropriate lifetime of the token.\nduration and valid_until can't be both specified.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\nGenerates a token which acts as an API key to authenticate when calling model service endpoint.\nIf both duration and valid_until is not set then the AppProxy will determine appropriate lifetime of the token.\nduration and valid_until can't be both specified.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/session": { @@ -5381,7 +5381,7 @@ "in": "query" } ], - "description": "\nA quick session-ID matcher API for use with auto-completion in CLI.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\nA quick session-ID matcher API for use with auto-completion in CLI.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/session/_/sync-agent-registry": { @@ -5484,7 +5484,7 @@ } } ], - "description": 
"\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "patch": { "operationId": "session.restart", @@ -5527,7 +5527,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "delete": { "operationId": "session.destroy", @@ -5580,7 +5580,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "post": { "operationId": "session.execute", @@ -5607,7 +5607,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/session/_/logs": { @@ -5646,7 +5646,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "get": { "operationId": "session.get_task_logs.2", @@ -5674,7 +5674,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/session/{session_name}/direct-access-info": { @@ -5703,7 +5703,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/session/{session_name}/logs": { @@ 
-5741,7 +5741,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/session/{session_name}/rename": { @@ -5818,7 +5818,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/session/{session_name}/complete": { @@ -5847,7 +5847,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/session/{session_name}/shutdown-service": { @@ -5894,7 +5894,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/session/{session_name}/upload": { @@ -5923,7 +5923,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/session/{session_name}/download": { @@ -5995,7 +5995,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/session/{session_name}/download_single": { @@ -6064,7 +6064,7 @@ } } ], - "description": "\nDownload a single file from the scratch root. Only for small files.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\nDownload a single file from the scratch root. 
Only for small files.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/session/{session_name}/files": { @@ -6093,7 +6093,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/session/{session_name}/start-service": { @@ -6154,7 +6154,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/session/{session_name}/commit": { @@ -6348,7 +6348,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/stream/session/{session_name}/pty": { @@ -6377,7 +6377,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/stream/session/{session_name}/execute": { @@ -6406,7 +6406,7 @@ } } ], - "description": "\nWebSocket-version of gateway.kernel.execute().\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\nWebSocket-version of gateway.kernel.execute().\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/stream/session/{session_name}/apps": { @@ -6435,7 +6435,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/stream/session/{session_name}/httpproxy": { @@ -6498,7 +6498,7 @@ 
"in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/stream/session/{session_name}/tcpproxy": { @@ -6561,7 +6561,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/manager/status": { @@ -6884,7 +6884,7 @@ } }, "parameters": [], - "description": "\nReturns the list of all resource presets in the current scaling group,\nwith additional information including allocatability of each preset,\namount of total remaining resources, and the current keypair resource limits.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\nReturns the list of all resource presets in the current scaling group,\nwith additional information including allocatability of each preset,\namount of total remaining resources, and the current keypair resource limits.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/resource/recalculate-usage": { @@ -6904,7 +6904,7 @@ } ], "parameters": [], - "description": "\nUpdate `keypair_resource_usages` in redis and `agents.c.occupied_slots`.\n\nThose two values are sometimes out of sync. In that case, calling this API\nre-calculates the values for running containers and updates them in DB.\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" + "description": "\nUpdate `keypair_resource_usages` in redis and `agents.c.occupied_slots`.\n\nThose two values are sometimes out of sync. 
In that case, calling this API\nre-calculates the values for running containers and updates them in DB.\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" } }, "/resource/usage/month": { @@ -6945,7 +6945,7 @@ "in": "query" } ], - "description": "\nReturn usage statistics of terminated containers for a specified month.\nThe date/time comparison is done using the configured timezone.\n\n:param group_ids: If not None, query containers only in those groups.\n:param month: The year-month to query usage statistics. ex) \"202006\" to query for Jun 2020\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" + "description": "\nReturn usage statistics of terminated containers for a specified month.\nThe date/time comparison is done using the configured timezone.\n\n:param group_ids: If not None, query containers only in those groups.\n:param month: The year-month to query usage statistics. ex) \"202006\" to query for Jun 2020\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" } }, "/resource/usage/period": { @@ -6992,7 +6992,7 @@ "in": "query" } ], - "description": "\nReturn usage statistics of terminated containers belonged to the given group for a specified\nperiod in dates.\nThe date/time comparison is done using the configured timezone.\n\n:param project_id: If not None, query containers only in the project.\n:param start_date str: \"yyyymmdd\" format.\n:param end_date str: \"yyyymmdd\" format.\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" + "description": "\nReturn usage statistics of terminated containers belonged to the given group for a specified\nperiod in dates.\nThe date/time comparison is done using the configured timezone.\n\n:param project_id: If not None, query containers only in the project.\n:param start_date str: \"yyyymmdd\" format.\n:param end_date str: \"yyyymmdd\" 
format.\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" } }, "/resource/stats/user/month": { @@ -7012,7 +7012,7 @@ } ], "parameters": [], - "description": "\nReturn time-binned (15 min) stats for terminated user sessions\nover last 30 days.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\nReturn time-binned (15 min) stats for terminated user sessions\nover last 30 days.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/resource/stats/admin/month": { @@ -7032,7 +7032,7 @@ } ], "parameters": [], - "description": "\nReturn time-binned (15 min) stats for all terminated sessions\nover last 30 days.\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" + "description": "\nReturn time-binned (15 min) stats for all terminated sessions\nover last 30 days.\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" } }, "/resource/watcher": { @@ -7061,7 +7061,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" } }, "/resource/watcher/agent/start": { @@ -7099,7 +7099,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" } }, "/resource/watcher/agent/stop": { @@ -7137,7 +7137,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" } }, 
"/resource/watcher/agent/restart": { @@ -7175,7 +7175,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" } }, "/scaling-groups": { @@ -7212,7 +7212,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/scaling-groups/{scaling_group}/wsproxy-version": { @@ -7257,7 +7257,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/template/cluster": { @@ -7308,7 +7308,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "get": { "operationId": "template/cluster.list_template", @@ -7351,7 +7351,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/template/cluster/{template_id}": { @@ -7401,7 +7401,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "put": { "operationId": "template/cluster.put", @@ -7449,7 +7449,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": 
"\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "delete": { "operationId": "template/cluster.delete", @@ -7484,7 +7484,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/template/session": { @@ -7535,7 +7535,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "get": { "operationId": "template/session.list_template", @@ -7578,7 +7578,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/template/session/{template_id}": { @@ -7628,7 +7628,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "put": { "operationId": "template/session.put", @@ -7686,7 +7686,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "delete": { "operationId": "template/session.delete", @@ -7721,7 +7721,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/user-config/dotfiles": { @@ -7771,7 +7771,7 @@ } }, "parameters": [], - "description": 
"\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "get": { "operationId": "user-config.list_or_get", @@ -7806,7 +7806,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "patch": { "operationId": "user-config.update", @@ -7854,7 +7854,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "delete": { "operationId": "user-config.delete", @@ -7889,7 +7889,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/user-config/bootstrap-script": { @@ -7927,7 +7927,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "get": { "operationId": "user-config.get_bootstrap_script", @@ -7945,7 +7945,7 @@ } ], "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/domain-config/dotfiles": { @@ -7996,7 +7996,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status 
required: RUNNING\n" }, "get": { "operationId": "domain-config.list_or_get", @@ -8031,7 +8031,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "patch": { "operationId": "domain-config.update", @@ -8080,7 +8080,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: RUNNING\n" }, "delete": { "operationId": "domain-config.delete", @@ -8115,7 +8115,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: RUNNING\n" } }, "/group-config/dotfiles": { @@ -8177,7 +8177,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: RUNNING\n" }, "get": { "operationId": "group-config.list_or_get", @@ -8228,7 +8228,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "patch": { "operationId": "group-config.update", @@ -8288,7 +8288,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: RUNNING\n" }, "delete": { "operationId": "group-config.delete", @@ -8339,7 +8339,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* Admin 
privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: RUNNING\n" } }, "/logs/error": { @@ -8411,7 +8411,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" }, "get": { "operationId": "logs/error.list_logs", @@ -8456,7 +8456,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } }, "/logs/error/{log_id}/clear": { @@ -8485,7 +8485,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" } } } From c4973737d7642c84e6152a15c9d2aeb44daa8a5c Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Tue, 14 Jan 2025 02:26:49 +0900 Subject: [PATCH 47/75] fix: Update release version --- src/ai/backend/manager/models/gql.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ai/backend/manager/models/gql.py b/src/ai/backend/manager/models/gql.py index 5536a1b28d9..98cdd9a4b6b 100644 --- a/src/ai/backend/manager/models/gql.py +++ b/src/ai/backend/manager/models/gql.py @@ -343,13 +343,13 @@ class Mutations(graphene.ObjectType): ) create_endpoint_auto_scaling_rule_node = CreateEndpointAutoScalingRuleNode.Field( - description="Added in 24.12.0." + description="Added in 25.1.0." ) modify_endpoint_auto_scaling_rule_node = ModifyEndpointAutoScalingRuleNode.Field( - description="Added in 24.12.0." + description="Added in 25.1.0." 
) delete_endpoint_auto_scaling_rule_node = DeleteEndpointAutoScalingRuleNode.Field( - description="Added in 24.12.0." + description="Added in 25.1.0." ) # Legacy mutations @@ -919,13 +919,13 @@ class Queries(graphene.ObjectType): endpoint_auto_scaling_rule_node = graphene.Field( EndpointAutoScalingRuleNode, id=graphene.String(required=True), - description="Added in 24.12.0.", + description="Added in 25.1.0.", ) endpoint_auto_scaling_rule_nodes = PaginatedConnectionField( EndpointAutoScalingRuleConnection, endpoint=graphene.String(required=True), - description="Added in 24.12.0.", + description="Added in 25.1.0.", ) @staticmethod From 1fe54c1d97930727329dec7824041acdcc242997 Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Mon, 13 Jan 2025 17:29:18 +0000 Subject: [PATCH 48/75] chore: update api schema dump Co-authored-by: octodog --- docs/manager/graphql-reference/schema.graphql | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/manager/graphql-reference/schema.graphql b/docs/manager/graphql-reference/schema.graphql index 31042a12084..189102c937c 100644 --- a/docs/manager/graphql-reference/schema.graphql +++ b/docs/manager/graphql-reference/schema.graphql @@ -221,10 +221,10 @@ type Queries { """Added in 24.12.0.""" networks(filter: String, order: String, offset: Int, before: String, after: String, first: Int, last: Int): NetworkConnection - """Added in 24.12.0.""" + """Added in 25.1.0.""" endpoint_auto_scaling_rule_node(id: String!): EndpointAutoScalingRuleNode - """Added in 24.12.0.""" + """Added in 25.1.0.""" endpoint_auto_scaling_rule_nodes(endpoint: String!, filter: String, order: String, offset: Int, before: String, after: String, first: Int, last: Int): EndpointAutoScalingRuleConnection } @@ -1910,13 +1910,13 @@ type Mutations { id: String! 
): DeleteContainerRegistryNode - """Added in 24.12.0.""" + """Added in 25.1.0.""" create_endpoint_auto_scaling_rule_node(endpoint: String!, props: EndpointAutoScalingRuleInput!): CreateEndpointAutoScalingRuleNode - """Added in 24.12.0.""" + """Added in 25.1.0.""" modify_endpoint_auto_scaling_rule_node(id: String!, props: ModifyEndpointAutoScalingRuleInput!): ModifyEndpointAutoScalingRuleNode - """Added in 24.12.0.""" + """Added in 25.1.0.""" delete_endpoint_auto_scaling_rule_node(id: String!): DeleteEndpointAutoScalingRuleNode create_container_registry(hostname: String!, props: CreateContainerRegistryInput!): CreateContainerRegistry modify_container_registry(hostname: String!, props: ModifyContainerRegistryInput!): ModifyContainerRegistry From 86806e0542576f5e07519716862aba494cb3b4f0 Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Mon, 13 Jan 2025 17:31:08 +0000 Subject: [PATCH 49/75] chore: update api schema dump Co-authored-by: octodog --- docs/manager/rest-reference/openapi.json | 204 +++++++++++------------ 1 file changed, 102 insertions(+), 102 deletions(-) diff --git a/docs/manager/rest-reference/openapi.json b/docs/manager/rest-reference/openapi.json index 925a6a6d3ac..6b888d1ea35 100644 --- a/docs/manager/rest-reference/openapi.json +++ b/docs/manager/rest-reference/openapi.json @@ -1445,7 +1445,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/events/session": { @@ -1515,7 +1515,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/auth": { @@ -2099,7 +2099,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: 
RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "delete": { "operationId": "folders.delete_by_id", @@ -2155,7 +2155,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "delete": { "operationId": "folders.delete_by_name", @@ -2255,7 +2255,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/folders/_/all-hosts": { @@ -2275,7 +2275,7 @@ } ], "parameters": [], - "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" } }, "/folders/_/allowed-types": { @@ -2295,7 +2295,7 @@ } ], "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/folders/_/all_hosts": { @@ -2315,7 +2315,7 @@ } ], "parameters": [], - "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" } }, "/folders/_/allowed_types": { @@ -2335,7 +2335,7 @@ } ], "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/folders/_/perf-metric": { @@ -2364,7 +2364,7 @@ "in": "query" } ], - "description": 
"\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" } }, "/folders/{name}/rename": { @@ -2521,7 +2521,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/folders/{name}/request-upload": { @@ -2572,7 +2572,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/folders/{name}/request-download": { @@ -2623,7 +2623,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/folders/{name}/move-file": { @@ -2674,7 +2674,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/folders/{name}/rename-file": { @@ -2729,7 +2729,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/folders/{name}/delete-files": { @@ -2783,7 +2783,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "delete": { "operationId": "folders.delete_files.2", @@ -2829,7 +2829,7 @@ "in": "query" } ], - "description": 
"\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/folders/{name}/rename_file": { @@ -2884,7 +2884,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/folders/{name}/delete_files": { @@ -2932,7 +2932,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/folders/{name}/files": { @@ -2969,7 +2969,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/folders/{name}/invite": { @@ -3402,7 +3402,7 @@ } ], "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/folders/invitations/list_sent": { @@ -3422,7 +3422,7 @@ } ], "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/folders/invitations/update/{inv_id}": { @@ -3494,7 +3494,7 @@ } ], "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/folders/invitations/accept": { @@ -3627,7 +3627,7 @@ 
"in": "query" } ], - "description": "\nList shared vfolders.\n\nNot available for group vfolders.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\nList shared vfolders.\n\nNot available for group vfolders.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "post": { "operationId": "folders.update_shared_vfolder", @@ -3743,7 +3743,7 @@ "in": "query" } ], - "description": "\nReturn the contents of `/etc/fstab` file.\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" + "description": "\nReturn the contents of `/etc/fstab` file.\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" } }, "/folders/_/mounts": { @@ -3763,7 +3763,7 @@ } ], "parameters": [], - "description": "\nList all mounted vfolder hosts in vfroot.\n\nAll mounted hosts from connected (ALIVE) agents are also gathered.\nGenerally, agents should be configured to have same hosts structure,\nbut newly introduced one may not.\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" + "description": "\nList all mounted vfolder hosts in vfroot.\n\nAll mounted hosts from connected (ALIVE) agents are also gathered.\nGenerally, agents should be configured to have same hosts structure,\nbut newly introduced one may not.\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" }, "post": { "operationId": "folders.mount_host", @@ -4001,7 +4001,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "post": { "operationId": "folders.update_quota", @@ -4086,7 +4086,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager 
status required: RUNNING\n" + "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" } }, "/folders/_/used-bytes": { @@ -4124,7 +4124,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" } }, "//graphql": { @@ -4312,7 +4312,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "post": { "operationId": "services.create", @@ -4409,7 +4409,7 @@ } ], "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/services/{service_id}": { @@ -4445,7 +4445,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "delete": { "operationId": "services.delete", @@ -4479,7 +4479,7 @@ } } ], - "description": "\nRemoves model service (and inference sessions for the service also).\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\nRemoves model service (and inference sessions for the service also).\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/services/{service_id}/errors": { @@ -4515,7 +4515,7 @@ } } ], - "description": "\nList errors raised while trying to create the inference sessions. Backend.AI will\nstop trying to create an inference session for the model service if six (6) error stacks\nup. 
The only way to clear the error and retry spawning session is to call\n`clear_error` (POST /services/{service_id}/errors/clear) API.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\nList errors raised while trying to create the inference sessions. Backend.AI will\nstop trying to create an inference session for the model service if six (6) error stacks\nup. The only way to clear the error and retry spawning session is to call\n`clear_error` (POST /services/{service_id}/errors/clear) API.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/services/{service_id}/errors/clear": { @@ -4544,7 +4544,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/services/{service_id}/scale": { @@ -4589,7 +4589,7 @@ } } ], - "description": "\nUpdates ideal inference session count manually. Based on the difference of this number,\ninference sessions will be created or removed automatically.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\nUpdates ideal inference session count manually. 
Based on the difference of this number,\ninference sessions will be created or removed automatically.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/services/{service_id}/sync": { @@ -4625,7 +4625,7 @@ } } ], - "description": "\nForce syncs up-to-date model service information with AppProxy.\nIn normal situations this will be automatically handled by Backend.AI schedulers,\nbut this API is left open in case of unexpected restart of AppProxy process.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\nForce syncs up-to-date model service information with AppProxy.\nIn normal situations this will be automatically handled by Backend.AI schedulers,\nbut this API is left open in case of unexpected restart of AppProxy process.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/services/{service_id}/routings/{route_id}": { @@ -4678,7 +4678,7 @@ } } ], - "description": "\nUpdates traffic bias of specific route.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\nUpdates traffic bias of specific route.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "delete": { "operationId": "services.delete_route", @@ -4720,7 +4720,7 @@ } } ], - "description": "\nScales down the service by removing specific inference session.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\nScales down the service by removing specific inference session.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/services/{service_id}/token": { @@ -4765,7 +4765,7 @@ } } ], - "description": "\nGenerates a token which acts as an API key to authenticate when calling model service endpoint.\nIf both duration and valid_until is not set then the AppProxy will 
determine appropriate lifetime of the token.\nduration and valid_until can't be both specified.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\nGenerates a token which acts as an API key to authenticate when calling model service endpoint.\nIf both duration and valid_until is not set then the AppProxy will determine appropriate lifetime of the token.\nduration and valid_until can't be both specified.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/session": { @@ -5381,7 +5381,7 @@ "in": "query" } ], - "description": "\nA quick session-ID matcher API for use with auto-completion in CLI.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\nA quick session-ID matcher API for use with auto-completion in CLI.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/session/_/sync-agent-registry": { @@ -5484,7 +5484,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "patch": { "operationId": "session.restart", @@ -5527,7 +5527,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "delete": { "operationId": "session.destroy", @@ -5580,7 +5580,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "post": { "operationId": "session.execute", @@ -5607,7 +5607,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager 
status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/session/_/logs": { @@ -5646,7 +5646,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "get": { "operationId": "session.get_task_logs.2", @@ -5674,7 +5674,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/session/{session_name}/direct-access-info": { @@ -5703,7 +5703,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/session/{session_name}/logs": { @@ -5741,7 +5741,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/session/{session_name}/rename": { @@ -5818,7 +5818,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/session/{session_name}/complete": { @@ -5847,7 +5847,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/session/{session_name}/shutdown-service": { @@ -5894,7 +5894,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege 
required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/session/{session_name}/upload": { @@ -5923,7 +5923,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/session/{session_name}/download": { @@ -5995,7 +5995,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/session/{session_name}/download_single": { @@ -6064,7 +6064,7 @@ } } ], - "description": "\nDownload a single file from the scratch root. Only for small files.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\nDownload a single file from the scratch root. 
Only for small files.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/session/{session_name}/files": { @@ -6093,7 +6093,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/session/{session_name}/start-service": { @@ -6154,7 +6154,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/session/{session_name}/commit": { @@ -6348,7 +6348,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/stream/session/{session_name}/pty": { @@ -6377,7 +6377,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/stream/session/{session_name}/execute": { @@ -6406,7 +6406,7 @@ } } ], - "description": "\nWebSocket-version of gateway.kernel.execute().\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\nWebSocket-version of gateway.kernel.execute().\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/stream/session/{session_name}/apps": { @@ -6435,7 +6435,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/stream/session/{session_name}/httpproxy": { @@ -6498,7 +6498,7 @@ 
"in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/stream/session/{session_name}/tcpproxy": { @@ -6561,7 +6561,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/manager/status": { @@ -6884,7 +6884,7 @@ } }, "parameters": [], - "description": "\nReturns the list of all resource presets in the current scaling group,\nwith additional information including allocatability of each preset,\namount of total remaining resources, and the current keypair resource limits.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\nReturns the list of all resource presets in the current scaling group,\nwith additional information including allocatability of each preset,\namount of total remaining resources, and the current keypair resource limits.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/resource/recalculate-usage": { @@ -6904,7 +6904,7 @@ } ], "parameters": [], - "description": "\nUpdate `keypair_resource_usages` in redis and `agents.c.occupied_slots`.\n\nThose two values are sometimes out of sync. In that case, calling this API\nre-calculates the values for running containers and updates them in DB.\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" + "description": "\nUpdate `keypair_resource_usages` in redis and `agents.c.occupied_slots`.\n\nThose two values are sometimes out of sync. 
In that case, calling this API\nre-calculates the values for running containers and updates them in DB.\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" } }, "/resource/usage/month": { @@ -6945,7 +6945,7 @@ "in": "query" } ], - "description": "\nReturn usage statistics of terminated containers for a specified month.\nThe date/time comparison is done using the configured timezone.\n\n:param group_ids: If not None, query containers only in those groups.\n:param month: The year-month to query usage statistics. ex) \"202006\" to query for Jun 2020\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" + "description": "\nReturn usage statistics of terminated containers for a specified month.\nThe date/time comparison is done using the configured timezone.\n\n:param group_ids: If not None, query containers only in those groups.\n:param month: The year-month to query usage statistics. ex) \"202006\" to query for Jun 2020\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" } }, "/resource/usage/period": { @@ -6992,7 +6992,7 @@ "in": "query" } ], - "description": "\nReturn usage statistics of terminated containers belonged to the given group for a specified\nperiod in dates.\nThe date/time comparison is done using the configured timezone.\n\n:param project_id: If not None, query containers only in the project.\n:param start_date str: \"yyyymmdd\" format.\n:param end_date str: \"yyyymmdd\" format.\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" + "description": "\nReturn usage statistics of terminated containers belonged to the given group for a specified\nperiod in dates.\nThe date/time comparison is done using the configured timezone.\n\n:param project_id: If not None, query containers only in the project.\n:param start_date str: \"yyyymmdd\" format.\n:param end_date str: \"yyyymmdd\" 
format.\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" } }, "/resource/stats/user/month": { @@ -7012,7 +7012,7 @@ } ], "parameters": [], - "description": "\nReturn time-binned (15 min) stats for terminated user sessions\nover last 30 days.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\nReturn time-binned (15 min) stats for terminated user sessions\nover last 30 days.\n\n\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/resource/stats/admin/month": { @@ -7032,7 +7032,7 @@ } ], "parameters": [], - "description": "\nReturn time-binned (15 min) stats for all terminated sessions\nover last 30 days.\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" + "description": "\nReturn time-binned (15 min) stats for all terminated sessions\nover last 30 days.\n\n\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" } }, "/resource/watcher": { @@ -7061,7 +7061,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" } }, "/resource/watcher/agent/start": { @@ -7099,7 +7099,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" } }, "/resource/watcher/agent/stop": { @@ -7137,7 +7137,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" } }, "/resource/watcher/agent/restart": 
{ @@ -7175,7 +7175,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* Superadmin privilege required.\n* Manager status required: FROZEN\n" } }, "/scaling-groups": { @@ -7212,7 +7212,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/scaling-groups/{scaling_group}/wsproxy-version": { @@ -7257,7 +7257,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/template/cluster": { @@ -7308,7 +7308,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "get": { "operationId": "template/cluster.list_template", @@ -7351,7 +7351,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/template/cluster/{template_id}": { @@ -7401,7 +7401,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "put": { "operationId": "template/cluster.put", @@ -7449,7 +7449,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege 
required.\n* Manager status required: FROZEN\n" }, "delete": { "operationId": "template/cluster.delete", @@ -7484,7 +7484,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/template/session": { @@ -7535,7 +7535,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "get": { "operationId": "template/session.list_template", @@ -7578,7 +7578,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/template/session/{template_id}": { @@ -7628,7 +7628,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "put": { "operationId": "template/session.put", @@ -7686,7 +7686,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "delete": { "operationId": "template/session.delete", @@ -7721,7 +7721,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/user-config/dotfiles": { @@ -7771,7 +7771,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* 
Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "get": { "operationId": "user-config.list_or_get", @@ -7806,7 +7806,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "patch": { "operationId": "user-config.update", @@ -7854,7 +7854,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "delete": { "operationId": "user-config.delete", @@ -7889,7 +7889,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/user-config/bootstrap-script": { @@ -7927,7 +7927,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "get": { "operationId": "user-config.get_bootstrap_script", @@ -7945,7 +7945,7 @@ } ], "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/domain-config/dotfiles": { @@ -7996,7 +7996,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: FROZEN\n" }, "get": { "operationId": 
"domain-config.list_or_get", @@ -8031,7 +8031,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "patch": { "operationId": "domain-config.update", @@ -8080,7 +8080,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: FROZEN\n" }, "delete": { "operationId": "domain-config.delete", @@ -8115,7 +8115,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: FROZEN\n" } }, "/group-config/dotfiles": { @@ -8177,7 +8177,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: FROZEN\n" }, "get": { "operationId": "group-config.list_or_get", @@ -8228,7 +8228,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "patch": { "operationId": "group-config.update", @@ -8288,7 +8288,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: FROZEN\n" }, "delete": { "operationId": "group-config.delete", @@ -8339,7 +8339,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: 
RUNNING\n" + "description": "\n**Preconditions:**\n* Admin privilege required.\n* Manager status required: FROZEN\n" } }, "/logs/error": { @@ -8411,7 +8411,7 @@ } }, "parameters": [], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" }, "get": { "operationId": "logs/error.list_logs", @@ -8456,7 +8456,7 @@ "in": "query" } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } }, "/logs/error/{log_id}/clear": { @@ -8485,7 +8485,7 @@ } } ], - "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: RUNNING\n" + "description": "\n**Preconditions:**\n* User privilege required.\n* Manager status required: FROZEN\n" } } } From 97902b6387704b321a41dd6e6d9e129a4ad5ab84 Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Tue, 14 Jan 2025 13:07:15 +0900 Subject: [PATCH 50/75] fix: Update missing/stale type anno and fix minor mistakes in model service CLI --- src/ai/backend/client/cli/service.py | 72 ++++++++++++------- .../client/cli/service_auto_scaling_rule.py | 38 +++++++--- 2 files changed, 73 insertions(+), 37 deletions(-) diff --git a/src/ai/backend/client/cli/service.py b/src/ai/backend/client/cli/service.py index fac3c76d40b..5bc67923a79 100644 --- a/src/ai/backend/client/cli/service.py +++ b/src/ai/backend/client/cli/service.py @@ -17,6 +17,7 @@ from ai.backend.common.arch import DEFAULT_IMAGE_ARCH from ai.backend.common.types import ClusterMode +from ..exceptions import BackendError from ..output.fields import routing_fields, service_fields from ..output.types import FieldSpec from .extensions import pass_ctx_obj @@ -43,23 +44,22 @@ ) -def get_service_id(session: Session, name_or_id: str): +def get_service_id(session: 
Session, name_or_id: str) -> UUID: try: session.Service(name_or_id).info() - return name_or_id - except Exception: + except (ValueError, BackendError): services = session.Service.list(name=name_or_id) try: - return services[0]["id"] - except Exception as e: - if isinstance(e, KeyError) or isinstance(e, IndexError): - raise RuntimeError(f"Service {name_or_id} not found") - else: - raise e + return UUID(services[0]["id"]) + except (KeyError, IndexError): + raise RuntimeError(f"Service {name_or_id!r} not found") + else: + # When we can fetch the detail directly, it's a valid UUID. + return UUID(name_or_id) @main.group() -def service(): +def service() -> None: """Set of service operations""" @@ -69,7 +69,13 @@ def service(): @click.option("--order", default=None, help="Set the query ordering expression.") @click.option("--offset", default=0, help="The index of the current page start for pagination.") @click.option("--limit", type=int, default=None, help="The page size for pagination.") -def list(ctx: CLIContext, filter_, order, offset, limit): +def list( + ctx: CLIContext, + filter_: Optional[str], + order: Optional[str], + offset: int, + limit: Optional[int], +) -> None: """ List the service endpoints. """ @@ -95,7 +101,7 @@ def list(ctx: CLIContext, filter_, order, offset, limit): @service.command() @pass_ctx_obj @click.argument("service_name_or_id", metavar="SERVICE_NAME_OR_ID", type=str) -def info(ctx: CLIContext, service_name_or_id: str): +def info(ctx: CLIContext, service_name_or_id: str) -> None: """ Display the detail of a service endpoint with its backing inference session. @@ -287,7 +293,7 @@ def create( owner: Optional[str], model_definition_path: Optional[str], public: bool, -): +) -> None: """ Create a service endpoint with a backing inference session. 
@@ -470,7 +476,7 @@ def try_start( resources: Sequence[str], resource_opts: Sequence[str], cluster_size: int, - cluster_mode: Literal["single-node", "multi-node"], + cluster_mode: ClusterMode, domain: Optional[str], project: Optional[str], bootstrap_script: Optional[str], @@ -479,7 +485,7 @@ def try_start( scaling_group: Optional[str], owner: Optional[str], public: bool, -): +) -> None: """ Tries to create a model service session and return whether the server has successfully started or not. @@ -566,7 +572,7 @@ async def try_start_tracker(bgtask_id): @service.command() @pass_ctx_obj @click.argument("service_name_or_id", metavar="SERVICE_NAME_OR_ID", type=str) -def rm(ctx: CLIContext, service_name_or_id): +def rm(ctx: CLIContext, service_name_or_id: str) -> None: """ Remove the service endpoint. @@ -585,7 +591,7 @@ def rm(ctx: CLIContext, service_name_or_id): @service.command() @pass_ctx_obj @click.argument("service_name_or_id", metavar="SERVICE_NAME_OR_ID", type=str) -def sync(ctx: CLIContext, service_name_or_id: str): +def sync(ctx: CLIContext, service_name_or_id: str) -> None: """ Sync route status with AppProxy. @@ -606,7 +612,11 @@ def sync(ctx: CLIContext, service_name_or_id: str): @pass_ctx_obj @click.argument("service_name_or_id", metavar="SERVICE_NAME_OR_ID", type=str) @click.argument("target_count", metavar="COUNT", type=int) -def scale(ctx: CLIContext, service_name_or_id: str, target_count: int): +def scale( + ctx: CLIContext, + service_name_or_id: str, + target_count: int, +) -> None: """ Start or resume the service endpoint to handle the incoming traffic. 
@@ -629,7 +639,12 @@ def scale(ctx: CLIContext, service_name_or_id: str, target_count: int): @click.argument("service_name_or_id", metavar="SERVICE_NAME_OR_ID", type=str) @click.argument("duration", metavar="DURATION", type=str) @click.option("-q", "--quiet", is_flag=True) -def generate_token(ctx: CLIContext, service_name_or_id: str, duration: str, quiet: bool): +def generate_token( + ctx: CLIContext, + service_name_or_id: str, + duration: str, + quiet: bool, +) -> None: """ Generate an API token to communicate with inference endpoint. @@ -654,7 +669,7 @@ def generate_token(ctx: CLIContext, service_name_or_id: str, duration: str, quie @service.command() @pass_ctx_obj @click.argument("service_name_or_id", metavar="SERVICE_NAME_OR_ID", type=str) -def get_endpoint(ctx: CLIContext, service_name_or_id: str): +def get_endpoint(ctx: CLIContext, service_name_or_id: str) -> None: """ Returns API Endpoint URL of the service. @@ -674,9 +689,14 @@ def get_endpoint(ctx: CLIContext, service_name_or_id: str): @service.command() @pass_ctx_obj @click.argument("service_name_or_id", metavar="SERVICE_NAME_OR_ID", type=str) -@click.argument("route_id", metavar="ROUTE_ID", type=str) +@click.argument("route_id", metavar="ROUTE_ID", type=click.UUID) @click.argument("ratio", metavar="RATIO", type=float) -def update_traffic_ratio(ctx: CLIContext, service_name_or_id: str, route_id: str, ratio: float): +def update_traffic_ratio( + ctx: CLIContext, + service_name_or_id: str, + route_id: UUID, + ratio: float, +) -> None: """ Update traffic ratio of single route. 
@@ -688,7 +708,7 @@ def update_traffic_ratio(ctx: CLIContext, service_name_or_id: str, route_id: str with Session() as session: try: service_id = get_service_id(session, service_name_or_id) - session.Service(service_id).update_traffic_ratio(UUID(route_id), ratio) + session.Service(service_id).update_traffic_ratio(route_id, ratio) print_done("Done.") except Exception as e: ctx.output.print_error(e) @@ -698,9 +718,9 @@ def update_traffic_ratio(ctx: CLIContext, service_name_or_id: str, route_id: str @service.command() @pass_ctx_obj @click.argument("service_name_or_id", metavar="SERVICE_NAME_OR_ID", type=str) -@click.argument("route_id", metavar="ROUTE_ID", type=str) +@click.argument("route_id", metavar="ROUTE_ID", type=click.UUID) @click.argument("ratio", metavar="RATIO", type=float) -def downscale_route(ctx: CLIContext, service_name_or_id: str, route_id: str): +def downscale_route(ctx: CLIContext, service_name_or_id: str, route_id: UUID) -> None: """ Destroy route and its associated session and decrement desired session count of endpoint @@ -712,7 +732,7 @@ def downscale_route(ctx: CLIContext, service_name_or_id: str, route_id: str): with Session() as session: try: service_id = get_service_id(session, service_name_or_id) - session.Service(service_id).downscale_single_route(UUID(route_id)) + session.Service(service_id).downscale_single_route(route_id) print_done("Done.") except Exception as e: ctx.output.print_error(e) diff --git a/src/ai/backend/client/cli/service_auto_scaling_rule.py b/src/ai/backend/client/cli/service_auto_scaling_rule.py index 8517bf5a3c9..79efc2b6f25 100644 --- a/src/ai/backend/client/cli/service_auto_scaling_rule.py +++ b/src/ai/backend/client/cli/service_auto_scaling_rule.py @@ -1,7 +1,7 @@ import decimal import sys -import uuid from typing import Any, Iterable, Optional +from uuid import UUID import click @@ -29,17 +29,25 @@ @service.group() -def auto_scaling_rule(): +def auto_scaling_rule() -> None: """Set of model service auto-scaling rule 
operations""" @auto_scaling_rule.command() @pass_ctx_obj @click.argument("service", type=str, metavar="SERVICE_NAME_OR_ID") -@click.option("--metric-source", type=click.Choice([*AutoScalingMetricSource]), required=True) +@click.option( + "--metric-source", + type=click.Choice([*AutoScalingMetricSource], case_sensitive=False), + required=True, +) @click.option("--metric-name", type=str, required=True) @click.option("--threshold", type=str, required=True) -@click.option("--comparator", type=click.Choice([*AutoScalingMetricComparator]), required=True) +@click.option( + "--comparator", + type=click.Choice([*AutoScalingMetricComparator], case_sensitive=False), + required=True, +) @click.option("--step-size", type=int, required=True) @click.option("--cooldown-seconds", type=int, required=True) @click.option("--min-replicas", type=int) @@ -67,7 +75,7 @@ def create( sys.exit(ExitCode.FAILURE) try: - service_id = uuid.UUID(get_service_id(session, service)) + service_id = get_service_id(session, service) rule = session.ServiceAutoScalingRule.create( service_id, metric_source, @@ -98,7 +106,15 @@ def create( @click.option("--order", default=None, help="Set the query ordering expression.") @click.option("--offset", default=0, help="The index of the current page start for pagination.") @click.option("--limit", type=int, default=None, help="The page size for pagination.") -def list(ctx: CLIContext, service: str, format, filter_, order, offset, limit) -> None: +def list( + ctx: CLIContext, + service: str, + format: Optional[str], + filter_: Optional[str], + order: Optional[str], + offset: int, + limit: Optional[int], +) -> None: """List all set auto-scaling rules for given model service.""" if format: @@ -110,7 +126,7 @@ def list(ctx: CLIContext, service: str, format, filter_, order, offset, limit) - else: fields = None with Session() as session: - service_id = uuid.UUID(get_service_id(session, service)) + service_id = get_service_id(session, service) try: fetch_func = lambda 
pg_offset, pg_size: session.ServiceAutoScalingRule.paginated_list( @@ -140,7 +156,7 @@ def list(ctx: CLIContext, service: str, format, filter_, order, offset, limit) - default=None, help="Display only specified fields. When specifying multiple fields separate them with comma (,).", ) -def get(ctx: CLIContext, rule: uuid.UUID, format: str) -> None: +def get(ctx: CLIContext, rule: UUID, format: str) -> None: """Prints attributes of the given auto-scaling rule.""" fields: Iterable[Any] if format: @@ -156,7 +172,7 @@ def get(ctx: CLIContext, rule: uuid.UUID, format: str) -> None: try: rule_instance = session.ServiceAutoScalingRule(rule).get(fields=fields) except (ValueError, BackendAPIError): - ctx.output.print_fail(f"Network {rule} not found.") + ctx.output.print_fail(f"Rule {rule!r} not found.") sys.exit(ExitCode.FAILURE) ctx.output.print_item(rule_instance, fields) @@ -193,7 +209,7 @@ def get(ctx: CLIContext, rule: uuid.UUID, format: str) -> None: ) def update( ctx: CLIContext, - rule: uuid.UUID, + rule: UUID, *, metric_source: str | Undefined, metric_name: str | Undefined, @@ -239,7 +255,7 @@ def update( @auto_scaling_rule.command() @pass_ctx_obj @click.argument("rule", type=click.UUID, metavar="RULE_ID") -def delete(ctx: CLIContext, rule: uuid.UUID) -> None: +def delete(ctx: CLIContext, rule: UUID) -> None: """Remove the given auto-scaling rule.""" with Session() as session: rule_instance = session.ServiceAutoScalingRule(rule) From 4fb3e492bbddf044467947c087be66eb7c4c22a1 Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Tue, 14 Jan 2025 13:40:02 +0900 Subject: [PATCH 51/75] fix: Update type anno --- src/ai/backend/client/func/service.py | 31 ++++++++++--------- .../client/func/service_auto_scaling_rule.py | 17 +++++----- 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/src/ai/backend/client/func/service.py b/src/ai/backend/client/func/service.py index 8fb720f1589..0b0afd2aac1 100644 --- a/src/ai/backend/client/func/service.py +++ 
b/src/ai/backend/client/func/service.py @@ -1,7 +1,9 @@ -from typing import Any, Literal, Mapping, Optional, Sequence +from collections.abc import Mapping, Sequence +from typing import Any, Literal, Optional from uuid import UUID from faker import Faker +from typing_extensions import deprecated from ai.backend.common.arch import DEFAULT_IMAGE_ARCH @@ -30,10 +32,10 @@ class Service(BaseFunction): id: UUID + @deprecated("Use paginated_list() instead of this method.") @api_function @classmethod - async def list(cls, name: Optional[str] = None): - """ """ + async def list(cls, name: Optional[str] = None) -> list[dict[str, Any]]: params = {} if name: params["name"] = name @@ -51,8 +53,7 @@ async def paginated_list( page_size: int = 20, filter: Optional[str] = None, order: Optional[str] = None, - ) -> PaginatedResult: - """ """ + ) -> PaginatedResult[dict[str, Any]]: return await fetch_paginated_result( "endpoint_list", { @@ -70,7 +71,7 @@ async def detail( cls, service_id: str, fields: Sequence[FieldSpec] = _default_fields, - ) -> Sequence[dict]: + ) -> Sequence[dict[str, Any]]: query = _d(""" query($endpoint_id: UUID!) { endpoint(endpoint_id: $endpoint_id) { $fields } @@ -111,7 +112,7 @@ async def create( owner_access_key: Optional[str] = None, model_definition_path: Optional[str] = None, expose_to_public=False, - ) -> Any: + ) -> dict[str, Any]: """ Creates an inference service. @@ -237,7 +238,7 @@ async def try_start( scaling_group: Optional[str] = None, owner_access_key: Optional[str] = None, expose_to_public=False, - ) -> Any: + ) -> dict[str, Any]: """ Tries to start an inference session and terminates immediately. 
@@ -307,46 +308,46 @@ def __init__(self, id: str | UUID) -> None: self.id = id if isinstance(id, UUID) else UUID(id) @api_function - async def info(self): + async def info(self) -> dict[str, Any]: rqst = Request("GET", f"/services/{self.id}") async with rqst.fetch() as resp: return await resp.json() @api_function - async def delete(self): + async def delete(self) -> dict[str, Any]: rqst = Request("DELETE", f"/services/{self.id}") async with rqst.fetch() as resp: return await resp.json() @api_function - async def sync(self): + async def sync(self) -> dict[str, Any]: rqst = Request("POST", f"/services/{self.id}/sync") async with rqst.fetch() as resp: return await resp.json() @api_function - async def scale(self, to: int): + async def scale(self, to: int) -> dict[str, Any]: rqst = Request("POST", f"/services/{self.id}/scale") rqst.set_json({"to": to}) async with rqst.fetch() as resp: return await resp.json() @api_function - async def generate_api_token(self, duration: str): + async def generate_api_token(self, duration: str) -> dict[str, Any]: rqst = Request("POST", f"/services/{self.id}/token") rqst.set_json({"duration": duration}) async with rqst.fetch() as resp: return await resp.json() @api_function - async def update_traffic_ratio(self, target_route_id: UUID, new_ratio: float): + async def update_traffic_ratio(self, target_route_id: UUID, new_ratio: float) -> dict[str, Any]: rqst = Request("PUT", f"/services/{self.id}/routings/{target_route_id}") rqst.set_json({"traffic_ratio": new_ratio}) async with rqst.fetch() as resp: return await resp.json() @api_function - async def downscale_single_route(self, target_route_id: UUID): + async def downscale_single_route(self, target_route_id: UUID) -> dict[str, Any]: rqst = Request("DELETE", f"/services/{self.id}/routings/{target_route_id}") async with rqst.fetch() as resp: return await resp.json() diff --git a/src/ai/backend/client/func/service_auto_scaling_rule.py b/src/ai/backend/client/func/service_auto_scaling_rule.py 
index 7bf05d40f81..7f84c785549 100644 --- a/src/ai/backend/client/func/service_auto_scaling_rule.py +++ b/src/ai/backend/client/func/service_auto_scaling_rule.py @@ -1,17 +1,18 @@ import textwrap +from collections.abc import Sequence from decimal import Decimal -from typing import Any, Optional, Sequence +from typing import Any, Optional, Self from uuid import UUID -from ai.backend.client.func.base import BaseFunction, api_function -from ai.backend.client.output.types import FieldSpec, RelayPaginatedResult -from ai.backend.client.pagination import execute_paginated_relay_query -from ai.backend.client.session import api_session -from ai.backend.client.types import set_if_set from ai.backend.common.types import AutoScalingMetricComparator, AutoScalingMetricSource from ...cli.types import Undefined, undefined from ..output.fields import service_auto_scaling_rule_fields +from ..output.types import FieldSpec, RelayPaginatedResult +from ..pagination import execute_paginated_relay_query +from ..session import api_session +from ..types import set_if_set +from .base import BaseFunction, api_function _default_fields: Sequence[FieldSpec] = ( service_auto_scaling_rule_fields["id"], @@ -70,7 +71,7 @@ async def create( *, min_replicas: Optional[int] = None, max_replicas: Optional[int] = None, - ) -> "ServiceAutoScalingRule": + ) -> Self: q = textwrap.dedent( """ mutation( @@ -154,7 +155,7 @@ async def update( cooldown_seconds: int | Undefined = undefined, min_replicas: Optional[int] | Undefined = undefined, max_replicas: Optional[int] | Undefined = undefined, - ) -> "ServiceAutoScalingRule": + ) -> Self: q = textwrap.dedent( """ mutation( From 3aa937363a915c55e36fb60f72b4b60bbc5a0962 Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Tue, 14 Jan 2025 13:43:54 +0900 Subject: [PATCH 52/75] doc: Add condition about when plain list() is allowed --- src/ai/backend/client/func/service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/ai/backend/client/func/service.py b/src/ai/backend/client/func/service.py index 0b0afd2aac1..9137b7909e3 100644 --- a/src/ai/backend/client/func/service.py +++ b/src/ai/backend/client/func/service.py @@ -32,7 +32,7 @@ class Service(BaseFunction): id: UUID - @deprecated("Use paginated_list() instead of this method.") + @deprecated("Use paginated_list() instead of this method unless you set the name filter.") @api_function @classmethod async def list(cls, name: Optional[str] = None) -> list[dict[str, Any]]: From 486916432d001316b01a5d775588516d14477241 Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Fri, 17 Jan 2025 06:21:37 +0000 Subject: [PATCH 53/75] chore: update api schema dump Co-authored-by: octodog --- docs/manager/graphql-reference/schema.graphql | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/docs/manager/graphql-reference/schema.graphql b/docs/manager/graphql-reference/schema.graphql index 189102c937c..9545e04daea 100644 --- a/docs/manager/graphql-reference/schema.graphql +++ b/docs/manager/graphql-reference/schema.graphql @@ -1477,6 +1477,7 @@ type QuotaDetails { hard_limit_bytes: BigInt } +"""Deprecated since 24.09.0. use `ContainerRegistryNode` instead""" type ContainerRegistry implements Node { """The ID of the object""" id: ID! @@ -1484,6 +1485,7 @@ type ContainerRegistry implements Node { config: ContainerRegistryConfig } +"""Deprecated since 24.09.0.""" type ContainerRegistryConfig { url: String! type: String! 
@@ -1776,7 +1778,11 @@ type Mutations { create_keypair(props: KeyPairInput!, user_id: String!): CreateKeyPair modify_keypair(access_key: String!, props: ModifyKeyPairInput!): ModifyKeyPair delete_keypair(access_key: String!): DeleteKeyPair - rescan_images(registry: String): RescanImages + rescan_images( + """Added in 25.1.0.""" + project: String + registry: String + ): RescanImages preload_image(references: [String]!, target_agents: [String]!): PreloadImage unload_image(references: [String]!, target_agents: [String]!): UnloadImage modify_image(architecture: String = "x86_64", props: ModifyImageInput!, target: String!): ModifyImage @@ -1918,8 +1924,14 @@ type Mutations { """Added in 25.1.0.""" delete_endpoint_auto_scaling_rule_node(id: String!): DeleteEndpointAutoScalingRuleNode + + """Deprecated since 24.09.0. use `CreateContainerRegistryNode` instead""" create_container_registry(hostname: String!, props: CreateContainerRegistryInput!): CreateContainerRegistry + + """Deprecated since 24.09.0. use `ModifyContainerRegistryNode` instead""" modify_container_registry(hostname: String!, props: ModifyContainerRegistryInput!): ModifyContainerRegistry + + """Deprecated since 24.09.0. use `DeleteContainerRegistryNode` instead""" delete_container_registry(hostname: String!): DeleteContainerRegistry modify_endpoint(endpoint_id: UUID!, props: ModifyEndpointInput!): ModifyEndpoint @@ -2695,10 +2707,12 @@ type DeleteEndpointAutoScalingRuleNode { msg: String } +"""Deprecated since 24.09.0. use `CreateContainerRegistryNode` instead""" type CreateContainerRegistry { container_registry: ContainerRegistry } +"""Deprecated since 24.09.0.""" input CreateContainerRegistryInput { url: String! type: String! @@ -2711,10 +2725,12 @@ input CreateContainerRegistryInput { is_global: Boolean } +"""Deprecated since 24.09.0. 
use `ModifyContainerRegistryNode` instead""" type ModifyContainerRegistry { container_registry: ContainerRegistry } +"""Deprecated since 24.09.0.""" input ModifyContainerRegistryInput { url: String type: String @@ -2727,6 +2743,7 @@ input ModifyContainerRegistryInput { is_global: Boolean } +"""Deprecated since 24.09.0. use `DeleteContainerRegistryNode` instead""" type DeleteContainerRegistry { container_registry: ContainerRegistry } From a7519b06bb2be47b8b16723e5baf1fa7d2f03887 Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Fri, 17 Jan 2025 15:22:49 +0900 Subject: [PATCH 54/75] fix: ordering of the `@deprecated` decorator --- src/ai/backend/client/func/service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ai/backend/client/func/service.py b/src/ai/backend/client/func/service.py index 9137b7909e3..497aacebeb0 100644 --- a/src/ai/backend/client/func/service.py +++ b/src/ai/backend/client/func/service.py @@ -32,9 +32,9 @@ class Service(BaseFunction): id: UUID - @deprecated("Use paginated_list() instead of this method unless you set the name filter.") @api_function @classmethod + @deprecated("Use paginated_list() instead of this method unless you set the name filter.") async def list(cls, name: Optional[str] = None) -> list[dict[str, Any]]: params = {} if name: From 9e4835f9d24c41598d1b1aa981ca35b347698c8e Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Fri, 17 Jan 2025 16:48:15 +0900 Subject: [PATCH 55/75] refactor: Add missing __all__ entries in common.types --- src/ai/backend/common/types.py | 262 +++++++++++++++++++-------------- 1 file changed, 154 insertions(+), 108 deletions(-) diff --git a/src/ai/backend/common/types.py b/src/ai/backend/common/types.py index c0634c5a2d7..2e6c7c2bf9f 100644 --- a/src/ai/backend/common/types.py +++ b/src/ai/backend/common/types.py @@ -54,28 +54,54 @@ __all__ = ( "aobject", + "Sentinel", + "QueueSentinel", + "CIStrEnum", + "CIUpperStrEnum", + "CIStrEnumTrafaret", + "CIUpperStrEnumTrafaret", 
"JSONSerializableMixin", + "check_typed_tuple", + "check_typed_dict", "DeviceId", "ContainerId", "EndpointId", "SessionId", "KernelId", + "SessionTypes", + "SessionResult", + "ResourceGroupID", + "AgentId", + "DeviceName", + "AccessKey", + "SecretKey", "MetricKey", "MetricValue", "MovingStatValue", + "Quantum", "PID", "HostPID", "ContainerPID", "BinarySize", "HostPortPair", - "DeviceId", - "SlotName", - "IntrinsicSlotNames", + "ImageRegistry", + "ImageConfig", + "AutoPullBehavior", + "ServicePort", "ResourceSlot", "ResourceGroupType", - "ReadableCIDR", + "SlotName", + "SlotTypes", + "IntrinsicSlotNames", + "DefaultForUnspecified", + "HandlerForUnknownSlotName", "HardwareMetadata", - "ModelServiceStatus", + "AcceleratorNumberFormat", + "AcceleratorMetadata", + "DeviceModelInfo", + "ComputedDeviceCapacity", + "AbstractPermission", + "MountExpression", "MountPermission", "MountPermissionLiteral", "MountTypes", @@ -84,22 +110,42 @@ "QuotaScopeID", "VFolderUsageMode", "VFolderMount", + "VFolderHostPermission", + "VolumeMountableNodeType", + "QuotaScopeType", "QuotaConfig", + "SessionEnqueueingConfig", "KernelCreationConfig", + "KernelEnqueueingConfig", "KernelCreationResult", "ServicePortProtocols", "ClusterInfo", "ClusterMode", "ClusterSSHKeyPair", - "check_typed_dict", + "ClusterSSHPortMapping", "EtcdRedisConfig", + "ReadableCIDR", "RedisConnectionInfo", + "RedisHelperConfig", + "AgentSelectionStrategy", + "SchedulerStatus", + "AbuseReportValue", + "AbuseReport", + "ModelServiceStatus", + "ModelServiceProfile", "RuntimeVariant", + "PromMetric", + "PromMetricGroup", + "PromMetricPrimitive", "AutoScalingMetricSource", "AutoScalingMetricComparator", "MODEL_SERVICE_RUNTIME_PROFILES", + "ItemResult", + "ResultSet", + "safe_print_redis_config", ) + if TYPE_CHECKING: from .docker import ImageRef @@ -146,6 +192,101 @@ async def __ainit__(self) -> None: pass +class Sentinel(enum.Enum): + TOKEN = 0 + + +class QueueSentinel(enum.Enum): + CLOSED = 0 + TIMEOUT = 1 + + +class 
CIStrEnum(enum.StrEnum): + """ + An StrEnum variant to allow case-insenstive matching of the members while the values are + lowercased. + """ + + @override + @classmethod + def _missing_(cls, value: Any) -> Self | None: + assert isinstance(value, str) # since this is an StrEnum + value = value.lower() + # To prevent infinite recursion, we don't rely on "cls(value)" but manually search the + # members as the official stdlib example suggests. + for member in cls: + if member.value == value: + return member + return None + + # The defualt behavior of `enum.auto()` is to set the value to the lowercased member name. + + @classmethod + def as_trafaret(cls) -> t.Trafaret: + return CIStrEnumTrafaret(cls) + + +class CIUpperStrEnum(CIStrEnum): + """ + An StrEnum variant to allow case-insenstive matching of the members while the values are + UPPERCASED. + """ + + @override + @classmethod + def _missing_(cls, value: Any) -> Self | None: + assert isinstance(value, str) # since this is an StrEnum + value = value.upper() + for member in cls: + if member.value == value: + return member + return None + + @override + @staticmethod + def _generate_next_value_(name, start, count, last_values) -> str: + return name.upper() + + @classmethod + def as_trafaret(cls) -> t.Trafaret: + return CIUpperStrEnumTrafaret(cls) + + +T_enum = TypeVar("T_enum", bound=enum.Enum) + + +class CIStrEnumTrafaret(t.Trafaret, Generic[T_enum]): + """ + A case-insensitive version of trafaret to parse StrEnum values. + """ + + def __init__(self, enum_cls: type[T_enum]) -> None: + self.enum_cls = enum_cls + + def check_and_return(self, value: str) -> T_enum: + try: + # Assume that the enum values are lowercases. + return self.enum_cls(value.lower()) + except (KeyError, ValueError): + self._failure(f"value is not a valid member of {self.enum_cls.__name__}", value=value) + + +class CIUpperStrEnumTrafaret(t.Trafaret, Generic[T_enum]): + """ + A case-insensitive version of trafaret to parse StrEnum values. 
+ """ + + def __init__(self, enum_cls: type[T_enum]) -> None: + self.enum_cls = enum_cls + + def check_and_return(self, value: str) -> T_enum: + try: + # Assume that the enum values are lowercases. + return self.enum_cls(value.upper()) + except (KeyError, ValueError): + self._failure(f"value is not a valid member of {self.enum_cls.__name__}", value=value) + + T1 = TypeVar("T1") T2 = TypeVar("T2") T3 = TypeVar("T3") @@ -1145,15 +1286,6 @@ def _stringify_number(v: Union[BinarySize, int, float, Decimal]) -> str: return result -class Sentinel(enum.Enum): - TOKEN = 0 - - -class QueueSentinel(enum.Enum): - CLOSED = 0 - TIMEOUT = 1 - - class EtcdRedisConfig(TypedDict, total=False): addr: Optional[HostPortPair] sentinel: Optional[Union[str, List[HostPortPair]]] @@ -1233,6 +1365,13 @@ class ModelServiceStatus(enum.Enum): UNHEALTHY = "unhealthy" +@dataclass +class ModelServiceProfile: + name: str + health_check_endpoint: str | None = dataclasses.field(default=None) + port: int | None = dataclasses.field(default=None) + + class RuntimeVariant(enum.StrEnum): VLLM = "vllm" NIM = "nim" @@ -1241,13 +1380,6 @@ class RuntimeVariant(enum.StrEnum): CUSTOM = "custom" -@dataclass -class ModelServiceProfile: - name: str - health_check_endpoint: str | None = dataclasses.field(default=None) - port: int | None = dataclasses.field(default=None) - - MODEL_SERVICE_RUNTIME_PROFILES: Mapping[RuntimeVariant, ModelServiceProfile] = { RuntimeVariant.CUSTOM: ModelServiceProfile(name="Custom (Default)"), RuntimeVariant.VLLM: ModelServiceProfile( @@ -1321,92 +1453,6 @@ def metric_string(self) -> str: return result -class CIStrEnum(enum.StrEnum): - """ - An StrEnum variant to allow case-insenstive matching of the members while the values are - lowercased. 
- """ - - @override - @classmethod - def _missing_(cls, value: Any) -> Self | None: - assert isinstance(value, str) # since this is an StrEnum - value = value.lower() - # To prevent infinite recursion, we don't rely on "cls(value)" but manually search the - # members as the official stdlib example suggests. - for member in cls: - if member.value == value: - return member - return None - - # The defualt behavior of `enum.auto()` is to set the value to the lowercased member name. - - @classmethod - def as_trafaret(cls) -> t.Trafaret: - return CIStrEnumTrafaret(cls) - - -class CIUpperStrEnum(CIStrEnum): - """ - An StrEnum variant to allow case-insenstive matching of the members while the values are - UPPERCASED. - """ - - @override - @classmethod - def _missing_(cls, value: Any) -> Self | None: - assert isinstance(value, str) # since this is an StrEnum - value = value.upper() - for member in cls: - if member.value == value: - return member - return None - - @override - @staticmethod - def _generate_next_value_(name, start, count, last_values) -> str: - return name.upper() - - @classmethod - def as_trafaret(cls) -> t.Trafaret: - return CIUpperStrEnumTrafaret(cls) - - -T_enum = TypeVar("T_enum", bound=enum.Enum) - - -class CIStrEnumTrafaret(t.Trafaret, Generic[T_enum]): - """ - A case-insensitive version of trafaret to parse StrEnum values. - """ - - def __init__(self, enum_cls: type[T_enum]) -> None: - self.enum_cls = enum_cls - - def check_and_return(self, value: str) -> T_enum: - try: - # Assume that the enum values are lowercases. - return self.enum_cls(value.lower()) - except (KeyError, ValueError): - self._failure(f"value is not a valid member of {self.enum_cls.__name__}", value=value) - - -class CIUpperStrEnumTrafaret(t.Trafaret, Generic[T_enum]): - """ - A case-insensitive version of trafaret to parse StrEnum values. 
- """ - - def __init__(self, enum_cls: type[T_enum]) -> None: - self.enum_cls = enum_cls - - def check_and_return(self, value: str) -> T_enum: - try: - # Assume that the enum values are lowercases. - return self.enum_cls(value.upper()) - except (KeyError, ValueError): - self._failure(f"value is not a valid member of {self.enum_cls.__name__}", value=value) - - class AutoScalingMetricSource(CIUpperStrEnum): KERNEL = enum.auto() INFERENCE_FRAMEWORK = enum.auto() From 561ac65a0a5946153426d817988e99b631719e09 Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Fri, 17 Jan 2025 18:32:01 +0900 Subject: [PATCH 56/75] refactor: Add missing type annotations --- src/ai/backend/manager/models/endpoint.py | 51 ++++++++++++----------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/src/ai/backend/manager/models/endpoint.py b/src/ai/backend/manager/models/endpoint.py index 869929aea61..3846de9a540 100644 --- a/src/ai/backend/manager/models/endpoint.py +++ b/src/ai/backend/manager/models/endpoint.py @@ -4,6 +4,7 @@ import logging import uuid from collections.abc import ( + Container, Mapping, Sequence, ) @@ -266,7 +267,7 @@ def __init__( callback_url: Optional[yarl.URL] = None, environ: Optional[Mapping[str, Any]] = None, resource_opts: Optional[Mapping[str, Any]] = None, - open_to_public=False, + open_to_public: bool = False, ): self.id = uuid.uuid4() self.name = name @@ -301,12 +302,12 @@ async def get( domain: Optional[str] = None, project: Optional[uuid.UUID] = None, user_uuid: Optional[uuid.UUID] = None, - load_routes=False, - load_tokens=False, - load_image=False, - load_created_user=False, - load_session_owner=False, - load_model=False, + load_routes: bool = False, + load_tokens: bool = False, + load_image: bool = False, + load_created_user: bool = False, + load_session_owner: bool = False, + load_model: bool = False, ) -> Self: """ :raises: sqlalchemy.orm.exc.NoResultFound @@ -345,12 +346,12 @@ async def list( domain: Optional[str] = None, project: 
Optional[uuid.UUID] = None, user_uuid: Optional[uuid.UUID] = None, - load_routes=False, - load_image=False, - load_tokens=False, - load_created_user=False, - load_session_owner=False, - status_filter=[EndpointLifecycle.CREATED], + load_routes: bool = False, + load_image: bool = False, + load_tokens: bool = False, + load_created_user: bool = False, + load_session_owner: bool = False, + status_filter: Container[EndpointLifecycle] = [EndpointLifecycle.CREATED], ) -> list[Self]: query = ( sa.select(EndpointRow) @@ -384,12 +385,12 @@ async def batch_load( domain: Optional[str] = None, project: Optional[uuid.UUID] = None, user_uuid: Optional[uuid.UUID] = None, - load_routes=False, - load_image=False, - load_tokens=False, - load_created_user=False, - load_session_owner=False, - status_filter=[EndpointLifecycle.CREATED], + load_routes: bool = False, + load_image: bool = False, + load_tokens: bool = False, + load_created_user: bool = False, + load_session_owner: bool = False, + status_filter: Container[EndpointLifecycle] = [EndpointLifecycle.CREATED], ) -> Sequence[Self]: query = ( sa.select(EndpointRow) @@ -425,12 +426,12 @@ async def list_by_model( domain: Optional[str] = None, project: Optional[uuid.UUID] = None, user_uuid: Optional[uuid.UUID] = None, - load_routes=False, - load_image=False, - load_tokens=False, - load_created_user=False, - load_session_owner=False, - status_filter=[EndpointLifecycle.CREATED], + load_routes: bool = False, + load_image: bool = False, + load_tokens: bool = False, + load_created_user: bool = False, + load_session_owner: bool = False, + status_filter: Container[EndpointLifecycle] = [EndpointLifecycle.CREATED], ) -> Sequence[Self]: query = ( sa.select(EndpointRow) From 32e16360ade11f80bb8b7f87daf224d29346e3d4 Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Fri, 17 Jan 2025 18:34:48 +0900 Subject: [PATCH 57/75] fix: Ensure immutability of default arguments --- src/ai/backend/manager/models/endpoint.py | 6 +++--- 1 file changed, 3 
insertions(+), 3 deletions(-) diff --git a/src/ai/backend/manager/models/endpoint.py b/src/ai/backend/manager/models/endpoint.py index 3846de9a540..54ee5e73538 100644 --- a/src/ai/backend/manager/models/endpoint.py +++ b/src/ai/backend/manager/models/endpoint.py @@ -351,7 +351,7 @@ async def list( load_tokens: bool = False, load_created_user: bool = False, load_session_owner: bool = False, - status_filter: Container[EndpointLifecycle] = [EndpointLifecycle.CREATED], + status_filter: Container[EndpointLifecycle] = frozenset([EndpointLifecycle.CREATED]), ) -> list[Self]: query = ( sa.select(EndpointRow) @@ -390,7 +390,7 @@ async def batch_load( load_tokens: bool = False, load_created_user: bool = False, load_session_owner: bool = False, - status_filter: Container[EndpointLifecycle] = [EndpointLifecycle.CREATED], + status_filter: Container[EndpointLifecycle] = frozenset([EndpointLifecycle.CREATED]), ) -> Sequence[Self]: query = ( sa.select(EndpointRow) @@ -431,7 +431,7 @@ async def list_by_model( load_tokens: bool = False, load_created_user: bool = False, load_session_owner: bool = False, - status_filter: Container[EndpointLifecycle] = [EndpointLifecycle.CREATED], + status_filter: Container[EndpointLifecycle] = frozenset([EndpointLifecycle.CREATED]), ) -> Sequence[Self]: query = ( sa.select(EndpointRow) From 98c3440c657b73cba2817db2d2aa52ffe602a121 Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Fri, 17 Jan 2025 18:42:11 +0900 Subject: [PATCH 58/75] fix: GraphQL version annotations in new EndpointAutoScalingRuleNode --- .../manager/models/gql_models/endpoint.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/ai/backend/manager/models/gql_models/endpoint.py b/src/ai/backend/manager/models/gql_models/endpoint.py index 4d8ecb6e392..4b9bdc98587 100644 --- a/src/ai/backend/manager/models/gql_models/endpoint.py +++ b/src/ai/backend/manager/models/gql_models/endpoint.py @@ -71,18 +71,18 @@ class 
EndpointAutoScalingRuleNode(graphene.ObjectType): class Meta: interfaces = (AsyncNode,) - description = "Added in 25.01.0." + description = "Added in 25.1.0." row_id = graphene.UUID(required=True) metric_source = graphene.Field( - graphene.Enum.from_enum(AutoScalingMetricSource, description="Added in 25.01.0."), + graphene.Enum.from_enum(AutoScalingMetricSource), required=True, ) metric_name = graphene.String(required=True) threshold = graphene.String(required=True) comparator = graphene.Field( - graphene.Enum.from_enum(AutoScalingMetricComparator, description="Added in 25.01.0."), + graphene.Enum.from_enum(AutoScalingMetricComparator), required=True, ) step_size = graphene.Int(required=True) @@ -220,12 +220,12 @@ async def get_connection( class EndpointAutoScalingRuleConnection(Connection): class Meta: node = EndpointAutoScalingRuleNode - description = "Added in 25.01.0." + description = "Added in 25.1.0." class EndpointAutoScalingRuleInput(graphene.InputObjectType): class Meta: - description = "Added in 25.01.0." + description = "Added in 25.1.0." metric_source = graphene.Field( graphene.Enum.from_enum( @@ -255,7 +255,7 @@ class Meta: class ModifyEndpointAutoScalingRuleInput(graphene.InputObjectType): class Meta: - description = "Added in 25.01.0." + description = "Added in 25.1.0." metric_source = graphene.Field( graphene.Enum.from_enum( @@ -291,7 +291,7 @@ class Arguments: props = EndpointAutoScalingRuleInput(required=True) class Meta: - description = "Added in 25.01.0." + description = "Added in 25.1.0." ok = graphene.Boolean() msg = graphene.String() @@ -369,7 +369,7 @@ class Arguments: props = ModifyEndpointAutoScalingRuleInput(required=True) class Meta: - description = "Added in 25.01.0." + description = "Added in 25.1.0." ok = graphene.Boolean() msg = graphene.String() @@ -440,7 +440,7 @@ class Arguments: id = graphene.String(required=True) class Meta: - description = "Added in 25.01.0." + description = "Added in 25.1.0." 
ok = graphene.Boolean() msg = graphene.String() From c8c5617f7117ee5ce47fbb9f86f15d24af8d94b6 Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Fri, 17 Jan 2025 09:44:39 +0000 Subject: [PATCH 59/75] chore: update api schema dump Co-authored-by: octodog --- docs/manager/graphql-reference/schema.graphql | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/manager/graphql-reference/schema.graphql b/docs/manager/graphql-reference/schema.graphql index 9545e04daea..9e398862fc4 100644 --- a/docs/manager/graphql-reference/schema.graphql +++ b/docs/manager/graphql-reference/schema.graphql @@ -1659,7 +1659,7 @@ type NetworkEdge { cursor: String! } -"""Added in 25.01.0.""" +"""Added in 25.1.0.""" type EndpointAutoScalingRuleNode implements Node { """The ID of the object""" id: ID! @@ -1677,13 +1677,13 @@ type EndpointAutoScalingRuleNode implements Node { endpoint: UUID! } -"""Added in 25.01.0.""" +"""An enumeration.""" enum AutoScalingMetricSource { KERNEL INFERENCE_FRAMEWORK } -"""Added in 25.01.0.""" +"""An enumeration.""" enum AutoScalingMetricComparator { LESS_THAN LESS_THAN_OR_EQUAL @@ -1691,7 +1691,7 @@ enum AutoScalingMetricComparator { GREATER_THAN_OR_EQUAL } -"""Added in 25.01.0.""" +"""Added in 25.1.0.""" type EndpointAutoScalingRuleConnection { """Pagination data for this connection.""" pageInfo: PageInfo! @@ -1704,7 +1704,7 @@ type EndpointAutoScalingRuleConnection { } """ -Added in 25.01.0. A Relay edge containing a `EndpointAutoScalingRule` and its cursor. +Added in 25.1.0. A Relay edge containing a `EndpointAutoScalingRule` and its cursor. 
""" type EndpointAutoScalingRuleEdge { """The item at the end of the edge""" @@ -2663,14 +2663,14 @@ type DeleteContainerRegistryNode { container_registry: ContainerRegistryNode } -"""Added in 25.01.0.""" +"""Added in 25.1.0.""" type CreateEndpointAutoScalingRuleNode { ok: Boolean msg: String rule: EndpointAutoScalingRuleNode } -"""Added in 25.01.0.""" +"""Added in 25.1.0.""" input EndpointAutoScalingRuleInput { metric_source: AutoScalingMetricSource! metric_name: String! @@ -2682,14 +2682,14 @@ input EndpointAutoScalingRuleInput { max_replicas: Int } -"""Added in 25.01.0.""" +"""Added in 25.1.0.""" type ModifyEndpointAutoScalingRuleNode { ok: Boolean msg: String rule: EndpointAutoScalingRuleNode } -"""Added in 25.01.0.""" +"""Added in 25.1.0.""" input ModifyEndpointAutoScalingRuleInput { metric_source: AutoScalingMetricSource metric_name: String @@ -2701,7 +2701,7 @@ input ModifyEndpointAutoScalingRuleInput { max_replicas: Int } -"""Added in 25.01.0.""" +"""Added in 25.1.0.""" type DeleteEndpointAutoScalingRuleNode { ok: Boolean msg: String From 3887004372ad221460821c74550a649069528876 Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Fri, 17 Jan 2025 18:52:03 +0900 Subject: [PATCH 60/75] fix: ObjectNotFound usage --- src/ai/backend/manager/models/endpoint.py | 2 +- .../manager/models/gql_models/endpoint.py | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/ai/backend/manager/models/endpoint.py b/src/ai/backend/manager/models/endpoint.py index 54ee5e73538..711d43b20c7 100644 --- a/src/ai/backend/manager/models/endpoint.py +++ b/src/ai/backend/manager/models/endpoint.py @@ -641,7 +641,7 @@ async def get( result = await session.execute(query) row = result.scalar() if not row: - raise ObjectNotFound("endpoint_auto_scaling_rule") + raise ObjectNotFound(object_name="Endpoint Autoscaling Rule") return row async def remove_rule( diff --git a/src/ai/backend/manager/models/gql_models/endpoint.py 
b/src/ai/backend/manager/models/gql_models/endpoint.py index 4b9bdc98587..2d33d8ca028 100644 --- a/src/ai/backend/manager/models/gql_models/endpoint.py +++ b/src/ai/backend/manager/models/gql_models/endpoint.py @@ -124,7 +124,7 @@ async def get_node(cls, info: graphene.ResolveInfo, rule_id: str) -> Self: try: _rule_id = uuid.UUID(raw_rule_id) except ValueError: - raise ObjectNotFound("endpoint_auto_scaling_rule") + raise ObjectNotFound(object_name="endpoint autoscaling rule") async with graph_ctx.db.begin_readonly_session() as db_session: rule_row = await EndpointAutoScalingRuleRow.get( @@ -194,11 +194,11 @@ async def get_connection( try: _endpoint_id = uuid.UUID(raw_endpoint_id) except ValueError: - raise ObjectNotFound("endpoint") + raise ObjectNotFound(object_name="Endpoint") try: row = await EndpointRow.get(db_session, _endpoint_id) except NoResultFound: - raise ObjectNotFound(object_name="endpoint") + raise ObjectNotFound(object_name="Endpoint") match graph_ctx.user["role"]: case UserRole.SUPERADMIN: @@ -316,14 +316,14 @@ async def mutate( try: _endpoint_id = uuid.UUID(raw_endpoint_id) except ValueError: - raise ObjectNotFound("endpoint") + raise ObjectNotFound(object_name="Endpoint") graph_ctx: GraphQueryContext = info.context async with graph_ctx.db.begin_session(commit_on_end=True) as db_session: try: row = await EndpointRow.get(db_session, _endpoint_id) except NoResultFound: - raise ObjectNotFound(object_name="endpoint") + raise ObjectNotFound(object_name="Endpoint") match graph_ctx.user["role"]: case UserRole.SUPERADMIN: @@ -390,14 +390,14 @@ async def mutate( try: _rule_id = uuid.UUID(rule_id) except ValueError: - raise ObjectNotFound("auto_scaling_rule") + raise ObjectNotFound(object_name="Endpoint Autoscaling Rule") graph_ctx: GraphQueryContext = info.context async with graph_ctx.db.begin_session(commit_on_end=True) as db_session: try: row = await EndpointAutoScalingRuleRow.get(db_session, _rule_id, load_endpoint=True) except NoResultFound: - raise 
ObjectNotFound(object_name="auto_scaling_rule") + raise ObjectNotFound(object_name="Endpoint Autoscaling Rule") match graph_ctx.user["role"]: case UserRole.SUPERADMIN: @@ -459,14 +459,14 @@ async def mutate( try: _rule_id = uuid.UUID(rule_id) except ValueError: - raise ObjectNotFound("auto_scaling_rule") + raise ObjectNotFound(object_name="Endpoint Autoscaling Rule") graph_ctx: GraphQueryContext = info.context async with graph_ctx.db.begin_session(commit_on_end=True) as db_session: try: row = await EndpointAutoScalingRuleRow.get(db_session, _rule_id, load_endpoint=True) except NoResultFound: - raise ObjectNotFound(object_name="auto_scaling_rule") + raise ObjectNotFound(object_name="Endpoint Autoscaling Rule") match graph_ctx.user["role"]: case UserRole.SUPERADMIN: From 4dcb487c80cb7234589d372872c04c81faa0439d Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Fri, 17 Jan 2025 18:54:28 +0900 Subject: [PATCH 61/75] fix: missing.. --- src/ai/backend/manager/models/gql_models/endpoint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ai/backend/manager/models/gql_models/endpoint.py b/src/ai/backend/manager/models/gql_models/endpoint.py index 2d33d8ca028..35501700d0d 100644 --- a/src/ai/backend/manager/models/gql_models/endpoint.py +++ b/src/ai/backend/manager/models/gql_models/endpoint.py @@ -124,7 +124,7 @@ async def get_node(cls, info: graphene.ResolveInfo, rule_id: str) -> Self: try: _rule_id = uuid.UUID(raw_rule_id) except ValueError: - raise ObjectNotFound(object_name="endpoint autoscaling rule") + raise ObjectNotFound(object_name="Endpoint Autoscaling Rule") async with graph_ctx.db.begin_readonly_session() as db_session: rule_row = await EndpointAutoScalingRuleRow.get( From 2ea172721d0dc925538710bc9409cb289d13639b Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Fri, 17 Jan 2025 18:58:11 +0900 Subject: [PATCH 62/75] doc: Refine news fragment --- changes/3277.feature.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/changes/3277.feature.md b/changes/3277.feature.md index c3e45b09110..f27559a3654 100644 --- a/changes/3277.feature.md +++ b/changes/3277.feature.md @@ -1 +1 @@ -Support model service auto scaling +Support auto-scaling of model services by observing proxy and app-specific metrics as configured by autoscaling rules bound to each endpoint From ee62b1f5735ed11d9da123d523978991c203b4a2 Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Fri, 17 Jan 2025 19:58:43 +0900 Subject: [PATCH 63/75] refactor: Extract out GQLEnum types and specify the version-added note. --- .../manager/models/gql_models/endpoint.py | 42 +++++++------------ 1 file changed, 16 insertions(+), 26 deletions(-) diff --git a/src/ai/backend/manager/models/gql_models/endpoint.py b/src/ai/backend/manager/models/gql_models/endpoint.py index 35501700d0d..0518e83b7fc 100644 --- a/src/ai/backend/manager/models/gql_models/endpoint.py +++ b/src/ai/backend/manager/models/gql_models/endpoint.py @@ -68,6 +68,16 @@ } +AutoScalingMetricSourceGQLEnum = graphene.Enum.from_enum( + AutoScalingMetricSource, + description=f"Available values: {generate_desc_for_enum_kvlist(AutoScalingMetricSource)}. Added in 25.1.0.", +) +AutoScalingMetricComparatorGQLEnum = graphene.Enum.from_enum( + AutoScalingMetricComparator, + description=f"Available values: {generate_desc_for_enum_kvlist(AutoScalingMetricComparator)}. 
Added in 25.1.0.", +) + + class EndpointAutoScalingRuleNode(graphene.ObjectType): class Meta: interfaces = (AsyncNode,) @@ -76,13 +86,13 @@ class Meta: row_id = graphene.UUID(required=True) metric_source = graphene.Field( - graphene.Enum.from_enum(AutoScalingMetricSource), + AutoScalingMetricSourceGQLEnum, required=True, ) metric_name = graphene.String(required=True) threshold = graphene.String(required=True) comparator = graphene.Field( - graphene.Enum.from_enum(AutoScalingMetricComparator), + AutoScalingMetricComparatorGQLEnum, required=True, ) step_size = graphene.Int(required=True) @@ -228,23 +238,13 @@ class Meta: description = "Added in 25.1.0." metric_source = graphene.Field( - graphene.Enum.from_enum( - AutoScalingMetricSource, - description=( - f"Available values: {generate_desc_for_enum_kvlist(AutoScalingMetricSource)}" - ), - ), + AutoScalingMetricSourceGQLEnum, required=True, ) metric_name = graphene.String(required=True) threshold = graphene.String(required=True) comparator = graphene.Field( - graphene.Enum.from_enum( - AutoScalingMetricComparator, - description=( - f"Available values: {generate_desc_for_enum_kvlist(AutoScalingMetricComparator)}" - ), - ), + AutoScalingMetricComparatorGQLEnum, required=True, ) step_size = graphene.Int(required=True) @@ -258,23 +258,13 @@ class Meta: description = "Added in 25.1.0." 
metric_source = graphene.Field( - graphene.Enum.from_enum( - AutoScalingMetricSource, - description=( - f"Available values: {', '.join([p.name for p in AutoScalingMetricSource])}" - ), - ), + AutoScalingMetricSourceGQLEnum, default_value=Undefined, ) metric_name = graphene.String() threshold = graphene.String() comparator = graphene.Field( - graphene.Enum.from_enum( - AutoScalingMetricComparator, - description=( - f"Available values: {', '.join([p.name for p in AutoScalingMetricComparator])}" - ), - ), + AutoScalingMetricComparatorGQLEnum, default_value=Undefined, ) step_size = graphene.Int() From 286ce804ff942e1e3412193a0a20bc491e53a866 Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Fri, 17 Jan 2025 11:01:07 +0000 Subject: [PATCH 64/75] chore: update api schema dump Co-authored-by: octodog --- docs/manager/graphql-reference/schema.graphql | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/docs/manager/graphql-reference/schema.graphql b/docs/manager/graphql-reference/schema.graphql index 9e398862fc4..6c4dba115b7 100644 --- a/docs/manager/graphql-reference/schema.graphql +++ b/docs/manager/graphql-reference/schema.graphql @@ -1677,13 +1677,17 @@ type EndpointAutoScalingRuleNode implements Node { endpoint: UUID! } -"""An enumeration.""" +""" +Available values: 'KERNEL' (KERNEL), 'INFERENCE_FRAMEWORK' (INFERENCE_FRAMEWORK). Added in 25.1.0. +""" enum AutoScalingMetricSource { KERNEL INFERENCE_FRAMEWORK } -"""An enumeration.""" +""" +Available values: 'LESS_THAN' (LESS_THAN), 'LESS_THAN_OR_EQUAL' (LESS_THAN_OR_EQUAL), 'GREATER_THAN' (GREATER_THAN), 'GREATER_THAN_OR_EQUAL' (GREATER_THAN_OR_EQUAL). Added in 25.1.0. 
+""" enum AutoScalingMetricComparator { LESS_THAN LESS_THAN_OR_EQUAL From ade2a404d9472314fe8f049a2b1236f92083d1a5 Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Fri, 17 Jan 2025 20:12:33 +0900 Subject: [PATCH 65/75] fix: Do not repeat the enum name and value if it's CIStrEnum --- src/ai/backend/manager/models/utils.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/ai/backend/manager/models/utils.py b/src/ai/backend/manager/models/utils.py index 3d66f1a998c..e5559339349 100644 --- a/src/ai/backend/manager/models/utils.py +++ b/src/ai/backend/manager/models/utils.py @@ -43,6 +43,7 @@ ) from ai.backend.common.json import ExtendedJSONEncoder +from ai.backend.common.types import CIStrEnum, CIUpperStrEnum from ai.backend.logging import BraceStyleAdapter if TYPE_CHECKING: @@ -553,8 +554,14 @@ async def vacuum_db( await conn.exec_driver_sql(vacuum_sql) -def generate_desc_for_enum_kvlist(e: type[enum.StrEnum]) -> str: +def generate_desc_for_enum_kvlist( + e: type[enum.StrEnum] | type[CIStrEnum] | type[CIUpperStrEnum], +) -> str: items = [] - for name, value in e.__members__.items(): - items.append(f"{str(value)!r} ({name})") + if issubclass(e, enum.StrEnum): + for name, value in e.__members__.items(): + items.append(f"{str(value)!r} ({name})") + elif issubclass(e, (CIStrEnum, CIUpperStrEnum)): + for name, value in e.__members__.items(): + items.append(f"{str(value)!r}") return ", ".join(items) From 59d7beb0b50a7be65be52cd83310a136e5564906 Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Fri, 17 Jan 2025 20:18:21 +0900 Subject: [PATCH 66/75] fix: Ah.. it's useless to include the enum list in GraphQL enum because IDEs will offer auto-completion already. 
--- .../backend/manager/models/gql_models/endpoint.py | 5 ++--- src/ai/backend/manager/models/utils.py | 15 --------------- 2 files changed, 2 insertions(+), 18 deletions(-) diff --git a/src/ai/backend/manager/models/gql_models/endpoint.py b/src/ai/backend/manager/models/gql_models/endpoint.py index 0518e83b7fc..c4a98c03e76 100644 --- a/src/ai/backend/manager/models/gql_models/endpoint.py +++ b/src/ai/backend/manager/models/gql_models/endpoint.py @@ -35,7 +35,6 @@ from ..minilang.ordering import OrderSpecItem, QueryOrderParser from ..minilang.queryfilter import FieldSpecItem, QueryFilterParser from ..user import UserRole -from ..utils import generate_desc_for_enum_kvlist if TYPE_CHECKING: from ..gql import GraphQueryContext @@ -70,11 +69,11 @@ AutoScalingMetricSourceGQLEnum = graphene.Enum.from_enum( AutoScalingMetricSource, - description=f"Available values: {generate_desc_for_enum_kvlist(AutoScalingMetricSource)}. Added in 25.1.0.", + description="The source type to fetch metrics. Added in 25.1.0.", ) AutoScalingMetricComparatorGQLEnum = graphene.Enum.from_enum( AutoScalingMetricComparator, - description=f"Available values: {generate_desc_for_enum_kvlist(AutoScalingMetricComparator)}. Added in 25.1.0.", + description="The comparator used to compare the metric value with the threshold. 
Added in 25.1.0.", ) diff --git a/src/ai/backend/manager/models/utils.py b/src/ai/backend/manager/models/utils.py index e5559339349..2731b9fce4d 100644 --- a/src/ai/backend/manager/models/utils.py +++ b/src/ai/backend/manager/models/utils.py @@ -1,7 +1,6 @@ from __future__ import annotations import asyncio -import enum import functools import json import logging @@ -43,7 +42,6 @@ ) from ai.backend.common.json import ExtendedJSONEncoder -from ai.backend.common.types import CIStrEnum, CIUpperStrEnum from ai.backend.logging import BraceStyleAdapter if TYPE_CHECKING: @@ -552,16 +550,3 @@ async def vacuum_db( vacuum_sql = "VACUUM FULL" if vacuum_full else "VACUUM" log.info(f"Perfoming {vacuum_sql} operation...") await conn.exec_driver_sql(vacuum_sql) - - -def generate_desc_for_enum_kvlist( - e: type[enum.StrEnum] | type[CIStrEnum] | type[CIUpperStrEnum], -) -> str: - items = [] - if issubclass(e, enum.StrEnum): - for name, value in e.__members__.items(): - items.append(f"{str(value)!r} ({name})") - elif issubclass(e, (CIStrEnum, CIUpperStrEnum)): - for name, value in e.__members__.items(): - items.append(f"{str(value)!r}") - return ", ".join(items) From 53a8de963a59076bb5da9249e69b13a39e692975 Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Fri, 17 Jan 2025 11:20:51 +0000 Subject: [PATCH 67/75] chore: update api schema dump Co-authored-by: octodog --- docs/manager/graphql-reference/schema.graphql | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/docs/manager/graphql-reference/schema.graphql b/docs/manager/graphql-reference/schema.graphql index 6c4dba115b7..933b6b1c84b 100644 --- a/docs/manager/graphql-reference/schema.graphql +++ b/docs/manager/graphql-reference/schema.graphql @@ -1677,16 +1677,14 @@ type EndpointAutoScalingRuleNode implements Node { endpoint: UUID! } -""" -Available values: 'KERNEL' (KERNEL), 'INFERENCE_FRAMEWORK' (INFERENCE_FRAMEWORK). Added in 25.1.0. -""" +"""The source type to fetch metrics. 
Added in 25.1.0.""" enum AutoScalingMetricSource { KERNEL INFERENCE_FRAMEWORK } """ -Available values: 'LESS_THAN' (LESS_THAN), 'LESS_THAN_OR_EQUAL' (LESS_THAN_OR_EQUAL), 'GREATER_THAN' (GREATER_THAN), 'GREATER_THAN_OR_EQUAL' (GREATER_THAN_OR_EQUAL). Added in 25.1.0. +The comparator used to compare the metric value with the threshold. Added in 25.1.0. """ enum AutoScalingMetricComparator { LESS_THAN From ca44ff6dbd7347d9d8ae89bd4febf30831cac122 Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Fri, 17 Jan 2025 21:40:15 +0900 Subject: [PATCH 68/75] fix: Log the autoscaling decisions explicitly (INFO instead of DEBUG) --- .../backend/manager/scheduler/dispatcher.py | 43 ++++++++++--------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/src/ai/backend/manager/scheduler/dispatcher.py b/src/ai/backend/manager/scheduler/dispatcher.py index 0f594bb839d..ddf36406544 100644 --- a/src/ai/backend/manager/scheduler/dispatcher.py +++ b/src/ai/backend/manager/scheduler/dispatcher.py @@ -1443,8 +1443,8 @@ async def _autoscale_endpoints( for rule in rules: should_trigger = False if len(endpoint_by_id[rule.endpoint].routings) == 0: - log.debug( - "_autoscale_endpoints(e: {}, r: {}): endpoint does not have any replicas, skipping", + log.log( + "AUTOSCALE(e:{}, rule:{}): endpoint does not have any replicas, skipping", rule.endpoint, rule.id, ) @@ -1474,8 +1474,8 @@ async def _autoscale_endpoints( continue live_stat = endpoint_statistics_by_id[rule.endpoint] if rule.metric_name not in live_stat: - log.debug( - "_autoscale_endpoints(e: {}, r: {}): metric {} does not exist, skipping", + log.log( + "AUTOSCALE(e:{}, rule:{}): skipping the rule because metric {} does not exist", rule.endpoint, rule.id, rule.metric_name, @@ -1485,7 +1485,7 @@ async def _autoscale_endpoints( endpoint_by_id[rule.endpoint].routings ) case _: - raise AssertionError("Should not reach here") # FIXME: Replace with named error + raise NotImplementedError match rule.comparator: case 
AutoScalingMetricComparator.LESS_THAN: @@ -1498,24 +1498,25 @@ async def _autoscale_endpoints( should_trigger = current_value >= rule.threshold log.debug( - "_autoscale_endpoints(e: {}, r: {}): {} {} {}: {}", + "AUTOSCALE(e:{}, rule:{}): {} {} {}: {}", rule.endpoint, rule.id, current_value, - rule.comparator.value, + rule.comparator, rule.threshold, should_trigger, ) if should_trigger: - new_replicas = rule.endpoint_row.replicas + rule.step_size - if (rule.min_replicas is not None and new_replicas < rule.min_replicas) or ( - rule.max_replicas is not None and new_replicas > rule.max_replicas + new_replica_count = max(0, rule.endpoint_row.replicas + rule.step_size) + if (rule.min_replicas is not None and new_replica_count < rule.min_replicas) or ( + rule.max_replicas is not None and new_replica_count > rule.max_replicas ): - log.debug( - "_autoscale_endpoints(e: {}, r: {}): new replica count {} violates min ({}) / max ({}) replica restriction; skipping", + log.log( + "AUTOSCALE(e:{}, rule:{}): ignored the new replica count {} ({}) [min: {}, max: {}]", rule.endpoint, rule.id, - new_replicas, + new_replica_count, + rule.step_size, rule.min_replicas, rule.max_replicas, ) @@ -1525,21 +1526,23 @@ async def _autoscale_endpoints( ): # changes applied here will be reflected at consequent queries (at `scale_services()`) # so we do not have to propagate the changes on the function level - rule.endpoint_row.replicas += rule.step_size - if rule.endpoint_row.replicas < 0: - rule.endpoint_row.replicas = 0 + rule.endpoint_row.replicas = new_replica_count rule.last_triggered_at = current_datetime - log.debug( - "_autoscale_endpoints(e: {}, r: {}): added {} to replica count", + log.log( + "AUTOSCALE(e:{}, rule:{}): applied the new replica count {} ({})", rule.endpoint, rule.id, + new_replica_count, rule.step_size, ) else: - log.debug( - "_autoscale_endpoints(e: {}, r: {}): rule on cooldown period; deferring execution", + log.log( + "AUTOSCALE(e:{}, rule:{}): ignore the new replica 
count {} ({}) as the rule is on a cooldown period until {}", rule.endpoint, rule.id, + new_replica_count, + rule.step_size, + rule.last_triggered_at, ) async def scale_services( From 462218255d270c3bc9fa7af68132346d0062320d Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Fri, 17 Jan 2025 21:46:51 +0900 Subject: [PATCH 69/75] fix: Remove unnecessary logic, fix a typop --- src/ai/backend/manager/scheduler/dispatcher.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/ai/backend/manager/scheduler/dispatcher.py b/src/ai/backend/manager/scheduler/dispatcher.py index ddf36406544..0a3720bda10 100644 --- a/src/ai/backend/manager/scheduler/dispatcher.py +++ b/src/ai/backend/manager/scheduler/dispatcher.py @@ -1442,14 +1442,6 @@ async def _autoscale_endpoints( for rule in rules: should_trigger = False - if len(endpoint_by_id[rule.endpoint].routings) == 0: - log.log( - "AUTOSCALE(e:{}, rule:{}): endpoint does not have any replicas, skipping", - rule.endpoint, - rule.id, - ) - continue - match rule.metric_source: # kernel metrics should be evaluated by the average of the metric across every kernels case AutoScalingMetricSource.KERNEL: @@ -1537,7 +1529,7 @@ async def _autoscale_endpoints( ) else: log.log( - "AUTOSCALE(e:{}, rule:{}): ignore the new replica count {} ({}) as the rule is on a cooldown period until {}", + "AUTOSCALE(e:{}, rule:{}): ignored the new replica count {} ({}) as the rule is on a cooldown period until {}", rule.endpoint, rule.id, new_replica_count, From bc144dd4e205e02f140f1ab3fe0d62e39a5cb67e Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Fri, 17 Jan 2025 23:01:48 +0900 Subject: [PATCH 70/75] fix: Explicitly log all skipping cases --- src/ai/backend/manager/scheduler/dispatcher.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/ai/backend/manager/scheduler/dispatcher.py b/src/ai/backend/manager/scheduler/dispatcher.py index 0a3720bda10..3098444fd23 100644 --- 
a/src/ai/backend/manager/scheduler/dispatcher.py +++ b/src/ai/backend/manager/scheduler/dispatcher.py @@ -1440,6 +1440,11 @@ async def _autoscale_endpoints( for endpoint_id, metric in zip(metric_requested_endpoints, endpoint_live_stats) } + log_skip_due_to_missing_metric = partial( + log.warning, + "AUTOSCALE(e:{0.endpoint}, rule:{0.id}): skipping the rule because metric {0.metric_name} does not exist", + ) + for rule in rules: should_trigger = False match rule.metric_source: @@ -1459,19 +1464,16 @@ async def _autoscale_endpoints( live_stat[rule.metric_name]["current"] ) if metric_found_kernel_count == 0: + log_skip_due_to_missing_metric(rule) continue current_value = metric_aggregated_value / Decimal(metric_found_kernel_count) case AutoScalingMetricSource.INFERENCE_FRAMEWORK: if not endpoint_statistics_by_id[rule.endpoint]: + log_skip_due_to_missing_metric(rule) continue live_stat = endpoint_statistics_by_id[rule.endpoint] if rule.metric_name not in live_stat: - log.log( - "AUTOSCALE(e:{}, rule:{}): skipping the rule because metric {} does not exist", - rule.endpoint, - rule.id, - rule.metric_name, - ) + log_skip_due_to_missing_metric(rule) continue current_value = Decimal(live_stat[rule.metric_name]["current"]) / len( endpoint_by_id[rule.endpoint].routings From b60598256d8d0457207a3cb7fa7a0098ae0545e7 Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Fri, 17 Jan 2025 23:05:47 +0900 Subject: [PATCH 71/75] fix: oops --- src/ai/backend/manager/scheduler/dispatcher.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ai/backend/manager/scheduler/dispatcher.py b/src/ai/backend/manager/scheduler/dispatcher.py index 3098444fd23..7469e2ab61c 100644 --- a/src/ai/backend/manager/scheduler/dispatcher.py +++ b/src/ai/backend/manager/scheduler/dispatcher.py @@ -1505,7 +1505,7 @@ async def _autoscale_endpoints( if (rule.min_replicas is not None and new_replica_count < rule.min_replicas) or ( rule.max_replicas is not None and new_replica_count > 
rule.max_replicas ): - log.log( + log.info( "AUTOSCALE(e:{}, rule:{}): ignored the new replica count {} ({}) [min: {}, max: {}]", rule.endpoint, rule.id, @@ -1522,7 +1522,7 @@ async def _autoscale_endpoints( # so we do not have to propagate the changes on the function level rule.endpoint_row.replicas = new_replica_count rule.last_triggered_at = current_datetime - log.log( + log.info( "AUTOSCALE(e:{}, rule:{}): applied the new replica count {} ({})", rule.endpoint, rule.id, @@ -1530,7 +1530,7 @@ async def _autoscale_endpoints( rule.step_size, ) else: - log.log( + log.info( "AUTOSCALE(e:{}, rule:{}): ignored the new replica count {} ({}) as the rule is on a cooldown period until {}", rule.endpoint, rule.id, From 0ca161f14957b9a5311575f082c34a3b8fdb6d53 Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Fri, 17 Jan 2025 23:11:03 +0900 Subject: [PATCH 72/75] fix: Server-side should always use UTC if not specified --- src/ai/backend/manager/scheduler/dispatcher.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/ai/backend/manager/scheduler/dispatcher.py b/src/ai/backend/manager/scheduler/dispatcher.py index 7469e2ab61c..94adc758fb5 100644 --- a/src/ai/backend/manager/scheduler/dispatcher.py +++ b/src/ai/backend/manager/scheduler/dispatcher.py @@ -13,7 +13,7 @@ Sequence, ) from contextvars import ContextVar -from datetime import datetime, timedelta, timezone +from datetime import UTC, datetime, timedelta, timezone from decimal import Decimal from functools import partial from typing import ( @@ -1383,7 +1383,7 @@ async def _autoscale_endpoints( self, session: SASession, ) -> None: - current_datetime = datetime.now() + current_datetime = datetime.now(tz=UTC) rules = await EndpointAutoScalingRuleRow.list(session, load_endpoint=True) # currently auto scaling supports two types of stat as source: kernel and endpoint @@ -1515,7 +1515,7 @@ async def _autoscale_endpoints( rule.max_replicas, ) continue - if rule.last_triggered_at is None or 
rule.last_triggered_at.replace(tzinfo=None) < ( + if rule.last_triggered_at is None or rule.last_triggered_at < ( current_datetime - timedelta(seconds=rule.cooldown_seconds) ): # changes applied here will be reflected at consequent queries (at `scale_services()`) @@ -1536,7 +1536,7 @@ async def _autoscale_endpoints( rule.id, new_replica_count, rule.step_size, - rule.last_triggered_at, + rule.last_triggered_at + timedelta(seconds=rule.cooldown_seconds), ) async def scale_services( From b03efd35604a5eb9596b7e0aa6a4d1c8364faccd Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Fri, 17 Jan 2025 23:15:13 +0900 Subject: [PATCH 73/75] refactor: Use shorter UUID type phrase --- src/ai/backend/manager/models/endpoint.py | 118 +++++++++--------- .../manager/models/gql_models/endpoint.py | 12 +- 2 files changed, 65 insertions(+), 65 deletions(-) diff --git a/src/ai/backend/manager/models/endpoint.py b/src/ai/backend/manager/models/endpoint.py index 711d43b20c7..36a010c18e1 100644 --- a/src/ai/backend/manager/models/endpoint.py +++ b/src/ai/backend/manager/models/endpoint.py @@ -2,7 +2,6 @@ import datetime import logging -import uuid from collections.abc import ( Container, Mapping, @@ -18,6 +17,7 @@ Self, cast, ) +from uuid import UUID, uuid4 import graphene import jwt @@ -246,13 +246,13 @@ def __init__( self, name: str, model_definition_path: str | None, - created_user: uuid.UUID, - session_owner: uuid.UUID, + created_user: UUID, + session_owner: UUID, replicas: int, image: ImageRow, - model: uuid.UUID, + model: UUID, domain: str, - project: uuid.UUID, + project: UUID, resource_group: str, resource_slots: Mapping[str, Any], cluster_mode: ClusterMode, @@ -269,7 +269,7 @@ def __init__( resource_opts: Optional[Mapping[str, Any]] = None, open_to_public: bool = False, ): - self.id = uuid.uuid4() + self.id = uuid4() self.name = name self.model_definition_path = model_definition_path self.created_user = created_user @@ -298,10 +298,10 @@ def __init__( async def get( cls, session: 
AsyncSession, - endpoint_id: uuid.UUID, + endpoint_id: UUID, domain: Optional[str] = None, - project: Optional[uuid.UUID] = None, - user_uuid: Optional[uuid.UUID] = None, + project: Optional[UUID] = None, + user_uuid: Optional[UUID] = None, load_routes: bool = False, load_tokens: bool = False, load_image: bool = False, @@ -344,8 +344,8 @@ async def list( cls, session: AsyncSession, domain: Optional[str] = None, - project: Optional[uuid.UUID] = None, - user_uuid: Optional[uuid.UUID] = None, + project: Optional[UUID] = None, + user_uuid: Optional[UUID] = None, load_routes: bool = False, load_image: bool = False, load_tokens: bool = False, @@ -381,10 +381,10 @@ async def list( async def batch_load( cls, session: AsyncSession, - endpoint_ids: Sequence[uuid.UUID], + endpoint_ids: Sequence[UUID], domain: Optional[str] = None, - project: Optional[uuid.UUID] = None, - user_uuid: Optional[uuid.UUID] = None, + project: Optional[UUID] = None, + user_uuid: Optional[UUID] = None, load_routes: bool = False, load_image: bool = False, load_tokens: bool = False, @@ -422,10 +422,10 @@ async def batch_load( async def list_by_model( cls, session: AsyncSession, - model_id: uuid.UUID, + model_id: UUID, domain: Optional[str] = None, - project: Optional[uuid.UUID] = None, - user_uuid: Optional[uuid.UUID] = None, + project: Optional[UUID] = None, + user_uuid: Optional[UUID] = None, load_routes: bool = False, load_image: bool = False, load_tokens: bool = False, @@ -472,7 +472,7 @@ async def create_auto_scaling_rule( max_replicas: int | None = None, ) -> EndpointAutoScalingRuleRow: row = EndpointAutoScalingRuleRow( - id=uuid.uuid4(), + id=uuid4(), endpoint=self.id, metric_source=metric_source, metric_name=metric_name, @@ -516,12 +516,12 @@ class EndpointTokenRow(Base): def __init__( self, - id: uuid.UUID, + id: UUID, token: str, - endpoint: uuid.UUID, + endpoint: UUID, domain: str, - project: uuid.UUID, - session_owner: uuid.UUID, + project: UUID, + session_owner: UUID, ) -> None: self.id = 
id self.token = token @@ -534,11 +534,11 @@ def __init__( async def list( cls, session: AsyncSession, - endpoint_id: uuid.UUID, + endpoint_id: UUID, *, domain: Optional[str] = None, - project: Optional[uuid.UUID] = None, - user_uuid: Optional[uuid.UUID] = None, + project: Optional[UUID] = None, + user_uuid: Optional[UUID] = None, load_endpoint=False, ) -> Sequence[Self]: query = ( @@ -564,8 +564,8 @@ async def get( token: str, *, domain: Optional[str] = None, - project: Optional[uuid.UUID] = None, - user_uuid: Optional[uuid.UUID] = None, + project: Optional[UUID] = None, + user_uuid: Optional[UUID] = None, load_endpoint=False, ) -> Self: query = sa.select(EndpointTokenRow).filter(EndpointTokenRow.token == token) @@ -633,7 +633,7 @@ async def list(cls, session: AsyncSession, load_endpoint=False) -> Sequence[Self @classmethod async def get( - cls, session: AsyncSession, id: uuid.UUID, load_endpoint=False + cls, session: AsyncSession, id: UUID, load_endpoint=False ) -> "EndpointAutoScalingRuleRow": query = sa.select(EndpointAutoScalingRuleRow).filter(EndpointAutoScalingRuleRow.id == id) if load_endpoint: @@ -658,7 +658,7 @@ async def check_scaling_group( scaling_group: str, owner_access_key: AccessKey, target_domain: str, - target_project: str | uuid.UUID, + target_project: str | UUID, ) -> str: """ Wrapper of `registry.check_scaling_group()` with additional guards flavored for @@ -697,9 +697,9 @@ async def check_extra_mounts( conn: AsyncConnection, shared_config: "SharedConfig", storage_manager: StorageSessionManager, - model_id: uuid.UUID, + model_id: UUID, model_mount_destination: str, - extra_mounts: dict[uuid.UUID, MountOptionModel], + extra_mounts: dict[UUID, MountOptionModel], user_scope: UserScope, resource_policy: dict[str, Any], ) -> Sequence[VFolderMount]: @@ -716,12 +716,12 @@ async def check_extra_mounts( ) requested_mounts = [*extra_mounts.keys()] - requested_mount_map: dict[str | uuid.UUID, str] = { + requested_mount_map: dict[str | UUID, str] = { 
folder_id: options.mount_destination for folder_id, options in extra_mounts.items() if options.mount_destination } - requested_mount_options: dict[str | uuid.UUID, Any] = { + requested_mount_options: dict[str | UUID, Any] = { folder_id: { "type": options.type, "permission": options.permission, @@ -875,7 +875,7 @@ class EndpointStatistics: async def batch_load_by_endpoint_impl( cls, redis_stat: RedisConnectionInfo, - endpoint_ids: Sequence[uuid.UUID], + endpoint_ids: Sequence[UUID], ) -> Sequence[Optional[Mapping[str, Any]]]: async def _build_pipeline(redis: Redis) -> Pipeline: pipe = redis.pipeline() @@ -896,7 +896,7 @@ async def _build_pipeline(redis: Redis) -> Pipeline: async def batch_load_by_endpoint( cls, ctx: "GraphQueryContext", - endpoint_ids: Sequence[uuid.UUID], + endpoint_ids: Sequence[UUID], ) -> Sequence[Optional[Mapping[str, Any]]]: return await cls.batch_load_by_endpoint_impl(ctx.redis_stat, endpoint_ids) @@ -904,7 +904,7 @@ async def batch_load_by_endpoint( async def batch_load_by_replica( cls, ctx: GraphQueryContext, - endpoint_replica_ids: Sequence[tuple[uuid.UUID, uuid.UUID]], + endpoint_replica_ids: Sequence[tuple[UUID, UUID]], ) -> Sequence[Optional[Mapping[str, Any]]]: async def _build_pipeline(redis: Redis) -> Pipeline: pipe = redis.pipeline() @@ -1052,9 +1052,9 @@ async def load_count( cls, ctx, # ctx: GraphQueryContext, *, - project: uuid.UUID | None = None, + project: UUID | None = None, domain_name: Optional[str] = None, - user_uuid: Optional[uuid.UUID] = None, + user_uuid: Optional[UUID] = None, filter: Optional[str] = None, ) -> int: query = sa.select([sa.func.count()]).select_from( @@ -1087,8 +1087,8 @@ async def load_slice( offset: int, *, domain_name: Optional[str] = None, - user_uuid: Optional[uuid.UUID] = None, - project: Optional[uuid.UUID] = None, + user_uuid: Optional[UUID] = None, + project: Optional[UUID] = None, filter: Optional[str] = None, order: Optional[str] = None, ) -> Sequence[Self]: @@ -1134,8 +1134,8 @@ async def 
load_all( ctx, # ctx: GraphQueryContext, *, domain_name: Optional[str] = None, - user_uuid: Optional[uuid.UUID] = None, - project: Optional[uuid.UUID] = None, + user_uuid: Optional[UUID] = None, + project: Optional[UUID] = None, ) -> Sequence["Endpoint"]: async with ctx.db.begin_readonly_session() as session: rows = await EndpointRow.list( @@ -1154,10 +1154,10 @@ async def load_item( cls, ctx, # ctx: GraphQueryContext, *, - endpoint_id: uuid.UUID, + endpoint_id: UUID, domain_name: Optional[str] = None, - user_uuid: Optional[uuid.UUID] = None, - project: uuid.UUID | None = None, + user_uuid: Optional[UUID] = None, + project: UUID | None = None, ) -> Self: """ :raises: ai.backend.manager.api.exceptions.EndpointNotFound @@ -1323,7 +1323,7 @@ async def mutate( cls, root, info: graphene.ResolveInfo, - endpoint_id: uuid.UUID, + endpoint_id: UUID, props: ModifyEndpointInput, ) -> Self: graph_ctx: GraphQueryContext = info.context @@ -1423,11 +1423,11 @@ async def _do_mutate() -> Self: endpoint_row.project, ) - def _get_vfolder_id(id_input: str) -> uuid.UUID: + def _get_vfolder_id(id_input: str) -> UUID: _, raw_vfolder_id = AsyncNode.resolve_global_id(info, id_input) if not raw_vfolder_id: raw_vfolder_id = id_input - return uuid.UUID(raw_vfolder_id) + return UUID(raw_vfolder_id) user_scope = UserScope( domain_name=endpoint_row.domain, @@ -1586,10 +1586,10 @@ async def load_count( cls, ctx, # ctx: GraphQueryContext, *, - endpoint_id: Optional[uuid.UUID] = None, - project: Optional[uuid.UUID] = None, + endpoint_id: Optional[UUID] = None, + project: Optional[UUID] = None, domain_name: Optional[str] = None, - user_uuid: Optional[uuid.UUID] = None, + user_uuid: Optional[UUID] = None, ) -> int: query = sa.select([sa.func.count()]).select_from(EndpointTokenRow) if endpoint_id is not None: @@ -1611,12 +1611,12 @@ async def load_slice( limit: int, offset: int, *, - endpoint_id: Optional[uuid.UUID] = None, + endpoint_id: Optional[UUID] = None, filter: str | None = None, order: str | 
None = None, - project: Optional[uuid.UUID] = None, + project: Optional[UUID] = None, domain_name: Optional[str] = None, - user_uuid: Optional[uuid.UUID] = None, + user_uuid: Optional[UUID] = None, ) -> Sequence[Self]: query = ( sa.select(EndpointTokenRow) @@ -1648,11 +1648,11 @@ async def load_slice( async def load_all( cls, ctx: GraphQueryContext, - endpoint_id: uuid.UUID, + endpoint_id: UUID, *, - project: Optional[uuid.UUID] = None, + project: Optional[UUID] = None, domain_name: Optional[str] = None, - user_uuid: Optional[uuid.UUID] = None, + user_uuid: Optional[UUID] = None, ) -> Sequence[Self]: async with ctx.db.begin_readonly_session() as session: rows = await EndpointTokenRow.list( @@ -1670,9 +1670,9 @@ async def load_item( ctx, # ctx: GraphQueryContext, token: str, *, - project: Optional[uuid.UUID] = None, + project: Optional[UUID] = None, domain_name: Optional[str] = None, - user_uuid: Optional[uuid.UUID] = None, + user_uuid: Optional[UUID] = None, ) -> Self: try: async with ctx.db.begin_readonly_session() as session: diff --git a/src/ai/backend/manager/models/gql_models/endpoint.py b/src/ai/backend/manager/models/gql_models/endpoint.py index c4a98c03e76..f575f3eb36a 100644 --- a/src/ai/backend/manager/models/gql_models/endpoint.py +++ b/src/ai/backend/manager/models/gql_models/endpoint.py @@ -1,8 +1,8 @@ from __future__ import annotations import decimal -import uuid from typing import TYPE_CHECKING, Mapping, Self +from uuid import UUID import graphene from dateutil.parser import parse as dtparse @@ -131,7 +131,7 @@ async def get_node(cls, info: graphene.ResolveInfo, rule_id: str) -> Self: if not raw_rule_id: raw_rule_id = rule_id try: - _rule_id = uuid.UUID(raw_rule_id) + _rule_id = UUID(raw_rule_id) except ValueError: raise ObjectNotFound(object_name="Endpoint Autoscaling Rule") @@ -201,7 +201,7 @@ async def get_connection( if not raw_endpoint_id: raw_endpoint_id = endpoint try: - _endpoint_id = uuid.UUID(raw_endpoint_id) + _endpoint_id = 
UUID(raw_endpoint_id) except ValueError: raise ObjectNotFound(object_name="Endpoint") try: @@ -303,7 +303,7 @@ async def mutate( raise InvalidAPIParameters("comparator is a required field") try: - _endpoint_id = uuid.UUID(raw_endpoint_id) + _endpoint_id = UUID(raw_endpoint_id) except ValueError: raise ObjectNotFound(object_name="Endpoint") @@ -377,7 +377,7 @@ async def mutate( rule_id = id try: - _rule_id = uuid.UUID(rule_id) + _rule_id = UUID(rule_id) except ValueError: raise ObjectNotFound(object_name="Endpoint Autoscaling Rule") @@ -446,7 +446,7 @@ async def mutate( rule_id = id try: - _rule_id = uuid.UUID(rule_id) + _rule_id = UUID(rule_id) except ValueError: raise ObjectNotFound(object_name="Endpoint Autoscaling Rule") From 2c262b7ba6878ac4252cc62bd8f5106df0d0059f Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Fri, 17 Jan 2025 23:17:27 +0900 Subject: [PATCH 74/75] refactor: Use shorter UUID type phrase --- .../backend/manager/scheduler/dispatcher.py | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/src/ai/backend/manager/scheduler/dispatcher.py b/src/ai/backend/manager/scheduler/dispatcher.py index 94adc758fb5..54e77cc14a5 100644 --- a/src/ai/backend/manager/scheduler/dispatcher.py +++ b/src/ai/backend/manager/scheduler/dispatcher.py @@ -4,7 +4,6 @@ import itertools import json import logging -import uuid from collections import defaultdict from collections.abc import ( Awaitable, @@ -23,6 +22,7 @@ Union, cast, ) +from uuid import UUID, uuid4 import aiotools import async_timeout @@ -1393,16 +1393,14 @@ async def _autoscale_endpoints( endpoints = await EndpointRow.batch_load( session, [rule.endpoint for rule in rules], load_routes=True ) - endpoint_by_id: dict[uuid.UUID, EndpointRow] = { - endpoint.id: endpoint for endpoint in endpoints - } - metric_requested_sessions: list[uuid.UUID] = list() - metric_requested_kernels: list[uuid.UUID] = list() - metric_requested_endpoints: list[uuid.UUID] = list() + endpoint_by_id: 
dict[UUID, EndpointRow] = {endpoint.id: endpoint for endpoint in endpoints} + metric_requested_sessions: list[UUID] = list() + metric_requested_kernels: list[UUID] = list() + metric_requested_endpoints: list[UUID] = list() - kernel_statistics_by_id: dict[uuid.UUID, Any] = {} - endpoint_statistics_by_id: dict[uuid.UUID, Any] = {} - kernels_by_session_id: dict[uuid.UUID, list[KernelRow]] = defaultdict(lambda: []) + kernel_statistics_by_id: dict[UUID, Any] = {} + endpoint_statistics_by_id: dict[UUID, Any] = {} + kernels_by_session_id: dict[UUID, list[KernelRow]] = defaultdict(lambda: []) for rule in rules: match rule.metric_source: @@ -1691,7 +1689,7 @@ async def _autoscale_txn() -> None: for endpoint, expand_count in endpoints_to_expand.items(): log.debug("Creating {} session(s) for {}", expand_count, endpoint.name) for _ in range(expand_count): - route_id = uuid.uuid4() + route_id = uuid4() routing_row = RoutingRow( route_id, endpoint.id, From 20d106673a411bd3f7a4f0a15d7575be13a1bdc1 Mon Sep 17 00:00:00 2001 From: Joongi Kim Date: Fri, 17 Jan 2025 23:29:00 +0900 Subject: [PATCH 75/75] refactor: Use explicit subtype for ID values --- src/ai/backend/common/types.py | 25 ++++++++++--------- src/ai/backend/manager/models/endpoint.py | 3 ++- .../manager/models/gql_models/endpoint.py | 12 +++++---- .../backend/manager/scheduler/dispatcher.py | 20 ++++++++------- 4 files changed, 33 insertions(+), 27 deletions(-) diff --git a/src/ai/backend/common/types.py b/src/ai/backend/common/types.py index 2e6c7c2bf9f..25e5abb4a8d 100644 --- a/src/ai/backend/common/types.py +++ b/src/ai/backend/common/types.py @@ -7,7 +7,6 @@ import math import numbers import textwrap -import uuid from abc import ABCMeta, abstractmethod from collections import UserDict, defaultdict, namedtuple from collections.abc import Iterable @@ -40,6 +39,7 @@ overload, override, ) +from uuid import UUID import attrs import redis.asyncio.sentinel @@ -335,9 +335,10 @@ def check_typed_tuple(value: Tuple[Any, ...], 
types: Tuple[Type, ...]) -> Tuple: ContainerPID = NewType("ContainerPID", PID) ContainerId = NewType("ContainerId", str) -EndpointId = NewType("EndpointId", uuid.UUID) -SessionId = NewType("SessionId", uuid.UUID) -KernelId = NewType("KernelId", uuid.UUID) +EndpointId = NewType("EndpointId", UUID) +RuleId = NewType("RuleId", UUID) +SessionId = NewType("SessionId", UUID) +KernelId = NewType("KernelId", UUID) ImageAlias = NewType("ImageAlias", str) ArchName = NewType("ArchName", str) @@ -976,20 +977,20 @@ def as_trafaret(cls) -> t.Trafaret: @attrs.define(slots=True, frozen=True) class QuotaScopeID: scope_type: QuotaScopeType - scope_id: uuid.UUID + scope_id: UUID @classmethod def parse(cls, raw: str) -> QuotaScopeID: scope_type, _, rest = raw.partition(":") match scope_type.lower(): case QuotaScopeType.PROJECT | QuotaScopeType.USER as t: - return cls(t, uuid.UUID(rest)) + return cls(t, UUID(rest)) case _: raise ValueError(f"Invalid quota scope type: {scope_type!r}") def __str__(self) -> str: match self.scope_id: - case uuid.UUID(): + case UUID(): return f"{self.scope_type}:{str(self.scope_id)}" case _: raise ValueError(f"Invalid quota scope ID: {self.scope_id!r}") @@ -1000,7 +1001,7 @@ def __repr__(self) -> str: @property def pathname(self) -> str: match self.scope_id: - case uuid.UUID(): + case UUID(): return self.scope_id.hex case _: raise ValueError(f"Invalid quota scope ID: {self.scope_id!r}") @@ -1008,7 +1009,7 @@ def pathname(self) -> str: class VFolderID: quota_scope_id: QuotaScopeID | None - folder_id: uuid.UUID + folder_id: UUID @classmethod def from_row(cls, row: Any) -> Self: @@ -1018,11 +1019,11 @@ def from_row(cls, row: Any) -> Self: def from_str(cls, val: str) -> Self: first, _, second = val.partition("/") if second: - return cls(QuotaScopeID.parse(first), uuid.UUID(hex=second)) + return cls(QuotaScopeID.parse(first), UUID(hex=second)) else: - return cls(None, uuid.UUID(hex=first)) + return cls(None, UUID(hex=first)) - def __init__(self, quota_scope_id: 
QuotaScopeID | str | None, folder_id: uuid.UUID) -> None: + def __init__(self, quota_scope_id: QuotaScopeID | str | None, folder_id: UUID) -> None: self.folder_id = folder_id match quota_scope_id: case QuotaScopeID(): diff --git a/src/ai/backend/manager/models/endpoint.py b/src/ai/backend/manager/models/endpoint.py index 36a010c18e1..f2d260bff40 100644 --- a/src/ai/backend/manager/models/endpoint.py +++ b/src/ai/backend/manager/models/endpoint.py @@ -42,6 +42,7 @@ AutoScalingMetricComparator, AutoScalingMetricSource, ClusterMode, + EndpointId, ImageAlias, MountPermission, MountTypes, @@ -381,7 +382,7 @@ async def list( async def batch_load( cls, session: AsyncSession, - endpoint_ids: Sequence[UUID], + endpoint_ids: Sequence[EndpointId], domain: Optional[str] = None, project: Optional[UUID] = None, user_uuid: Optional[UUID] = None, diff --git a/src/ai/backend/manager/models/gql_models/endpoint.py b/src/ai/backend/manager/models/gql_models/endpoint.py index f575f3eb36a..b4501d60cbe 100644 --- a/src/ai/backend/manager/models/gql_models/endpoint.py +++ b/src/ai/backend/manager/models/gql_models/endpoint.py @@ -13,6 +13,8 @@ from ai.backend.common.types import ( AutoScalingMetricComparator, AutoScalingMetricSource, + EndpointId, + RuleId, ) from ...api.exceptions import ( @@ -131,7 +133,7 @@ async def get_node(cls, info: graphene.ResolveInfo, rule_id: str) -> Self: if not raw_rule_id: raw_rule_id = rule_id try: - _rule_id = UUID(raw_rule_id) + _rule_id = RuleId(UUID(raw_rule_id)) except ValueError: raise ObjectNotFound(object_name="Endpoint Autoscaling Rule") @@ -201,7 +203,7 @@ async def get_connection( if not raw_endpoint_id: raw_endpoint_id = endpoint try: - _endpoint_id = UUID(raw_endpoint_id) + _endpoint_id = EndpointId(UUID(raw_endpoint_id)) except ValueError: raise ObjectNotFound(object_name="Endpoint") try: @@ -303,7 +305,7 @@ async def mutate( raise InvalidAPIParameters("comparator is a required field") try: - _endpoint_id = UUID(raw_endpoint_id) + _endpoint_id 
= EndpointId(UUID(raw_endpoint_id)) except ValueError: raise ObjectNotFound(object_name="Endpoint") @@ -377,7 +379,7 @@ async def mutate( rule_id = id try: - _rule_id = UUID(rule_id) + _rule_id = RuleId(UUID(rule_id)) except ValueError: raise ObjectNotFound(object_name="Endpoint Autoscaling Rule") @@ -446,7 +448,7 @@ async def mutate( rule_id = id try: - _rule_id = UUID(rule_id) + _rule_id = RuleId(UUID(rule_id)) except ValueError: raise ObjectNotFound(object_name="Endpoint Autoscaling Rule") diff --git a/src/ai/backend/manager/scheduler/dispatcher.py b/src/ai/backend/manager/scheduler/dispatcher.py index 54e77cc14a5..63cfee21573 100644 --- a/src/ai/backend/manager/scheduler/dispatcher.py +++ b/src/ai/backend/manager/scheduler/dispatcher.py @@ -22,7 +22,7 @@ Union, cast, ) -from uuid import UUID, uuid4 +from uuid import uuid4 import aiotools import async_timeout @@ -63,6 +63,8 @@ AutoScalingMetricComparator, AutoScalingMetricSource, ClusterMode, + EndpointId, + KernelId, RedisConnectionInfo, ResourceSlot, SessionId, @@ -1393,14 +1395,14 @@ async def _autoscale_endpoints( endpoints = await EndpointRow.batch_load( session, [rule.endpoint for rule in rules], load_routes=True ) - endpoint_by_id: dict[UUID, EndpointRow] = {endpoint.id: endpoint for endpoint in endpoints} - metric_requested_sessions: list[UUID] = list() - metric_requested_kernels: list[UUID] = list() - metric_requested_endpoints: list[UUID] = list() - - kernel_statistics_by_id: dict[UUID, Any] = {} - endpoint_statistics_by_id: dict[UUID, Any] = {} - kernels_by_session_id: dict[UUID, list[KernelRow]] = defaultdict(lambda: []) + endpoint_by_id = {endpoint.id: endpoint for endpoint in endpoints} + metric_requested_sessions: list[SessionId] = [] + metric_requested_kernels: list[KernelId] = [] + metric_requested_endpoints: list[EndpointId] = [] + + kernel_statistics_by_id: dict[KernelId, Any] = {} + endpoint_statistics_by_id: dict[EndpointId, Any] = {} + kernels_by_session_id: dict[SessionId, list[KernelRow]] 
= defaultdict(lambda: []) for rule in rules: match rule.metric_source: