Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(BA-96): metric based model service autoscaling #3277

Open
wants to merge 46 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
4f41b1f
update
kyujin-cho Dec 19, 2024
22cbde8
add gql query & mutations
kyujin-cho Dec 19, 2024
edb16dc
add migration script
kyujin-cho Dec 19, 2024
79ad37e
add missing file
kyujin-cho Dec 19, 2024
d044f4b
Merge branch 'main' into feature/model-service-autoscale
kyujin-cho Dec 19, 2024
9bc0661
add news fragment
kyujin-cho Dec 19, 2024
66f28e7
chore: update GraphQL schema dump
kyujin-cho Dec 20, 2024
d494709
add min/max replicas
kyujin-cho Dec 20, 2024
d1b0231
chore: update GraphQL schema dump
kyujin-cho Dec 20, 2024
11f57e0
add min/max replicas
kyujin-cho Dec 20, 2024
d533ba0
add missing annotations
kyujin-cho Dec 20, 2024
9de5bf6
chore: update GraphQL schema dump
kyujin-cho Dec 20, 2024
f840c44
Merge branch 'main' into feature/model-service-autoscale
achimnol Dec 26, 2024
0574c2e
Merge branch 'main' into feature/model-service-autoscale
achimnol Dec 26, 2024
e6209d4
replace bulk with batch
kyujin-cho Dec 26, 2024
d0a0af8
fix: Remove unnecessary version-added notes in new GQL object type's …
achimnol Dec 30, 2024
bb7db5e
chore: update GraphQL schema dump
achimnol Dec 30, 2024
cf9f1ee
fix: Ensure floating-point format when stringifying decimal column va…
achimnol Dec 30, 2024
dc341d8
fix: Typo in the database column
achimnol Dec 30, 2024
846f13d
fix: Missing await
achimnol Dec 30, 2024
44b7c5a
Merge branch 'main' into feature/model-service-autoscale
achimnol Dec 30, 2024
580fb8e
fix: Remove boilerplate constructor
achimnol Dec 30, 2024
913569e
fix: Let StrEnum-based fields accept values, not capitalized names
achimnol Dec 30, 2024
0ddf2e8
chore: update GraphQL schema dump
achimnol Dec 30, 2024
3d4563a
refactor: We have typing.Self
achimnol Dec 30, 2024
3a60225
fix,refactor: Use simpler types and fix missing await in session.dele…
achimnol Dec 30, 2024
58dbb9d
fix: We don't need __init__() boilerplate!
achimnol Dec 30, 2024
a3dddf6
refactor: Use simpler types
achimnol Dec 30, 2024
ec0ffa4
Merge branch 'main' into feature/model-service-autoscale
achimnol Dec 31, 2024
33dcce8
accept PR review
kyujin-cho Jan 2, 2025
089028a
fix graphene Enum misuse
kyujin-cho Jan 2, 2025
9b47811
implement CLI function
kyujin-cho Jan 2, 2025
0baf316
implement CLI function
kyujin-cho Jan 2, 2025
56798a4
fix invalid import
kyujin-cho Jan 2, 2025
310bb44
fix invalid GQL definitioN
kyujin-cho Jan 2, 2025
353e888
fix typo
kyujin-cho Jan 2, 2025
1f7d22b
Merge branch 'main' into feature/model-service-autoscale
kyujin-cho Jan 2, 2025
fd04771
chore: update GraphQL schema dump
kyujin-cho Jan 2, 2025
daa0530
update annotation
kyujin-cho Jan 2, 2025
f11f7a6
chore: update GraphQL schema dump
kyujin-cho Jan 2, 2025
4342869
restructure CLI
kyujin-cho Jan 2, 2025
e6bf903
fix typo
kyujin-cho Dec 20, 2024
de6e6e7
fix typo
kyujin-cho Dec 20, 2024
274b390
fix cli not working
kyujin-cho Jan 2, 2025
32d469a
Merge branch 'main' into feature/model-service-autoscale
kyujin-cho Jan 2, 2025
0fa8de8
chore: update GraphQL schema dump
kyujin-cho Jan 2, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changes/3277.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Support model service auto scaling
1 change: 1 addition & 0 deletions src/ai/backend/client/cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from . import model # noqa # type: ignore
from . import server_log # noqa # type: ignore
from . import service # noqa # type: ignore
from . import service_auto_scaling_rule # noqa # type: ignore
from . import session # noqa # type: ignore
from . import session_template # noqa # type: ignore
from . import vfolder # noqa # type: ignore
Expand Down
243 changes: 243 additions & 0 deletions src/ai/backend/client/cli/service_auto_scaling_rule.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
import decimal
import sys
import uuid
from typing import Any, Iterable, Optional

import click

from ai.backend.cli.params import OptionalType
from ai.backend.cli.types import ExitCode, Undefined, undefined
from ai.backend.client.cli.extensions import pass_ctx_obj
from ai.backend.client.cli.service import get_service_id
from ai.backend.client.cli.types import CLIContext
from ai.backend.client.exceptions import BackendAPIError
from ai.backend.client.session import Session
from ai.backend.common.types import AutoScalingMetricComparator, AutoScalingMetricSource

from ..func.service_auto_scaling_rule import _default_fields as _default_get_fields
from ..output.fields import service_auto_scaling_rule_fields
from .pretty import print_done
from .service import service

# Columns selected by default for auto scaling rule listings, in display order.
_default_list_fields = tuple(
    service_auto_scaling_rule_fields[_key]
    for _key in (
        "id",
        "metric_source",
        "metric_name",
        "comparator",
        "threshold",
    )
)


# Click sub-group attached to the `service` command group; the commands
# defined below in this module register themselves on it via
# `@auto_scaling_rule.command()`.  The docstring doubles as the group's
# `--help` text, so keep it short and user-facing.
@service.group()
def auto_scaling_rule() -> None:
    """Set of model service auto scaling rule operations"""


@auto_scaling_rule.command()
@pass_ctx_obj
@click.argument("service", type=str, metavar="SERVICE_NAME_OR_ID")
@click.option("--metric-source", type=click.Choice([*AutoScalingMetricSource]), required=True)
@click.option("--metric-name", type=str, required=True)
@click.option("--threshold", type=str, required=True)
@click.option("--comparator", type=click.Choice([*AutoScalingMetricComparator]), required=True)
@click.option("--step-size", type=int, required=True)
@click.option("--cooldown-seconds", type=int, required=True)
@click.option("--min-replicas", type=int)
@click.option("--max-replicas", type=int)
def create(
    ctx: CLIContext,
    service: str,
    *,
    metric_source: AutoScalingMetricSource,
    metric_name: str,
    threshold: str,
    comparator: AutoScalingMetricComparator,
    step_size: int,
    cooldown_seconds: int,
    min_replicas: Optional[int] = None,
    max_replicas: Optional[int] = None,
) -> None:
    """Create a new auto scaling rule."""
    # The threshold arrives as a string so that it can be parsed losslessly
    # into a Decimal here instead of going through a float.
    with Session() as session:
        try:
            threshold_value = decimal.Decimal(threshold)
        except decimal.InvalidOperation:
            ctx.output.print_fail(f"{threshold} is not a valid Decimal")
            sys.exit(ExitCode.FAILURE)

        # Replica bounds are optional and forwarded as keyword arguments.
        replica_bounds = {
            "min_replicas": min_replicas,
            "max_replicas": max_replicas,
        }
        try:
            # SERVICE_NAME_OR_ID is resolved to the service's ID first.
            resolved_id = get_service_id(session, service)
            service_id = uuid.UUID(resolved_id)
            created = session.ServiceAutoScalingRule.create(
                service_id,
                metric_source,
                metric_name,
                threshold_value,
                comparator,
                step_size,
                cooldown_seconds,
                **replica_bounds,
            )
            print_done(f"Auto Scaling Rule (ID {created.rule_id}) created.")
        except Exception as exc:
            # Any failure (lookup, API call, output) is reported uniformly.
            ctx.output.print_error(exc)
            sys.exit(ExitCode.FAILURE)


@auto_scaling_rule.command()
@pass_ctx_obj
@click.argument("service", type=str, metavar="SERVICE_NAME_OR_ID")
@click.option(
    "-f",
    "--format",
    default=None,
    help="Display only specified fields. When specifying multiple fields separate them with comma (,).",
)
@click.option("--filter", "filter_", default=None, help="Set the query filter expression.")
@click.option("--order", default=None, help="Set the query ordering expression.")
@click.option("--offset", default=0, help="The index of the current page start for pagination.")
@click.option("--limit", type=int, default=None, help="The page size for pagination.")
def list(ctx: CLIContext, service: str, format, filter_, order, offset, limit):
    """List all set auto scaling rules for given model service."""
    # NOTE: this function intentionally keeps the name `list` (shadowing the
    # builtin inside this module) because the sub-command is registered under
    # the function's name.
    if format:
        try:
            fields = [service_auto_scaling_rule_fields[f.strip()] for f in format.split(",")]
        except KeyError as e:
            ctx.output.print_fail(f"Field {str(e)} not found")
            sys.exit(ExitCode.FAILURE)
    else:
        # No explicit selection: None is passed through to paginated_list()
        # (presumably it falls back to its own default field set -- confirm).
        fields = None

    with Session() as session:
        try:
            # Resolve the service inside the try block so that an unknown
            # service name or a malformed ID is reported through the regular
            # error output (as `create` does) rather than as a raw traceback.
            service_id = uuid.UUID(get_service_id(session, service))

            # Page fetcher invoked by the output handler for each page.
            fetch_func = lambda pg_offset, pg_size: session.ServiceAutoScalingRule.paginated_list(
                service_id,
                page_offset=pg_offset,
                page_size=pg_size,
                filter=filter_,
                order=order,
                fields=fields,
            )
            ctx.output.print_paginated_list(
                fetch_func,
                initial_page_offset=offset,
                page_size=limit,
            )
        except Exception as e:
            ctx.output.print_error(e)
            sys.exit(ExitCode.FAILURE)


@auto_scaling_rule.command()
@pass_ctx_obj
@click.argument("rule", type=str, metavar="RULE_ID")
@click.option(
    "-f",
    "--format",
    default=None,
    help="Display only specified fields. When specifying multiple fields separate them with comma (,).",
)
def get(ctx: CLIContext, rule: str, format: Optional[str]) -> None:
    """Prints attributes of given auto scaling rule."""
    fields: Iterable[Any]
    if format:
        try:
            fields = [service_auto_scaling_rule_fields[f.strip()] for f in format.split(",")]
        except KeyError as e:
            ctx.output.print_fail(f"Field {str(e)} not found")
            sys.exit(ExitCode.FAILURE)
    else:
        # Fall back to the default field set shared with the API function module.
        fields = _default_get_fields

    with Session() as session:
        try:
            rule_info = session.ServiceAutoScalingRule(uuid.UUID(rule)).get(fields=fields)
        except (ValueError, BackendAPIError):
            # ValueError: RULE_ID is not a well-formed UUID.
            # BackendAPIError: the API request for the rule failed.
            # The message previously said "Network ...", a leftover from the
            # network CLI this command was modelled on.
            ctx.output.print_fail(f"Auto scaling rule {rule} not found.")
            sys.exit(ExitCode.FAILURE)

        ctx.output.print_item(rule_info, fields)


@auto_scaling_rule.command()
@pass_ctx_obj
@click.argument("rule", type=str, metavar="RULE_ID")
@click.option("--metric-source", type=OptionalType(AutoScalingMetricSource), default=undefined)
@click.option("--metric-name", type=OptionalType(str), default=undefined)
@click.option("--threshold", type=OptionalType(str), default=undefined)
@click.option("--comparator", type=OptionalType(AutoScalingMetricComparator), default=undefined)
@click.option("--step-size", type=OptionalType(int), default=undefined)
@click.option("--cooldown-seconds", type=OptionalType(int), default=undefined)
@click.option(
    "--min-replicas",
    type=OptionalType(int),
    help="Set as -1 to remove min_replicas restriction.",
    default=undefined,
)
@click.option(
    "--max-replicas",
    type=OptionalType(int),
    help="Set as -1 to remove max_replicas restriction.",
    default=undefined,
)
def update(
    ctx: CLIContext,
    rule: str,
    *,
    metric_source: str | Undefined,
    metric_name: str | Undefined,
    threshold: str | Undefined,
    comparator: str | Undefined,
    step_size: int | Undefined,
    cooldown_seconds: int | Undefined,
    min_replicas: Optional[int] | Undefined,
    max_replicas: Optional[int] | Undefined,
) -> None:
    """Update attributes of given auto scaling rule."""
    # Every option defaults to the `undefined` sentinel, which is forwarded
    # to the API call unchanged for options the user did not pass.
    with Session() as session:
        try:
            # Only parse the threshold when the user actually supplied one.
            _threshold = decimal.Decimal(threshold) if threshold != undefined else undefined
        except decimal.InvalidOperation:
            ctx.output.print_fail(f"{threshold} is not a valid Decimal")
            sys.exit(ExitCode.FAILURE)

        # -1 is the CLI spelling for "remove the restriction"; the API call
        # receives None in that case.
        if min_replicas == -1:
            min_replicas = None
        if max_replicas == -1:
            max_replicas = None

        try:
            _rule = session.ServiceAutoScalingRule(uuid.UUID(rule))
            # Fetch the rule before updating it, so that a lookup failure is
            # raised before any update is attempted.
            _rule.get()
            _rule.update(
                metric_source=metric_source,
                metric_name=metric_name,
                threshold=_threshold,
                comparator=comparator,
                step_size=step_size,
                cooldown_seconds=cooldown_seconds,
                min_replicas=min_replicas,
                max_replicas=max_replicas,
            )
            print_done(f"Auto Scaling Rule (ID {_rule.rule_id}) updated.")
        except ValueError:
            # uuid.UUID() rejects a malformed RULE_ID; report it instead of
            # letting the traceback reach the user.
            ctx.output.print_fail(f"{rule} is not a valid auto scaling rule ID.")
            sys.exit(ExitCode.FAILURE)
        except BackendAPIError as e:
            ctx.output.print_fail(e.data["title"])
            sys.exit(ExitCode.FAILURE)


@auto_scaling_rule.command()
@pass_ctx_obj
@click.argument("rule", type=str, metavar="RULE_ID")
def delete(ctx: CLIContext, rule: str) -> None:
    """Delete given auto scaling rule."""
    with Session() as session:
        try:
            rule_id = uuid.UUID(rule)
        except ValueError:
            # A malformed RULE_ID used to escape as an unhandled ValueError.
            ctx.output.print_fail(f"{rule} is not a valid auto scaling rule ID.")
            sys.exit(ExitCode.FAILURE)

        # Use a separate name for the API object instead of rebinding the
        # `rule` argument, so the raw user input stays available.
        _rule = session.ServiceAutoScalingRule(rule_id)
        try:
            # Fetch the rule (ID field only) before deleting it, so that a
            # lookup failure is raised before the delete request is sent.
            _rule.get(fields=[service_auto_scaling_rule_fields["id"]])
            _rule.delete()
            print_done(f"Auto scaling rule {_rule.rule_id} has been deleted.")
        except BackendAPIError as e:
            ctx.output.print_fail(f"Failed to delete rule {_rule.rule_id}:")
            ctx.output.print_error(e)
            sys.exit(ExitCode.FAILURE)
Loading
Loading