From 961af9b24f3eabd22c9fe285805c2849bf23eeb2 Mon Sep 17 00:00:00 2001 From: awstools Date: Thu, 25 Jul 2024 19:38:56 +0000 Subject: [PATCH] feat(client-application-auto-scaling): Application Auto Scaling is now more responsive to the changes in demand of your SageMaker Inference endpoints. To get started, create or update a Target Tracking policy based on High Resolution CloudWatch metrics. --- .../src/commands/DescribeScalingPoliciesCommand.ts | 2 +- .../src/commands/PutScalingPolicyCommand.ts | 2 +- .../src/models/models_0.ts | 3 +++ .../aws-models/application-auto-scaling.json | 12 ++++++++++++ 4 files changed, 17 insertions(+), 2 deletions(-) diff --git a/clients/client-application-auto-scaling/src/commands/DescribeScalingPoliciesCommand.ts b/clients/client-application-auto-scaling/src/commands/DescribeScalingPoliciesCommand.ts index b1a518aac8699..557df10f3ac0b 100644 --- a/clients/client-application-auto-scaling/src/commands/DescribeScalingPoliciesCommand.ts +++ b/clients/client-application-auto-scaling/src/commands/DescribeScalingPoliciesCommand.ts @@ -79,7 +79,7 @@ export interface DescribeScalingPoliciesCommandOutput extends DescribeScalingPol * // TargetTrackingScalingPolicyConfiguration: { // TargetTrackingScalingPolicyConfiguration * // TargetValue: Number("double"), // required * // PredefinedMetricSpecification: { // PredefinedMetricSpecification - * // PredefinedMetricType: "DynamoDBReadCapacityUtilization" || "DynamoDBWriteCapacityUtilization" || "ALBRequestCountPerTarget" || "RDSReaderAverageCPUUtilization" || "RDSReaderAverageDatabaseConnections" || "EC2SpotFleetRequestAverageCPUUtilization" || "EC2SpotFleetRequestAverageNetworkIn" || "EC2SpotFleetRequestAverageNetworkOut" || "SageMakerVariantInvocationsPerInstance" || "ECSServiceAverageCPUUtilization" || "ECSServiceAverageMemoryUtilization" || "AppStreamAverageCapacityUtilization" || "ComprehendInferenceUtilization" || "LambdaProvisionedConcurrencyUtilization" || "CassandraReadCapacityUtilization" || "CassandraWriteCapacityUtilization" || "KafkaBrokerStorageUtilization" || "ElastiCachePrimaryEngineCPUUtilization" || "ElastiCacheReplicaEngineCPUUtilization" || "ElastiCacheDatabaseMemoryUsageCountedForEvictPercentage" || "NeptuneReaderAverageCPUUtilization" || "SageMakerVariantProvisionedConcurrencyUtilization" || "ElastiCacheDatabaseCapacityUsageCountedForEvictPercentage" || "SageMakerInferenceComponentInvocationsPerCopy" || "WorkSpacesAverageUserSessionsCapacityUtilization", // required + * // PredefinedMetricType: "DynamoDBReadCapacityUtilization" || "DynamoDBWriteCapacityUtilization" || "ALBRequestCountPerTarget" || "RDSReaderAverageCPUUtilization" || "RDSReaderAverageDatabaseConnections" || "EC2SpotFleetRequestAverageCPUUtilization" || "EC2SpotFleetRequestAverageNetworkIn" || "EC2SpotFleetRequestAverageNetworkOut" || "SageMakerVariantInvocationsPerInstance" || "ECSServiceAverageCPUUtilization" || "ECSServiceAverageMemoryUtilization" || "AppStreamAverageCapacityUtilization" || "ComprehendInferenceUtilization" || "LambdaProvisionedConcurrencyUtilization" || "CassandraReadCapacityUtilization" || "CassandraWriteCapacityUtilization" || "KafkaBrokerStorageUtilization" || "ElastiCachePrimaryEngineCPUUtilization" || "ElastiCacheReplicaEngineCPUUtilization" || "ElastiCacheDatabaseMemoryUsageCountedForEvictPercentage" || "NeptuneReaderAverageCPUUtilization" || "SageMakerVariantProvisionedConcurrencyUtilization" || "ElastiCacheDatabaseCapacityUsageCountedForEvictPercentage" || "SageMakerInferenceComponentInvocationsPerCopy" || "WorkSpacesAverageUserSessionsCapacityUtilization" || "SageMakerInferenceComponentConcurrentRequestsPerCopyHighResolution" || "SageMakerVariantConcurrentRequestsPerModelHighResolution", // required * // ResourceLabel: "STRING_VALUE", * // }, * // CustomizedMetricSpecification: { // CustomizedMetricSpecification diff --git a/clients/client-application-auto-scaling/src/commands/PutScalingPolicyCommand.ts b/clients/client-application-auto-scaling/src/commands/PutScalingPolicyCommand.ts index dc4649424e9f3..ef8a79394fd67 100644 --- a/clients/client-application-auto-scaling/src/commands/PutScalingPolicyCommand.ts +++ b/clients/client-application-auto-scaling/src/commands/PutScalingPolicyCommand.ts @@ -85,7 +85,7 @@ export interface PutScalingPolicyCommandOutput extends PutScalingPolicyResponse, * TargetTrackingScalingPolicyConfiguration: { // TargetTrackingScalingPolicyConfiguration * TargetValue: Number("double"), // required * PredefinedMetricSpecification: { // PredefinedMetricSpecification - * PredefinedMetricType: "DynamoDBReadCapacityUtilization" || "DynamoDBWriteCapacityUtilization" || "ALBRequestCountPerTarget" || "RDSReaderAverageCPUUtilization" || "RDSReaderAverageDatabaseConnections" || "EC2SpotFleetRequestAverageCPUUtilization" || "EC2SpotFleetRequestAverageNetworkIn" || "EC2SpotFleetRequestAverageNetworkOut" || "SageMakerVariantInvocationsPerInstance" || "ECSServiceAverageCPUUtilization" || "ECSServiceAverageMemoryUtilization" || "AppStreamAverageCapacityUtilization" || "ComprehendInferenceUtilization" || "LambdaProvisionedConcurrencyUtilization" || "CassandraReadCapacityUtilization" || "CassandraWriteCapacityUtilization" || "KafkaBrokerStorageUtilization" || "ElastiCachePrimaryEngineCPUUtilization" || "ElastiCacheReplicaEngineCPUUtilization" || "ElastiCacheDatabaseMemoryUsageCountedForEvictPercentage" || "NeptuneReaderAverageCPUUtilization" || "SageMakerVariantProvisionedConcurrencyUtilization" || "ElastiCacheDatabaseCapacityUsageCountedForEvictPercentage" || "SageMakerInferenceComponentInvocationsPerCopy" || "WorkSpacesAverageUserSessionsCapacityUtilization", // required + * PredefinedMetricType: "DynamoDBReadCapacityUtilization" || "DynamoDBWriteCapacityUtilization" || "ALBRequestCountPerTarget" || "RDSReaderAverageCPUUtilization" || "RDSReaderAverageDatabaseConnections" || "EC2SpotFleetRequestAverageCPUUtilization" || "EC2SpotFleetRequestAverageNetworkIn" || "EC2SpotFleetRequestAverageNetworkOut" || "SageMakerVariantInvocationsPerInstance" || "ECSServiceAverageCPUUtilization" || "ECSServiceAverageMemoryUtilization" || "AppStreamAverageCapacityUtilization" || "ComprehendInferenceUtilization" || "LambdaProvisionedConcurrencyUtilization" || "CassandraReadCapacityUtilization" || "CassandraWriteCapacityUtilization" || "KafkaBrokerStorageUtilization" || "ElastiCachePrimaryEngineCPUUtilization" || "ElastiCacheReplicaEngineCPUUtilization" || "ElastiCacheDatabaseMemoryUsageCountedForEvictPercentage" || "NeptuneReaderAverageCPUUtilization" || "SageMakerVariantProvisionedConcurrencyUtilization" || "ElastiCacheDatabaseCapacityUsageCountedForEvictPercentage" || "SageMakerInferenceComponentInvocationsPerCopy" || "WorkSpacesAverageUserSessionsCapacityUtilization" || "SageMakerInferenceComponentConcurrentRequestsPerCopyHighResolution" || "SageMakerVariantConcurrentRequestsPerModelHighResolution", // required * ResourceLabel: "STRING_VALUE", * }, * CustomizedMetricSpecification: { // CustomizedMetricSpecification diff --git a/clients/client-application-auto-scaling/src/models/models_0.ts b/clients/client-application-auto-scaling/src/models/models_0.ts index b625e23ed00ab..f42678cc52cdf 100644 --- a/clients/client-application-auto-scaling/src/models/models_0.ts +++ b/clients/client-application-auto-scaling/src/models/models_0.ts @@ -2534,7 +2534,10 @@ export const MetricType = { NeptuneReaderAverageCPUUtilization: "NeptuneReaderAverageCPUUtilization", RDSReaderAverageCPUUtilization: "RDSReaderAverageCPUUtilization", RDSReaderAverageDatabaseConnections: "RDSReaderAverageDatabaseConnections", + SageMakerInferenceComponentConcurrentRequestsPerCopyHighResolution: + "SageMakerInferenceComponentConcurrentRequestsPerCopyHighResolution", SageMakerInferenceComponentInvocationsPerCopy: "SageMakerInferenceComponentInvocationsPerCopy", + SageMakerVariantConcurrentRequestsPerModelHighResolution: "SageMakerVariantConcurrentRequestsPerModelHighResolution", SageMakerVariantInvocationsPerInstance: "SageMakerVariantInvocationsPerInstance", SageMakerVariantProvisionedConcurrencyUtilization: "SageMakerVariantProvisionedConcurrencyUtilization", WorkSpacesAverageUserSessionsCapacityUtilization: "WorkSpacesAverageUserSessionsCapacityUtilization", diff --git a/codegen/sdk-codegen/aws-models/application-auto-scaling.json b/codegen/sdk-codegen/aws-models/application-auto-scaling.json index 612469756abae..4fff85b21cb6d 100644 --- a/codegen/sdk-codegen/aws-models/application-auto-scaling.json +++ b/codegen/sdk-codegen/aws-models/application-auto-scaling.json @@ -2379,6 +2379,18 @@ "traits": { "smithy.api#enumValue": "WorkSpacesAverageUserSessionsCapacityUtilization" } + }, + "SageMakerInferenceComponentConcurrentRequestsPerCopyHighResolution": { + "target": "smithy.api#Unit", + "traits": { + "smithy.api#enumValue": "SageMakerInferenceComponentConcurrentRequestsPerCopyHighResolution" + } + }, + "SageMakerVariantConcurrentRequestsPerModelHighResolution": { + "target": "smithy.api#Unit", + "traits": { + "smithy.api#enumValue": "SageMakerVariantConcurrentRequestsPerModelHighResolution" + } } } },