diff --git a/internal/service/sagemaker/model.go b/internal/service/sagemaker/model.go index 5c469fd4ef49..ca76ac468f59 100644 --- a/internal/service/sagemaker/model.go +++ b/internal/service/sagemaker/model.go @@ -145,12 +145,48 @@ func ResourceModel() *schema.Resource { ForceNew: true, ValidateFunc: validation.StringInSlice(sagemaker.ModelCompressionType_Values(), false), }, + "model_access_config": { + Type: schema.TypeList, + Optional: true, + MaxItems: 1, + Elem: &schema.Resource{ + Schema: map[string]*schema.Schema{ + "accept_eula": { + Type: schema.TypeBool, + Required: true, + ForceNew: true, + }, + }, + }, + }, }, }, }, }, }, }, + "inference_specification_name": { + Type: schema.TypeString, + Optional: true, + ForceNew: true, + ValidateFunc: validName, + }, + "multi_model_config": { + Type: schema.TypeList, + Optional: true, + ForceNew: true, + MaxItems: 1, + Elem: &schema.Resource{ + Schema: map[string]*schema.Schema{ + "model_cache_setting": { + Type: schema.TypeString, + Optional: true, + ForceNew: true, + ValidateFunc: validation.StringInSlice(sagemaker.ModelCacheSetting_Values(), false), + }, + }, + }, + }, }, }, }, @@ -292,12 +328,49 @@ func ResourceModel() *schema.Resource { ForceNew: true, ValidateFunc: validation.StringInSlice(sagemaker.ModelCompressionType_Values(), false), }, + "model_access_config": { + Type: schema.TypeList, + Optional: true, + ForceNew: true, + MaxItems: 1, + Elem: &schema.Resource{ + Schema: map[string]*schema.Schema{ + "accept_eula": { + Type: schema.TypeBool, + Required: true, + ForceNew: true, + }, + }, + }, + }, }, }, }, }, }, }, + "inference_specification_name": { + Type: schema.TypeString, + Optional: true, + ForceNew: true, + ValidateFunc: validName, + }, + "multi_model_config": { + Type: schema.TypeList, + Optional: true, + ForceNew: true, + MaxItems: 1, + Elem: &schema.Resource{ + Schema: map[string]*schema.Schema{ + "model_cache_setting": { + Type: schema.TypeString, + Optional: true, + ForceNew: true, + 
ValidateFunc: validation.StringInSlice(sagemaker.ModelCacheSetting_Values(), false), + }, + }, + }, + }, }, }, }, @@ -521,6 +594,14 @@ func expandContainer(m map[string]interface{}) *sagemaker.ContainerDefinition { container.ImageConfig = expandModelImageConfig(v.([]interface{})) } + if v, ok := m["inference_specification_name"]; ok && v.(string) != "" { + container.InferenceSpecificationName = aws.String(v.(string)) + } + + if v, ok := m["multi_model_config"].([]interface{}); ok && len(v) > 0 { + container.MultiModelConfig = expandMultiModelConfig(v) + } + return &container } @@ -559,6 +640,10 @@ func expandS3ModelDataSource(l []interface{}) *sagemaker.S3ModelDataSource { s3ModelDataSource.CompressionType = aws.String(v.(string)) } + if v, ok := m["model_access_config"].([]interface{}); ok && len(v) > 0 { + s3ModelDataSource.ModelAccessConfig = expandModelAccessConfig(v) + } + return &s3ModelDataSource } @@ -604,6 +689,38 @@ func expandContainers(a []interface{}) []*sagemaker.ContainerDefinition { return containers } +func expandModelAccessConfig(l []interface{}) *sagemaker.ModelAccessConfig { + if len(l) == 0 { + return nil + } + + m := l[0].(map[string]interface{}) + + modelAccessConfig := &sagemaker.ModelAccessConfig{} + + if v, ok := m["accept_eula"].(bool); ok { + modelAccessConfig.AcceptEula = aws.Bool(v) + } + + return modelAccessConfig +} + +func expandMultiModelConfig(l []interface{}) *sagemaker.MultiModelConfig { + if len(l) == 0 { + return nil + } + + m := l[0].(map[string]interface{}) + + multiModelConfig := &sagemaker.MultiModelConfig{} + + if v, ok := m["model_cache_setting"].(string); ok && v != "" { + multiModelConfig.ModelCacheSetting = aws.String(v) + } + + return multiModelConfig +} + func flattenContainer(container *sagemaker.ContainerDefinition) []interface{} { if container == nil { return []interface{}{} @@ -634,11 +751,18 @@ func flattenContainer(container *sagemaker.ContainerDefinition) []interface{} { if container.Environment != nil { 
cfg["environment"] = aws.StringValueMap(container.Environment) } - if container.ImageConfig != nil { cfg["image_config"] = flattenImageConfig(container.ImageConfig) } + if container.InferenceSpecificationName != nil { + cfg["inference_specification_name"] = aws.StringValue(container.InferenceSpecificationName) + } + + if container.MultiModelConfig != nil { + cfg["multi_model_config"] = flattenMultiModelConfig(container.MultiModelConfig) + } + return []interface{}{cfg} } @@ -673,6 +797,10 @@ func flattenS3ModelDataSource(s3ModelDataSource *sagemaker.S3ModelDataSource) [] cfg["compression_type"] = aws.StringValue(s3ModelDataSource.CompressionType) } + if s3ModelDataSource.ModelAccessConfig != nil { + cfg["model_access_config"] = flattenModelAccessConfig(s3ModelDataSource.ModelAccessConfig) + } + return []interface{}{cfg} } @@ -714,6 +842,30 @@ func flattenContainers(containers []*sagemaker.ContainerDefinition) []interface{ return fContainers } +func flattenModelAccessConfig(config *sagemaker.ModelAccessConfig) []interface{} { + if config == nil { + return []interface{}{} + } + + cfg := make(map[string]interface{}) + + cfg["accept_eula"] = aws.BoolValue(config.AcceptEula) + + return []interface{}{cfg} +} + +func flattenMultiModelConfig(config *sagemaker.MultiModelConfig) []interface{} { + if config == nil { + return []interface{}{} + } + + cfg := make(map[string]interface{}) + + cfg["model_cache_setting"] = aws.StringValue(config.ModelCacheSetting) + + return []interface{}{cfg} +} + func expandModelInferenceExecutionConfig(l []interface{}) *sagemaker.InferenceExecutionConfig { if len(l) == 0 { return nil diff --git a/internal/service/sagemaker/model_test.go b/internal/service/sagemaker/model_test.go index dd3da23b3000..48429f0bce3d 100644 --- a/internal/service/sagemaker/model_test.go +++ b/internal/service/sagemaker/model_test.go @@ -507,6 +507,114 @@ func testAccCheckModelExists(ctx context.Context, n string) resource.TestCheckFu } } +func 
testAccModel_primaryContainerModelS3DataSourceAcceptEula(t *testing.T) { + ctx := acctest.Context(t) + rName := sdkacctest.RandomWithPrefix(acctest.ResourcePrefix) + resourceName := "aws_sagemaker_model.test" + + resource.ParallelTest(t, resource.TestCase{ + PreCheck: func() { acctest.PreCheck(ctx, t) }, + ErrorCheck: acctest.ErrorCheck(t, sagemaker.EndpointsID), + ProtoV5ProviderFactories: acctest.ProtoV5ProviderFactories, + CheckDestroy: testAccCheckModelDestroy(ctx), + Steps: []resource.TestStep{ + { + Config: testAccModelConfig_primaryContainerModelS3DataSourceAcceptEula(rName), + Check: resource.ComposeTestCheckFunc( + testAccCheckModelExists(ctx, resourceName), + resource.TestCheckResourceAttr(resourceName, "primary_container.0.model_data_source.0.s3_data_source.0.model_access_config.0.accept_eula", "true"), + ), + }, + { + ResourceName: resourceName, + ImportState: true, + ImportStateVerify: true, + }, + }, + }) +} + +func testAccModel_primaryContainerInferenceSpecificationName(t *testing.T) { + ctx := acctest.Context(t) + rName := sdkacctest.RandomWithPrefix(acctest.ResourcePrefix) + resourceName := "aws_sagemaker_model.test" + + resource.ParallelTest(t, resource.TestCase{ + PreCheck: func() { acctest.PreCheck(ctx, t) }, + ErrorCheck: acctest.ErrorCheck(t, sagemaker.EndpointsID), + ProtoV5ProviderFactories: acctest.ProtoV5ProviderFactories, + CheckDestroy: testAccCheckModelDestroy(ctx), + Steps: []resource.TestStep{ + { + Config: testAccModelConfig_primaryContainerInferenceSpecificationName(rName), + Check: resource.ComposeTestCheckFunc( + testAccCheckModelExists(ctx, resourceName), + resource.TestCheckResourceAttr(resourceName, "primary_container.0.inference_specification_name", "test"), + ), + }, + { + ResourceName: resourceName, + ImportState: true, + ImportStateVerify: true, + }, + }, + }) +} + +func testAccModel_primaryContainerMultiModelConfigModelCacheSetting(t *testing.T) { + ctx := acctest.Context(t) + rName := 
sdkacctest.RandomWithPrefix(acctest.ResourcePrefix) + resourceName := "aws_sagemaker_model.test" + + resource.ParallelTest(t, resource.TestCase{ + PreCheck: func() { acctest.PreCheck(ctx, t) }, + ErrorCheck: acctest.ErrorCheck(t, sagemaker.EndpointsID), + ProtoV5ProviderFactories: acctest.ProtoV5ProviderFactories, + CheckDestroy: testAccCheckModelDestroy(ctx), + Steps: []resource.TestStep{ + { + Config: testAccModelConfig_primaryContainerMultiModelConfigModelCacheSetting(rName), + Check: resource.ComposeTestCheckFunc( + testAccCheckModelExists(ctx, resourceName), + resource.TestCheckResourceAttr(resourceName, "primary_container.0.multi_model_config.0.model_cache_setting", "Disabled"), + ), + }, + { + ResourceName: resourceName, + ImportState: true, + ImportStateVerify: true, + }, + }, + }) +} + +func testAccModel_containersMultiModelConfigModelCacheSetting(t *testing.T) { + ctx := acctest.Context(t) + rName := sdkacctest.RandomWithPrefix(acctest.ResourcePrefix) + resourceName := "aws_sagemaker_model.test" + + resource.ParallelTest(t, resource.TestCase{ + PreCheck: func() { acctest.PreCheck(ctx, t) }, + ErrorCheck: acctest.ErrorCheck(t, sagemaker.EndpointsID), + ProtoV5ProviderFactories: acctest.ProtoV5ProviderFactories, + CheckDestroy: testAccCheckModelDestroy(ctx), + Steps: []resource.TestStep{ + { + Config: testAccModelConfig_containersMultiModelConfigModelCacheSetting(rName), + Check: resource.ComposeTestCheckFunc( + testAccCheckModelExists(ctx, resourceName), + resource.TestCheckResourceAttr(resourceName, "container.0.multi_model_config.0.model_cache_setting", "Disabled"), + ), + }, + { + ResourceName: resourceName, + ImportState: true, + ImportStateVerify: true, + }, + }, + }) +} + func testAccModelConfig_base(rName string) string { return fmt.Sprintf(` resource "aws_iam_role" "test" { @@ -521,11 +629,16 @@ data "aws_iam_policy_document" "test" { principals { type = "Service" - identifiers = ["sagemaker.amazonaws.com"] + identifiers = 
["sagemaker.${data.aws_partition.current.dns_suffix}"] } } } +resource "aws_iam_role_policy_attachment" "test" { + role = aws_iam_role.test.name + policy_arn = "arn:${data.aws_partition.current.partition}:iam::aws:policy/AmazonSageMakerFullAccess" +} + data "aws_sagemaker_prebuilt_ecr_image" "test" { repository_name = "kmeans" } @@ -955,3 +1068,237 @@ resource "aws_security_group" "test" { } `, rName)) } + +func testAccModelConfig_primaryContainerModelS3DataSourceAcceptEula(rName string) string { + return acctest.ConfigCompose(testAccModelConfig_base(rName), fmt.Sprintf(` +data "aws_region" "current" {} +data "aws_partition" "current" {} +locals { + region_account_map = { + us-east-1 = "763104351884" + us-east-2 = "763104351884" + us-west-1 = "763104351884" + us-west-2 = "763104351884" + af-south-1 = "626614931356" + ca-central-1 = "763104351884" + eu-central-1 = "763104351884" + eu-central-2 = "380420809688" + eu-west-1 = "763104351884" + eu-west-2 = "763104351884" + eu-west-3 = "763104351884" + eu-north-1 = "763104351884" + eu-south-1 = "692866216735" + eu-south-2 = "503227376785" + il-central-1 = "780543022126" + me-south-1 = "217643126080" + me-central-1 = "914824155844" + sa-east-1 = "763104351884" + ap-southeast-1 = "763104351884" + ap-southeast-2 = "763104351884" + ap-southeast-3 = "907027046896" + ap-southeast-4 = "457447274322" + ap-northeast-1 = "763104351884" + ap-northeast-2 = "763104351884" + ap-northeast-3 = "364406365360" + ap-south-1 = "763104351884" + ap-south-2 = "772153158452" + ap-east-1 = "871362719292" + sa-east-1 = "763104351884" + cn-north-1 = "727897471807" + cn-northwest-1 = "727897471807" + } + account = local.region_account_map[data.aws_region.current.name] + primary_container_image = format( + "%%s.dkr.ecr.%%s.%%s/huggingface-pytorch-tgi-inference:2.0.1-tgi1.1.0-gpu-py39-cu118-ubuntu20.04", + local.account, + data.aws_region.current.name, + data.aws_partition.current.dns_suffix + + primary_container_model_data_source_s3_uri = format( + 
"s3://jumpstart-private-cache-prod-%%s/meta-textgeneration/meta-textgeneration-llama-2-13b-f/artifacts/inference-prepack/v1.0.0/", + data.aws_region.current.name + ) +} + +resource "aws_sagemaker_model" "test" { + name = %[1]q + enable_network_isolation = true + execution_role_arn = aws_iam_role.test.arn + + primary_container { + image = local.primary_container_image + mode = "SingleModel" + environment = { + ENDPOINT_SERVER_TIMEOUT = 3600 + HF_MODEL_ID = "/opt/ml/model" + MAX_INPUT_LENGTH = 4095 + MAX_TOTAL_TOKENS = 4096 + MODEL_CACHE_ROOT = "/opt/ml/model" + SAGEMAKER_ENV = 1 + SAGEMAKER_MODEL_SERVER_WORKERS = 1 + SAGEMAKER_PROGRAM = "inference.py" + SM_NUM_GPUS = 4 + } + + model_data_source { + s3_data_source { + compression_type = "None" + s3_data_type = "S3Prefix" + s3_uri = local.primary_container_model_data_source_s3_uri + model_access_config { + accept_eula = true + } + } + } +} +`, rName)) +} + +func testAccModelConfig_primaryContainerInferenceSpecificationName(rName string) string { + return acctest.ConfigCompose(testAccModelConfig_base(rName), fmt.Sprintf(` + +resource "aws_sagemaker_model" "test" { + name = %[1]q + execution_role_arn = aws_iam_role.test.arn + + primary_container { + image = data.aws_sagemaker_prebuilt_ecr_image.test.registry_path + inference_specification_name = "test" + } +} +`, rName)) +} + +func testAccModelConfig_primaryContainerMultiModelConfigModelCacheSetting(rName string) string { + return acctest.ConfigCompose(testAccModelConfig_base(rName), fmt.Sprintf(` +data "aws_region" "current" {} +data "aws_partition" "current" {} +locals { + region_account_map = { + "us-east-1" = "785573368785" + "us-east-2" = "007439368137" + "us-west-1" = "710691900526" + "us-west-2" = "301217895009" + "eu-west-1" = "802834080501" + "eu-west-2" = "205493899709" + "eu-west-3" = "254080097072" + "eu-north-1" = "601324751636" + "eu-south-1" = "966458181534" + "eu-central-1" = "746233611703" + "ap-east-1" = "110948597952" + "ap-south-1" = "763008648453" + 
"ap-northeast-1" = "941853720454" + "ap-northeast-2" = "151534178276" + "ap-southeast-1" = "324986816169" + "ap-southeast-2" = "355873309152" + "cn-northwest-1" = "474822919863" + "cn-north-1" = "472730292857" + "sa-east-1" = "756306329178" + "ca-central-1" = "464438896020" + "me-south-1" = "836785723513" + "af-south-1" = "774647643957" + } + account = local.region_account_map[data.aws_region.current.name] + primary_container_image = format( + "%%s.dkr.ecr.%%s.%%s/sagemaker-tritonserver:22.07-py3", + local.account, + data.aws_region.current.name, + data.aws_partition.current.dns_suffix + ) +} + +resource "aws_s3_bucket" "test" { + bucket = %[1]q + force_destroy = true +} + +resource "aws_s3_object" "test" { + bucket = aws_s3_bucket.test.bucket + key = "resnet50-mme-gpu/model.tar.gz" + content = "some-data" +} + +resource "aws_sagemaker_model" "test" { + depends_on = [aws_s3_object.test] + + name = %[1]q + execution_role_arn = aws_iam_role.test.arn + + primary_container { + image = local.primary_container_image + mode = "MultiModel" + model_data_url = "s3://${aws_s3_bucket.test.id}/resnet50-mme-gpu/" + multi_model_config { + model_cache_setting = "Disabled" + } + } +} +`, rName)) +} + +func testAccModelConfig_containersMultiModelConfigModelCacheSetting(rName string) string { + return acctest.ConfigCompose(testAccModelConfig_base(rName), fmt.Sprintf(` +data "aws_region" "current" {} +data "aws_partition" "current" {} +locals { + region_account_map = { + "us-east-1" = "785573368785" + "us-east-2" = "007439368137" + "us-west-1" = "710691900526" + "us-west-2" = "301217895009" + "eu-west-1" = "802834080501" + "eu-west-2" = "205493899709" + "eu-west-3" = "254080097072" + "eu-north-1" = "601324751636" + "eu-south-1" = "966458181534" + "eu-central-1" = "746233611703" + "ap-east-1" = "110948597952" + "ap-south-1" = "763008648453" + "ap-northeast-1" = "941853720454" + "ap-northeast-2" = "151534178276" + "ap-southeast-1" = "324986816169" + "ap-southeast-2" = "355873309152" + 
"cn-northwest-1" = "474822919863" + "cn-north-1" = "472730292857" + "sa-east-1" = "756306329178" + "ca-central-1" = "464438896020" + "me-south-1" = "836785723513" + "af-south-1" = "774647643957" + } + account = local.region_account_map[data.aws_region.current.name] + container_image = format( + "%%s.dkr.ecr.%%s.%%s/sagemaker-tritonserver:22.07-py3", + local.account, + data.aws_region.current.name, + data.aws_partition.current.dns_suffix + ) +} + +resource "aws_s3_bucket" "test" { + bucket = %[1]q + force_destroy = true +} + +resource "aws_s3_object" "test" { + bucket = aws_s3_bucket.test.bucket + key = "resnet50-mme-gpu/model.tar.gz" + content = "some-data" +} + +resource "aws_sagemaker_model" "test" { + depends_on = [aws_s3_object.test] + + name = %[1]q + execution_role_arn = aws_iam_role.test.arn + + container { + image = local.container_image + mode = "MultiModel" + model_data_url = "s3://${aws_s3_bucket.test.id}/resnet50-mme-gpu/" + multi_model_config { + model_cache_setting = "Disabled" + } + } +} +`, rName)) +} diff --git a/internal/service/sagemaker/sagemaker_test.go b/internal/service/sagemaker/sagemaker_test.go index 37b5bc18fae0..d387580a73ae 100644 --- a/internal/service/sagemaker/sagemaker_test.go +++ b/internal/service/sagemaker/sagemaker_test.go @@ -69,6 +69,27 @@ func TestAccSageMaker_serial(t *testing.T) { "posix": testAccDomain_posix, "spaceStorageSettings": testAccDomain_spaceStorageSettings, }, + "Model": { + "basic": TestAccSageMakerModel_basic, + "disappears": TestAccSageMakerModel_disappears, + "inferenceExecution": TestAccSageMakerModel_inferenceExecution, + "tags": TestAccSageMakerModel_tags, + "vpc": TestAccSageMakerModel_vpc, + "networkIsolation": TestAccSageMakerModel_networkIsolation, + "primaryContainerModelDataUrl": TestAccSageMakerModel_primaryContainerModelDataURL, + "primaryContainerHostName": TestAccSageMakerModel_primaryContainerHostname, + "primaryContainerImage": TestAccSageMakerModel_primaryContainerImage, + 
"primaryContainerEnvironment": TestAccSageMakerModel_primaryContainerEnvironment, + "primaryContainerModeSingle": TestAccSageMakerModel_primaryContainerModeSingle, + "primaryContainerModelPackageName": TestAccSageMakerModel_primaryContainerModelPackageName, + "primaryContainerModelDataSource": TestAccSageMakerModel_primaryContainerModelDataSource, + "primaryContainerPrivateDockerRegistry": TestAccSageMakerModel_primaryContainerPrivateDockerRegistry, + "containers": TestAccSageMakerModel_containers, + "primaryContainerModelS3DataSource_AcceptEula": testAccModel_primaryContainerModelS3DataSourceAcceptEula, + "primaryContainerinferencespecificationName": testAccModel_primaryContainerInferenceSpecificationName, + "primaryContainer_multiModelConfig_modelCacheSetting": testAccModel_primaryContainerMultiModelConfigModelCacheSetting, + "containers_multiModelConfig_modelCacheSetting": testAccModel_containersMultiModelConfigModelCacheSetting, + }, "FlowDefinition": { "basic": testAccFlowDefinition_basic, "disappears": testAccFlowDefinition_disappears, @@ -78,8 +99,8 @@ func TestAccSageMaker_serial(t *testing.T) { }, "Space": { "basic": testAccSpace_basic, - "disappears": testAccSpace_tags, - "tags": testAccSpace_disappears, + "disappears": testAccSpace_disappears, + "tags": testAccSpace_tags, "kernelGatewayAppSettings": testAccSpace_kernelGatewayAppSettings, "kernelGatewayAppSettings_lifecycleConfig": testAccSpace_kernelGatewayAppSettings_lifecycleconfig, "kernelGatewayAppSettings_imageConfig": testAccSpace_kernelGatewayAppSettings_imageconfig, diff --git a/website/docs/r/sagemaker_model.html.markdown b/website/docs/r/sagemaker_model.html.markdown index e324ffcbca39..17f65dfb648b 100644 --- a/website/docs/r/sagemaker_model.html.markdown +++ b/website/docs/r/sagemaker_model.html.markdown @@ -68,6 +68,8 @@ The `primary_container` and `container` block both support: * `environment` - (Optional) Environment variables for the Docker container. A list of key value pairs. 
* `image_config` - (Optional) Specifies whether the model container is in Amazon ECR or a private Docker registry accessible from your Amazon Virtual Private Cloud (VPC). For more information see [Using a Private Docker Registry for Real-Time Inference Containers](https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-containers-inference-private.html). see [Image Config](#image-config). +* `inference_specification_name` - (Optional) The inference specification name in the model package version. +* `multi_model_config` - (Optional) Specifies additional configuration for multi-model endpoints. See [Multi Model Config](#multi-model-config). ### Image Config @@ -87,6 +89,15 @@ The `primary_container` and `container` block both support: * `compression_type` - (Required) How the model data is prepared. Allowed values are: `None` and `Gzip`. * `s3_data_type` - (Required) The type of model data to deploy. Allowed values are: `S3Object` and `S3Prefix`. * `s3_uri` - (Required) The S3 path of model data to deploy. +* `model_access_config` - (Optional) Specifies the access configuration file for the ML model. You can explicitly accept the model end-user license agreement (EULA) within the `model_access_config` configuration block. You are responsible for reviewing and complying with any applicable license terms and making sure they are acceptable for your use case before downloading or using a model. See [Model Access Config](#model-access-config). + +##### Model Access Config + +* `accept_eula` - (Required) Specifies agreement to the model end-user license agreement (EULA). The `accept_eula` value must be explicitly set to `true` in order to accept the EULA that this model requires. You are responsible for reviewing and complying with any applicable license terms and making sure they are acceptable for your use case before downloading or using a model. 
+ +### Multi Model Config + +* `model_cache_setting` - (Optional) Whether to cache models for a multi-model endpoint. By default, multi-model endpoints cache models so that a model does not have to be loaded into memory each time it is invoked. Some use cases do not benefit from model caching. For example, if an endpoint hosts a large number of models that are each invoked infrequently, the endpoint might perform better if you disable model caching. To disable model caching, set the value of this parameter to `Disabled`. Allowed values are: `Enabled` and `Disabled`. ## Inference Execution Config