-
Notifications
You must be signed in to change notification settings - Fork 0
/
endpoint-config-template.yml
310 lines (279 loc) · 10.2 KB
/
endpoint-config-template.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
# Template-level description. Written as a folded scalar, so the lines below
# are joined with single spaces into one string.
Description: >-
  This template is built and deployed by the infrastructure pipeline in
  various stages (staging/production) as required. It specifies the
  resources that need to be created, like the SageMaker Endpoint.
# Inputs to the template. Every parameter is required (none declares a
# Default), so each must be supplied when the stack is created or updated.
Parameters:
  # ----- Deployment identity: these values are combined into resource names -----
  StageName:
    Type: String
    Description: >-
      The name for a project pipeline stage, such as Staging or Prod, for
      which resources are provisioned and deployed.
    AllowedValues: [staging, prod-us, prod-eu]
  # Also drives the IsEmailNames condition declared in the Conditions section.
  StackName:
    Type: String
    Description: The use case for the model.
    AllowedValues: [email-type, email-names]
  SageMakerProjectName:
    Type: String
    Description: Name of the project
    MinLength: 1
    MaxLength: 32
    AllowedPattern: '^[a-zA-Z](-*[a-zA-Z0-9])*'
  DeploymentVersion:
    Type: String
    Description: Deployment version
    MinLength: 1
    MaxLength: 32
    AllowedPattern: '^[a-zA-Z](-*[a-zA-Z0-9])*'

  # ----- Model artifact, container image and execution role -----
  ModelDataUrl:
    Type: String
    Description: URL of the model artifact in an S3 Bucket.
  SageMakerImageUri:
    Type: String
    Description: The URI of the Docker image that contains the model.
  ModelExecutionRoleArn:
    Type: String
    Description: Execution role used for deploying the model.

  # ----- Endpoint sizing -----
  EndpointInstanceType:
    Type: String
    Description: The ML compute instance type for the endpoint.
  EndpointInstanceCount:
    Type: Number
    Description: Number of instances to launch for the endpoint.
    MinValue: 1

  # ----- Endpoint auto scaling -----
  EndpointScalingTargetValue:
    Type: Number
    Description: >-
      Target value for the SageMakerVariantInvocationsPerInstance metric to
      scale based on.
  EndpointScalingMinCapacity:
    Type: Number
    Description: Minimum value to scale in to.
  EndpointScalingMaxCapacity:
    Type: Number
    Description: Maximum value to scale out to.
  EndpointScaleInCooldown:
    Type: Number
    Description: >-
      Amount of time, in seconds, after a scale in activity completes before
      another scale in activity can start.
  EndpointScaleOutCooldown:
    Type: Number
    Description: >-
      Amount of time, in seconds, after a scale out activity completes before
      another scale out activity can start.

  # ----- Lambda function that backs the REST API -----
  ApiFunctionSourceCodeBucket:
    Type: String
    Description: Name of the S3 Bucket where the Lambda Function's source code is stored
  ApiFunctionSourceCodeKey:
    Type: String
    Description: Name of the Lambda Function's source code .zip stored in S3
  ApiFunctionHandler:
    Type: String
    Description: Name of the Lambda Function's handler method
  ApiFunctionRuntime:
    Type: String
    Description: Lambda Function's runtime
Conditions:
  # True only when the StackName parameter is "email-names". The Resources
  # section uses it to pick the model environment and the Lambda timeout and
  # memory size.
  IsEmailNames: !Equals
    - !Ref StackName
    - "email-names"
# Everything provisioned for one (stage, use case) deployment:
#   - SageMaker hosting: Model -> EndpointConfig -> Endpoint
#   - Auto scaling for the endpoint: scalable target, policy and IAM role
#   - Public entry point: API Gateway REST API -> Lambda function -> endpoint
Resources:
  # ===== SageMaker hosting =====

  # Model definition: container image plus uncompressed artifacts read from
  # an S3 prefix.
  Model:
    Description: Model resource in SageMaker
    Type: AWS::SageMaker::Model
    Properties:
      ExecutionRoleArn: !Ref ModelExecutionRoleArn
      ModelName: !Sub ${SageMakerProjectName}-model-${StageName}-${StackName}-${DeploymentVersion}
      PrimaryContainer:
        Image: !Ref SageMakerImageUri
        ModelDataSource:
          S3DataSource:
            S3Uri: !Ref ModelDataUrl
            S3DataType: S3Prefix
            CompressionType: None
        # email-names sets HF_MODEL_ID to the local model directory;
        # email-type runs with an empty environment.
        Environment: !If
          - IsEmailNames
          - { HF_MODEL_ID: "/opt/ml/model" }
          - {}

  # Endpoint configuration with a single variant that receives all traffic.
  # The name embeds DeploymentVersion, so each version gets its own config.
  EndpointConfig:
    Description: Configuration for the model endpoint in SageMaker
    Type: AWS::SageMaker::EndpointConfig
    Properties:
      EndpointConfigName: !Sub ${SageMakerProjectName}-${StageName}-${StackName}-${DeploymentVersion}
      ProductionVariants:
        - VariantName: AllTraffic
          ModelName: !GetAtt Model.ModelName
          InstanceType: !Ref EndpointInstanceType
          InitialInstanceCount: !Ref EndpointInstanceCount
          InitialVariantWeight: 1.0

  # The endpoint name carries no version, so it stays the same across
  # deployment versions.
  Endpoint:
    Description: Endpoint for the model in SageMaker
    Type: AWS::SageMaker::Endpoint
    Properties:
      EndpointName: !Sub ${SageMakerProjectName}-${StageName}-${StackName}
      EndpointConfigName: !GetAtt EndpointConfig.EndpointConfigName

  # ===== Endpoint auto scaling =====

  # Registers the AllTraffic variant's instance count as the scalable
  # dimension. ResourceId is assembled from parameters rather than from the
  # Endpoint resource, hence the explicit DependsOn.
  EndpointScalingTarget:
    Description: Target for scaling the model endpoint
    Type: AWS::ApplicationAutoScaling::ScalableTarget
    DependsOn:
      - Endpoint
    Properties:
      ServiceNamespace: sagemaker
      ScalableDimension: sagemaker:variant:DesiredInstanceCount
      ResourceId: !Sub endpoint/${SageMakerProjectName}-${StageName}-${StackName}/variant/AllTraffic
      MinCapacity: !Ref EndpointScalingMinCapacity
      MaxCapacity: !Ref EndpointScalingMaxCapacity
      RoleARN: !GetAtt EndpointScalingRole.Arn
      # All scaling activity is explicitly left enabled.
      SuspendedState:
        DynamicScalingInSuspended: false
        DynamicScalingOutSuspended: false
        ScheduledScalingSuspended: false

  # Target-tracking policy on invocations per instance.
  EndpointScalingPolicy:
    Description: Scaling policy for the model endpoint
    Type: AWS::ApplicationAutoScaling::ScalingPolicy
    DependsOn:
      - EndpointScalingTarget
    Properties:
      PolicyName: !Sub ${SageMakerProjectName}-${StageName}-${StackName}-scaling-policy
      PolicyType: TargetTrackingScaling
      ScalingTargetId: !Ref EndpointScalingTarget
      TargetTrackingScalingPolicyConfiguration:
        PredefinedMetricSpecification:
          PredefinedMetricType: SageMakerVariantInvocationsPerInstance
        TargetValue: !Ref EndpointScalingTargetValue
        ScaleInCooldown: !Ref EndpointScaleInCooldown
        ScaleOutCooldown: !Ref EndpointScaleOutCooldown

  # Role assumed by Application Auto Scaling to resize the endpoint and to
  # manage the CloudWatch alarms used for scaling.
  EndpointScalingRole:
    Description: IAM Role for scaling the model endpoint
    Type: AWS::IAM::Role
    Properties:
      RoleName: !Sub sagemaker-endpoint-scaling-role-${StageName}-${StackName}
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - application-autoscaling.amazonaws.com
            Action:
              - sts:AssumeRole
      Policies:
        - PolicyName: SageMakerEndpointScaling
          PolicyDocument:
            Version: "2012-10-17"
            Statement:
              # The two grants must be separate list items. Writing them as
              # one mapping repeats Effect/Action/Resource, and parsers that
              # keep the last duplicate silently drop the SageMaker grant.
              - Sid: SageMakerAllowScaling
                Effect: Allow
                Action:
                  - sagemaker:UpdateEndpointWeightsAndCapacities
                Resource:
                  # Ref on AWS::SageMaker::Endpoint returns the endpoint
                  # ARN; the resource has no "Arn" attribute for GetAtt.
                  - !Ref Endpoint
              - Sid: CloudWatchAllowAlarmManagement
                Effect: Allow
                Action:
                  - cloudwatch:DescribeAlarms
                  - cloudwatch:GetMetricStatistics
                  - cloudwatch:PutMetricAlarm
                  - cloudwatch:DeleteAlarms
                Resource: "*"

  # ===== REST API front end =====

  Api:
    Description: REST API for the model
    Type: AWS::ApiGateway::RestApi
    Properties:
      Name: !Sub ${SageMakerProjectName}-${StageName}-${StackName}
      Description: !Sub REST API for ${SageMakerProjectName} in the ${StageName} environment
      EndpointConfiguration:
        Types:
          - REGIONAL

  # POST on the API root, proxied to the Lambda function.
  # NOTE(review): AuthorizationType is NONE, so the method is callable
  # without credentials -- confirm access is restricted elsewhere.
  ApiRootMethod:
    Description: Root method for the model's API
    Type: AWS::ApiGateway::Method
    Properties:
      RestApiId: !Ref Api
      ResourceId: !GetAtt Api.RootResourceId
      HttpMethod: POST
      AuthorizationType: NONE
      Integration:
        Type: AWS_PROXY
        IntegrationHttpMethod: POST
        Uri: !Sub arn:aws:apigateway:${AWS::Region}:lambda:path/2015-03-31/functions/${ApiFunction.Arn}/invocations

  # Deploys the API to a stage named after StageName. The explicit DependsOn
  # makes sure the root method exists before the deployment is created.
  # NOTE(review): stage logging presumably needs an API Gateway CloudWatch
  # Logs role configured for the account outside this template -- confirm.
  ApiDeployment:
    Description: Deployment of the model's API
    Type: AWS::ApiGateway::Deployment
    DependsOn:
      - ApiRootMethod
    Properties:
      RestApiId: !Ref Api
      StageName: !Ref StageName
      StageDescription:
        LoggingLevel: INFO

  # Lambda function behind the API; it receives the endpoint name through
  # the ENDPOINT_NAME environment variable.
  ApiFunction:
    Description: Lambda Function for the model API
    Type: AWS::Lambda::Function
    Properties:
      FunctionName: !Sub ${SageMakerProjectName}-${StageName}-${StackName}-api
      Description: !Sub Lambda Function for the API Gateway REST API ${SageMakerProjectName}-${StageName}-${StackName}
      Code:
        S3Bucket: !Ref ApiFunctionSourceCodeBucket
        S3Key: !Ref ApiFunctionSourceCodeKey
      Handler: !Ref ApiFunctionHandler
      Runtime: !Ref ApiFunctionRuntime
      Role: !GetAtt ApiFunctionRole.Arn
      # email-names gets a longer timeout and more memory than email-type.
      Timeout: !If [IsEmailNames, 30, 10]
      MemorySize: !If [IsEmailNames, 1024, 128]
      Environment:
        Variables:
          ENDPOINT_NAME: !GetAtt Endpoint.EndpointName
      # NOTE(review): TRACE/DEBUG are applied in every stage, including
      # prod-us and prod-eu -- confirm this verbosity is intended.
      LoggingConfig:
        LogFormat: JSON
        ApplicationLogLevel: TRACE
        SystemLogLevel: DEBUG

  # Allows API Gateway to invoke the function, limited to POST on the root
  # resource of this API in any stage.
  ApiFunctionPermission:
    Description: Permission to invoke API model's Lambda Function from API Gateway
    Type: AWS::Lambda::Permission
    Properties:
      Action: lambda:InvokeFunction
      FunctionName: !GetAtt ApiFunction.Arn
      Principal: apigateway.amazonaws.com
      SourceArn: !Sub "arn:aws:execute-api:${AWS::Region}:${AWS::AccountId}:${Api}/*/POST/"

  # Execution role for the Lambda function: basic execution (managed
  # policy), invoke rights on this endpoint only, and logging to the
  # function's own log group.
  ApiFunctionRole:
    Description: IAM Role for the model API's Lambda Function
    Type: AWS::IAM::Role
    Properties:
      RoleName: !Sub lambda-execution-role-${StageName}-${StackName}-api
      # !Sub is required; without it the ${...} placeholders are stored
      # literally in the role description.
      Description: !Sub Lambda execution role for ${StageName}-${StackName}-api
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - lambda.amazonaws.com
            Action:
              - sts:AssumeRole
      ManagedPolicyArns:
        - arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole
      Policies:
        - PolicyName: SageMakerInvokeEndpoint
          PolicyDocument:
            Version: "2012-10-17"
            Statement:
              - Sid: SageMakerAllowInvokeEndpoint
                Effect: Allow
                Action:
                  - sagemaker:InvokeEndpoint
                Resource:
                  # Ref on the endpoint resolves to its ARN.
                  - !Ref Endpoint
        - PolicyName: CloudWatchLogs
          PolicyDocument:
            Version: "2012-10-17"
            Statement:
              - Sid: CloudWatchAllowLogging
                Effect: Allow
                Action:
                  - logs:CreateLogGroup
                  - logs:CreateLogStream
                  - logs:PutLogEvents
                Resource:
                  # The log group is addressed by name rather than through
                  # the ApiFunction resource, because the function already
                  # depends on this role. The second ARN (":*") covers the
                  # log streams inside the group, which logs:PutLogEvents
                  # is evaluated against.
                  - !Sub "arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/${SageMakerProjectName}-${StageName}-${StackName}-api"
                  - !Sub "arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/${SageMakerProjectName}-${StageName}-${StackName}-api:*"