Merge pull request #234 from cliveseldon/proxies
Nvidia Inference Server and Tensorflow Serving Model Proxies
ukclivecox authored Sep 26, 2018
2 parents df02555 + 3ebb9f9 commit 43ee776
Showing 32 changed files with 3,318 additions and 41 deletions.
@@ -378,22 +378,29 @@ public SeldonDeployment defaulting(SeldonDeployment mlDep) {
 for(int cIdx = 0;cIdx < spec.getSpec().getContainersCount();cIdx++)
 {
     V1.Container c = spec.getSpec().getContainers(cIdx);
-    String containerServiceKey = getPredictorServiceNameKey(c.getName());
-    String containerServiceValue = getSeldonServiceName(mlDep, p, c.getName());
-    metaBuilder.putLabels(containerServiceKey, containerServiceValue);
-
-    int portNum;
-    if (servicePortMap.containsKey(c.getName()))
-        portNum = servicePortMap.get(c.getName());
-    else
+    // Only update graph and container if container is referenced in the inference graph
+    V1.Container c2;
+    if(isContainerInGraph(p.getGraph(), c))
     {
-        portNum = currentServicePortNum;
-        servicePortMap.put(c.getName(), portNum);
-        currentServicePortNum++;
+        String containerServiceKey = getPredictorServiceNameKey(c.getName());
+        String containerServiceValue = getSeldonServiceName(mlDep, p, c.getName());
+        metaBuilder.putLabels(containerServiceKey, containerServiceValue);
+
+        int portNum;
+        if (servicePortMap.containsKey(c.getName()))
+            portNum = servicePortMap.get(c.getName());
+        else
+        {
+            portNum = currentServicePortNum;
+            servicePortMap.put(c.getName(), portNum);
+            currentServicePortNum++;
+        }
+        c2 = this.updateContainer(c, findPredictiveUnitForContainer(mlDep.getSpec().getPredictors(pbIdx).getGraph(),c.getName()),portNum,deploymentName,predictorName);
+        updatePredictiveUnitBuilderByName(mlBuilder.getSpecBuilder().getPredictorsBuilder(pbIdx).getGraphBuilder(),c2,containerServiceValue);
     }
-    V1.Container c2 = this.updateContainer(c, findPredictiveUnitForContainer(mlDep.getSpec().getPredictors(pbIdx).getGraph(),c.getName()),portNum,deploymentName,predictorName);
+    else
+        c2 = c;
     mlBuilder.getSpecBuilder().getPredictorsBuilder(pbIdx).getComponentSpecsBuilder(ptsIdx).getSpecBuilder().addContainers(cIdx, c2);
-    updatePredictiveUnitBuilderByName(mlBuilder.getSpecBuilder().getPredictorsBuilder(pbIdx).getGraphBuilder(),c2,containerServiceValue);
 }
 mlBuilder.getSpecBuilder().getPredictorsBuilder(pbIdx).getComponentSpecsBuilder(ptsIdx).setMetadata(metaBuilder);
 }
@@ -504,6 +511,26 @@ private String getAmbassadorAnnotation(SeldonDeployment mlDep,String serviceName
 return restMapping + grpcMapping;
 }

+/**
+ *
+ * @param pu - A predictiveUnit
+ * @param container - a container
+ * @return True if container name can be found in graph of pu
+ */
+private boolean isContainerInGraph(PredictiveUnit pu,V1.Container container)
+{
+    if (pu.getName().equals(container.getName()))
+    {
+        return true;
+    }
+    else
+    {
+        for(int i=0;i<pu.getChildrenCount();i++)
+            if (isContainerInGraph(pu.getChildren(i),container))
+                return true;
+    }
+    return false;
+}

 private void addServicePort(PredictiveUnit pu,String serviceName,ServiceSpec.Builder svcSpecBuilder)
 {
@@ -651,7 +678,8 @@ public DeploymentResources createResources(SeldonDeployment mlDep) throws Seldon
 final String containerServiceKey = getPredictorServiceNameKey(c.getName());
 final String containerServiceValue = getSeldonServiceName(mlDep, p, c.getName());

-if (!createdServices.contains(containerServiceValue))
+// Only add a Service if container is a Seldon component in graph and we haven't already created a service for this container name
+if (isContainerInGraph(p.getGraph(), c) && !createdServices.contains(containerServiceValue))
 {
     //Add service
     Service.Builder s = Service.newBuilder()
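The effect of the new isContainerInGraph check is easiest to see against the nvidia-mnist chart further down: only containers whose names appear as nodes in the inference graph ("mnist-transformer" and "nvidia-proxy") get Seldon labels, ports, and a Service, while the plain "inference-server" sidecar is left untouched. A rough, illustrative Python sketch of the same recursive check (not part of this commit; the graph literal is taken from the example chart below):

# Illustrative only: mirrors the Java isContainerInGraph logic.
graph = {
    "name": "mnist-transformer",
    "children": [
        {"name": "nvidia-proxy", "children": []},
    ],
}

def is_container_in_graph(node, container_name):
    # True if container_name matches this node or any node in its subtree.
    if node["name"] == container_name:
        return True
    return any(is_container_in_graph(child, container_name)
               for child in node.get("children", []))

for name in ("mnist-transformer", "nvidia-proxy", "inference-server"):
    print(name, is_container_in_graph(graph, name))
# mnist-transformer True, nvidia-proxy True, inference-server False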
4 changes: 4 additions & 0 deletions examples/models/nvidia-mnist/.s2i/environment
@@ -0,0 +1,4 @@
MODEL_NAME=MnistTransformer
API_TYPE=REST
SERVICE_TYPE=TRANSFORMER
PERSISTENCE=0
14 changes: 14 additions & 0 deletions examples/models/nvidia-mnist/Makefile
@@ -0,0 +1,14 @@
TRANSFORMER_IMAGE=seldonio/mnist-caffe2-transformer:0.1

clean:
	rm -f tensorrt_mnist/1/model.plan
	rm -rf MNIST_data
	rm -f mnist.json
	rm -f tmp.json

build_transformer:
	s2i build . seldonio/seldon-core-s2i-python3:0.2 ${TRANSFORMER_IMAGE}

push_transformer:
	docker push ${TRANSFORMER_IMAGE}

27 changes: 27 additions & 0 deletions examples/models/nvidia-mnist/MnistTransformer.py
@@ -0,0 +1,27 @@
import numpy as np

MEANS=np.array([255.0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,254,254,254,253,252,252,251,251,252,252,253,254,254,255,255,255,255,255,255,255,255,255,255,255,255,255,254,254,253,251,249,248,245,243,242,242,243,246,248,251,253,254,255,255,255,255,255,255,255,255,255,255,255,254,253,250,247,242,235,228,220,213,210,211,216,224,232,240,246,251,253,254,255,255,255,255,255,255,255,255,254,251,248,242,234,223,211,196,181,170,164,166,175,189,205,221,233,243,248,252,254,255,255,255,255,255,255,254,252,248,241,231,217,202,184,166,149,136,131,134,143,159,180,201,220,234,243,249,253,255,255,255,255,255,254,253,249,243,233,219,201,181,161,143,130,122,120,122,129,141,161,185,208,227,240,248,252,254,255,255,255,255,254,251,246,238,226,208,187,164,146,135,131,132,133,132,133,139,154,178,202,223,239,248,252,255,255,255,255,254,253,251,245,236,221,200,177,156,144,144,150,156,156,151,144,144,156,178,202,224,240,249,253,255,255,255,255,254,253,251,245,235,218,195,172,155,152,161,172,176,170,161,150,149,161,183,207,227,242,250,254,255,255,255,255,255,254,251,246,234,215,191,168,156,160,173,182,179,169,157,147,149,166,190,213,230,243,251,254,255,255,255,255,255,254,252,246,233,212,186,165,157,164,175,176,165,153,142,137,147,170,196,217,231,242,251,255,255,255,255,255,255,254,252,245,230,207,182,163,158,164,168,158,143,131,125,128,146,174,200,218,231,241,250,254,255,255,255,255,255,255,252,243,227,205,181,164,159,161,157,139,124,115,118,127,148,176,199,216,230,240,249,254,255,255,255,255,255,254,251,241,224,204,184,169,163,160,150,132,119,116,123,133,153,177,197,214,228,240,249,254,255,255,255,255,255,254,251,239,222,205,189,177,171,166,154,139,129,128,134,144,159,177,195,213,228,241,249,254,255,255,255,255,255,254,249,237,222,207,195,186,180,175,166,153,143,140,142,150,162,178,195,214,230,242,250,254,255,255,255,255,255,253,247,235,220,207,197,189,183,179,172,160,148,142,143,150,161,178,198,217,233,244,250,254,255,255,255,255,255,253,246,233,218,204,192,184,177,172,165,153,142,137,139,148,163,183,204,222,236,246,251,254,255,255,255,255,255,253,247,234,218,201,186,174,165,157,148,137,130,129,137,151,171,194,214,230,242,248,252,254,255,255,255,255,255,253,249,238,222,203,184,168,154,143,132,124,123,130,145,165,188,209,227,239,247,251,253,255,255,255,255,255,255,254,251,244,232,214,194,174,156,142,132,130,134,148,167,189,210,226,238,246,250,253,254,255,255,255,255,255,255,255,253,250,243,231,215,196,178,163,155,156,164,179,197,215,230,240,247,251,253,254,255,255,255,255,255,255,255,255,254,253,251,246,238,228,217,208,203,204,210,218,228,236,243,248,251,253,254,255,255,255,255,255,255,255,255,255,255,255,254,252,249,245,241,238,237,237,239,242,245,247,250,252,253,254,255,255,255,255,255,255,255,255,255,255,255,255,254,254,253,252,250,249,248,249,249,250,252,253,253,254,254,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,254,254,254,254,255,255,255,255,255,255,255,255,255,255,255,255])


class MnistTransformer(object):

    def __init__(self):
        print("init")

    def preProcessMNIST(self, X):
        '''
        Convert values assumed to be in the 0-1 range to values in 0-255.
        Then subtract the training means needed by the Caffe2 model.
        Finally, reshape the output to the shape expected by the model.
        '''
        X = X * 255
        X = 255 - X
        X = (X.reshape(784) - MEANS).reshape(28, 28, 1)
        X = np.transpose(X, (2, 0, 1))
        return X

    def transform_input(self, X, names):
        return self.preProcessMNIST(X)

    def transform_output(self, X, names):
        return X.reshape(1, 10)
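A minimal usage sketch for the transformer above (illustrative, not part of the commit; assumes MnistTransformer.py is importable and numpy is installed):

import numpy as np
from MnistTransformer import MnistTransformer

t = MnistTransformer()
# A flattened 28x28 image with pixel values in [0, 1], as delivered by the Seldon wrapper.
sample = np.random.rand(1, 784)
transformed = t.transform_input(sample, names=[])
print(transformed.shape)  # (1, 28, 28): channel-first input for the Caffe2/TensorRT MNIST model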
1 change: 1 addition & 0 deletions examples/models/nvidia-mnist/fetch-model.sh
@@ -0,0 +1 @@
wget -O mnist_tensorrt_model/1/model.plan http://seldon-public.s3.amazonaws.com/nvidia-mnist-model/model.plan
6 changes: 6 additions & 0 deletions examples/models/nvidia-mnist/nvidia-mnist/Chart.yaml
@@ -0,0 +1,6 @@
apiVersion: v1
description: Seldon MNIST Nvidia Inference Server Example
name: nvidia-mnist
sources:
- https://github.com/SeldonIO/seldon-core
version: 0.1
@@ -0,0 +1,135 @@
{
"apiVersion": "machinelearning.seldon.io/v1alpha2",
"kind": "SeldonDeployment",
"metadata": {
"labels": {
"app": "seldon"
},
"name": "nvidia-mnist",
"namespace": "{{ .Release.Namespace }}"
},
"spec": {
"name": "caffe2-mnist",
"predictors": [
{
"componentSpecs": [{
"spec": {
"containers": [
{
"image": "seldonio/mnist-caffe2-transformer:0.1",
"name": "mnist-transformer"
},
{
"image": "seldonio/nvidia-inference-server-proxy:0.1",
"name": "nvidia-proxy"
},
{
"args": [
"--model-store={{ .Values.nvidia.model_store }}"
],
"command": [
"inference_server"
],
"image": "nvcr.io/nvidia/inferenceserver:18.08.1-py2",
"livenessProbe": {
"failureThreshold": 3,
"handler":{
"httpGet": {
"path": "/api/health/live",
"port": {{ .Values.nvidia.port }},
"scheme": "HTTP"
}
},
"initialDelaySeconds": 5,
"periodSeconds": 5,
"successThreshold": 1,
"timeoutSeconds": 1
},
"name": "inference-server",
"ports": [
{
"containerPort": {{ .Values.nvidia.port }},
"protocol": "TCP"
},
{
"containerPort": 8001,
"protocol": "TCP"
},
{
"containerPort": 8002,
"protocol": "TCP"
}
],
"readinessProbe": {
"failureThreshold": 3,
"handler":{
"httpGet": {
"path": "/api/health/ready",
"port": {{ .Values.nvidia.port }},
"scheme": "HTTP"
}
},
"initialDelaySeconds": 5,
"periodSeconds": 5,
"successThreshold": 1,
"timeoutSeconds": 1
},
"resources": {
"limits": {
"nvidia.com/gpu": "1"
},
"requests": {
"cpu": "100m",
"nvidia.com/gpu": "1"
}
},
"securityContext": {
"runAsUser": 1000
}
}
],
"terminationGracePeriodSeconds": 1,
"imagePullSecrets": [
{
"name": "ngc"
}
]
}
}],
"graph": {
"name": "mnist-transformer",
"endpoint": { "type" : "REST" },
"type": "TRANSFORMER",
"children": [
{
"name": "nvidia-proxy",
"endpoint": { "type" : "REST" },
"type": "MODEL",
"children": [],
"parameters":
[
{
"name":"url",
"type":"STRING",
"value":"127.0.0.1:{{ .Values.nvidia.port }}"
},
{
"name":"model_name",
"type":"STRING",
"value":"tensorrt_mnist"
},
{
"name":"protocol",
"type":"STRING",
"value":"HTTP"
}
]
}
]
},
"name": "mnist-nvidia",
"replicas": 1
}
]
}
}
4 changes: 4 additions & 0 deletions examples/models/nvidia-mnist/nvidia-mnist/values.yaml
@@ -0,0 +1,4 @@
nvidia:
  model_store: gs://seldon-inference-server-model-store
  port: 8000
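Once the chart is installed, the deployment can be exercised over REST. A hedged sketch of a prediction call through Ambassador (illustrative, not part of the commit; the gateway host/port and the /seldon/<deployment>/api/v0.1/predictions path are assumptions that depend on how Seldon Core and Ambassador are exposed in the cluster):

import json
import numpy as np
import requests

AMBASSADOR = "http://localhost:8003"  # assumed port-forwarded Ambassador gateway
payload = {"data": {"ndarray": np.random.rand(1, 784).tolist()}}

r = requests.post(AMBASSADOR + "/seldon/nvidia-mnist/api/v0.1/predictions", json=payload)
print(json.dumps(r.json(), indent=2))  # ten class scores from the tensorrt_mnist model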
