Skip to content

Commit

Permalink
adding multiple grpc connections for python server and executor client (
Browse files Browse the repository at this point in the history
#3356)

* adding multiple grpc connections for python server and executor client

* fix PR comments, add GRPC workers arg

* non-daemon GRPC server process

* use grpc_workers arg

* run make fmt on python files

* run make test on executor

* fix syntax error

* format python files

* Added test for evaluating workers performance

* Added testing model for bench

Co-authored-by: Alejandro Saucedo <axsauze@gmail.com>
  • Loading branch information
mwm5945 and axsaucedo authored Jul 15, 2021
1 parent 3de8b4e commit 4d86baf
Show file tree
Hide file tree
Showing 13 changed files with 393 additions and 211 deletions.
3 changes: 2 additions & 1 deletion examples/models/testing/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,6 @@ run_build_all: build_predict build_predict_raw
run_push_all: push_predict push_predict_raw

.PHONY: run_kind_load_all
run_kind_load_all: kind_load_predict kind_load_predict_raw
run_kind_load_all: run_build_all kind_load_predict kind_load_predict_raw


35 changes: 16 additions & 19 deletions examples/triton_gpt2/AzureSetup.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -54,17 +54,17 @@
"metadata": {},
"outputs": [],
"source": [
"subscription_id = \"<xxxx-xxxx-xxxx-xxxx>\" # fill in\n",
"resource_group = \"seldon\" # feel free to replace or use this default\n",
"region = \"eastus2\" # feel free to replace or use this default\n",
"subscription_id = \"<xxxx-xxxx-xxxx-xxxx>\" # fill in\n",
"resource_group = \"seldon\" # feel free to replace or use this default\n",
"region = \"eastus2\" # feel free to replace or use this default\n",
"\n",
"storage_account_name = \"modeltestsgpt\" # fill in\n",
"storage_container_name = \"gpt2tf\" \n",
"storage_account_name = \"modeltestsgpt\" # fill in\n",
"storage_container_name = \"gpt2tf\"\n",
"\n",
"aks_name = \"modeltests\" # feel free to replace or use this default\n",
"aks_gpupool = \"gpunodes\" # feel free to replace or use this default\n",
"aks_cpupool = \"cpunodes\" # feel free to replace or use this default\n",
"aks_gpu_sku = \"Standard_NC6s_v3\" # feel free to replace or use this default \n",
"aks_name = \"modeltests\" # feel free to replace or use this default\n",
"aks_gpupool = \"gpunodes\" # feel free to replace or use this default\n",
"aks_cpupool = \"cpunodes\" # feel free to replace or use this default\n",
"aks_gpu_sku = \"Standard_NC6s_v3\" # feel free to replace or use this default\n",
"aks_cpu_sku = \"Standard_F8s_v2\""
]
},
Expand Down Expand Up @@ -200,7 +200,7 @@
"metadata": {},
"outputs": [],
"source": [
"!kubectl taint nodes -l kubernetes.azure.com/mode=system CriticalAddonsOnly=true:NoSchedule --overwrite\n"
"!kubectl taint nodes -l kubernetes.azure.com/mode=system CriticalAddonsOnly=true:NoSchedule --overwrite"
]
},
{
Expand All @@ -221,7 +221,7 @@
"!az feature register --name GPUDedicatedVHDPreview --namespace Microsoft.ContainerService\n",
"!az feature list -o table --query \"[?contains(name, 'Microsoft.ContainerService/GPUDedicatedVHDPreview')].{Name:name,State:properties.state}\"\n",
"!az provider register --namespace Microsoft.ContainerService\n",
"!az extension add --name aks-preview\n"
"!az extension add --name aks-preview"
]
},
{
Expand Down Expand Up @@ -355,7 +355,7 @@
}
],
"source": [
"%%time \n",
"%%time\n",
"!az aks nodepool add \\\n",
" --resource-group {resource_group} \\\n",
" --cluster-name {aks_name} \\\n",
Expand Down Expand Up @@ -418,7 +418,7 @@
],
"source": [
"%%time\n",
"!az storage account create -n {storage_account_name} -g {resource_group} --query 'provisioningState'\n"
"!az storage account create -n {storage_account_name} -g {resource_group} --query 'provisioningState'"
]
},
{
Expand Down Expand Up @@ -451,7 +451,7 @@
"metadata": {},
"outputs": [],
"source": [
"storage_account_key = key[0] "
"storage_account_key = key[0]"
]
},
{
Expand Down Expand Up @@ -525,8 +525,7 @@
],
"source": [
"!helm repo add blob-csi-driver https://raw.githubusercontent.com/kubernetes-sigs/blob-csi-driver/master/charts\n",
"!helm install blob-csi-driver blob-csi-driver/blob-csi-driver --namespace kube-system --version v1.1.0\n",
"\n"
"!helm install blob-csi-driver blob-csi-driver/blob-csi-driver --namespace kube-system --version v1.1.0"
]
},
{
Expand Down Expand Up @@ -568,7 +567,6 @@
}
],
"source": [
"\n",
"# Create secret to access storage account\n",
"!kubectl create secret generic azure-blobsecret --from-literal azurestorageaccountname={storage_account_name} --from-literal azurestorageaccountkey=\"{storage_account_key}\" --type=Opaque "
]
Expand Down Expand Up @@ -661,7 +659,6 @@
}
],
"source": [
"\n",
"# Create PersistentVolume and PersistentVolumeClaim for container mounts\n",
"!kubectl apply -f azure-blobfuse-pv.yaml"
]
Expand Down Expand Up @@ -692,4 +689,4 @@
"metadata": {}
}
]
}
}
133 changes: 74 additions & 59 deletions examples/triton_gpt2/GPT2-ONNX-Azure.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
},
"outputs": [],
"source": [
"!pip install --trusted-host=pypi.python.org --trusted-host=pypi.org --trusted-host=files.pythonhosted.org -r requirements.txt\n"
"!pip install --trusted-host=pypi.python.org --trusted-host=pypi.org --trusted-host=files.pythonhosted.org -r requirements.txt"
]
},
{
Expand All @@ -75,9 +75,12 @@
"metadata": {},
"outputs": [],
"source": [
"from transformers import TFGPT2LMHeadModel, GPT2Tokenizer\n",
"from transformers import GPT2Tokenizer, TFGPT2LMHeadModel\n",
"\n",
"tokenizer = GPT2Tokenizer.from_pretrained(\"gpt2\")\n",
"model = TFGPT2LMHeadModel.from_pretrained(\"gpt2\", from_pt=True, pad_token_id=tokenizer.eos_token_id)\n",
"model = TFGPT2LMHeadModel.from_pretrained(\n",
" \"gpt2\", from_pt=True, pad_token_id=tokenizer.eos_token_id\n",
")\n",
"model.save_pretrained(\"./tfgpt2model\", saved_model=True)"
]
},
Expand Down Expand Up @@ -114,11 +117,11 @@
"metadata": {},
"outputs": [],
"source": [
"resource_group = \"seldon\" # feel free to replace or use this default\n",
"aks_name = \"modeltests\" \n",
"resource_group = \"seldon\" # feel free to replace or use this default\n",
"aks_name = \"modeltests\"\n",
"\n",
"storage_account_name = \"modeltestsgpt\" # fill in\n",
"storage_container_name = \"gpt2onnx\" "
"storage_account_name = \"modeltestsgpt\" # fill in\n",
"storage_container_name = \"gpt2onnx\""
]
},
{
Expand Down Expand Up @@ -378,7 +381,6 @@
}
],
"source": [
"\n",
"!kubectl apply -f gpt2-deploy.yaml -n default"
]
},
Expand Down Expand Up @@ -491,39 +493,44 @@
}
],
"source": [
"import requests\n",
"import http\n",
"import json\n",
"\n",
"import numpy as np\n",
"import requests\n",
"from transformers import GPT2Tokenizer\n",
"\n",
"tokenizer = GPT2Tokenizer.from_pretrained(\"gpt2\")\n",
"input_text = 'I love Artificial Intelligence'\n",
"input_text = \"I love Artificial Intelligence\"\n",
"count = 0\n",
"max_gen_len = 8\n",
"gen_sentence = input_text\n",
"while count < max_gen_len:\n",
" input_ids = tokenizer.encode(gen_sentence, return_tensors='tf')\n",
" input_ids = tokenizer.encode(gen_sentence, return_tensors=\"tf\")\n",
" shape = input_ids.shape.as_list()\n",
" payload = {\n",
" \"inputs\": [\n",
" {\"name\": \"input_ids:0\",\n",
" \"datatype\": \"INT32\",\n",
" \"shape\": shape,\n",
" \"data\": input_ids.numpy().tolist()\n",
" },\n",
" {\"name\": \"attention_mask:0\",\n",
" \"datatype\": \"INT32\",\n",
" \"shape\": shape,\n",
" \"data\": np.ones(shape, dtype=np.int32).tolist()\n",
" }\n",
" ]\n",
" }\n",
" \"inputs\": [\n",
" {\n",
" \"name\": \"input_ids:0\",\n",
" \"datatype\": \"INT32\",\n",
" \"shape\": shape,\n",
" \"data\": input_ids.numpy().tolist(),\n",
" },\n",
" {\n",
" \"name\": \"attention_mask:0\",\n",
" \"datatype\": \"INT32\",\n",
" \"shape\": shape,\n",
" \"data\": np.ones(shape, dtype=np.int32).tolist(),\n",
" },\n",
" ]\n",
" }\n",
"\n",
" tfserving_url = \"http://\" + str(ingress_ip) + \"/seldon/default/gpt2gpu/v2/models/gpt2/infer\"\n",
" print(f'sending request to {tfserving_url}')\n",
" \n",
" with requests.post(tfserving_url, json=payload) as ret: \n",
" tfserving_url = (\n",
" \"http://\" + str(ingress_ip) + \"/seldon/default/gpt2gpu/v2/models/gpt2/infer\"\n",
" )\n",
" print(f\"sending request to {tfserving_url}\")\n",
"\n",
" with requests.post(tfserving_url, json=payload) as ret:\n",
" try:\n",
" res = ret.json()\n",
" except:\n",
Expand All @@ -535,14 +542,15 @@
"\n",
" # take the best next token probability of the last token of input (greedy approach)\n",
" next_token = logits.argmax(axis=2)[0]\n",
" next_token_str = tokenizer.decode(next_token[-1:], skip_special_tokens=True,\n",
" clean_up_tokenization_spaces=True).strip()\n",
" gen_sentence += ' ' + next_token_str\n",
" print (f'Sentence: {gen_sentence}')\n",
" next_token_str = tokenizer.decode(\n",
" next_token[-1:], skip_special_tokens=True, clean_up_tokenization_spaces=True\n",
" ).strip()\n",
" gen_sentence += \" \" + next_token_str\n",
" print(f\"Sentence: {gen_sentence}\")\n",
"\n",
" count += 1\n",
"\n",
"print(f'Input: {input_text}\\nOutput: {gen_sentence}')"
"print(f\"Input: {input_text}\\nOutput: {gen_sentence}\")"
]
},
{
Expand Down Expand Up @@ -721,41 +729,48 @@
}
],
"source": [
"from subprocess import run, Popen, PIPE\n",
"import base64\n",
"import json\n",
"from subprocess import PIPE, Popen, run\n",
"\n",
"import numpy as np\n",
"from transformers import TFGPT2LMHeadModel, GPT2Tokenizer\n",
"import base64\n",
"from transformers import GPT2Tokenizer, TFGPT2LMHeadModel\n",
"\n",
"tokenizer = GPT2Tokenizer.from_pretrained(\"gpt2\")\n",
"input_text = 'I enjoy working in Seldon'\n",
"input_ids = tokenizer.encode(input_text, return_tensors='tf')\n",
"input_text = \"I enjoy working in Seldon\"\n",
"input_ids = tokenizer.encode(input_text, return_tensors=\"tf\")\n",
"shape = input_ids.shape.as_list()\n",
"payload = {\n",
"\t\t\"inputs\": [\n",
"\t\t\t{\"name\": \"input_ids:0\",\n",
"\t\t\t \"datatype\": \"INT32\",\n",
"\t\t\t \"shape\": shape,\n",
"\t\t\t \"data\": input_ids.numpy().tolist()\n",
"\t\t\t },\n",
"\t\t\t{\"name\": \"attention_mask:0\",\n",
"\t\t\t \"datatype\": \"INT32\",\n",
"\t\t\t \"shape\": shape,\n",
"\t\t\t \"data\": np.ones(shape, dtype=np.int32).tolist()\n",
"\t\t\t }\n",
"\t\t\t]\n",
"\t\t}\n",
"tfserving_url = \"http://\" + str(ingress_ip) + \"/seldon/default/gpt2gpu/v2/models/gpt2/infer\"\n",
"print(f'preparing request to {tfserving_url}')\n",
" \"inputs\": [\n",
" {\n",
" \"name\": \"input_ids:0\",\n",
" \"datatype\": \"INT32\",\n",
" \"shape\": shape,\n",
" \"data\": input_ids.numpy().tolist(),\n",
" },\n",
" {\n",
" \"name\": \"attention_mask:0\",\n",
" \"datatype\": \"INT32\",\n",
" \"shape\": shape,\n",
" \"data\": np.ones(shape, dtype=np.int32).tolist(),\n",
" },\n",
" ]\n",
"}\n",
"tfserving_url = (\n",
" \"http://\" + str(ingress_ip) + \"/seldon/default/gpt2gpu/v2/models/gpt2/infer\"\n",
")\n",
"print(f\"preparing request to {tfserving_url}\")\n",
"\n",
"cmd= {\"method\": \"POST\",\n",
"\t\t\"header\": {\"Content-Type\": [\"application/json\"] },\n",
"\t\t\"url\": tfserving_url,\n",
"\t\t\"body\": base64.b64encode(bytes(json.dumps(payload), \"utf-8\")).decode(\"utf-8\")}\n",
"cmd = {\n",
" \"method\": \"POST\",\n",
" \"header\": {\"Content-Type\": [\"application/json\"]},\n",
" \"url\": tfserving_url,\n",
" \"body\": base64.b64encode(bytes(json.dumps(payload), \"utf-8\")).decode(\"utf-8\"),\n",
"}\n",
"\n",
"with open(\"vegeta_target.json\", mode=\"w\") as file:\n",
"\tjson.dump(cmd, file)\n",
"\tfile.write('\\n\\n')"
" json.dump(cmd, file)\n",
" file.write(\"\\n\\n\")"
]
},
{
Expand Down
Loading

0 comments on commit 4d86baf

Please sign in to comment.