Skip to content

Commit

Permalink
ci
Browse files (browse the repository at this point in the history)
  • Loading branch information
truskovskiyk committed Sep 7, 2024
1 parent bdab9c3 commit 807c3ec
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 48 deletions.
1 change: 1 addition & 0 deletions module-5/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.lock-file
16 changes: 2 additions & 14 deletions module-5/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ k9s -A


```
export WANDB_API_KEY='put your key'
export WANDB_API_KEY='your key here'
```


Expand Down Expand Up @@ -86,23 +86,11 @@ pytest -ss ./tests

# Triton Inference Server


## PyTriton

```
docker run -v $PWD:/dev_data --shm-size=1g --ulimit memlock=-1 --net=host --ulimit stack=67108864 -ti nvcr.io/nvidia/tritonserver:23.11-vllm-python-py3 /bin/bash
pip install -r /dev_data/requirements.txt
export WANDB_API_KEY='your key here'
tritonserver --http-port 5000 --model-repository /dev_data/triton-python-example/
make run_pytriton
```





# LLMs


Expand Down
14 changes: 5 additions & 9 deletions module-5/serving/pytriton_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,12 @@

# https://triton-inference-server.github.io/pytriton/latest/clients/
def main():
sequence = np.array([
["one day I will see the world"],
])
sequence = np.char.encode(sequence, "utf-8")
text = np.array([["one day I will see the world"],])
text = np.char.encode(text, "utf-8")

with ModelClient("0.0.0.0", "BART") as client:
result_dict = client.infer_batch(sequence)
for output_name, output_data in result_dict.items():
output_data = np.array2string(output_data, threshold=np.inf, max_line_width=np.inf, separator=",").replace("\n", "")
print(f"{output_name}: {output_data}.")
with ModelClient("0.0.0.0", "predictor_a") as client:
result_dict = client.infer_batch(text=text)
print(result_dict['probs'])


if __name__ == "__main__":
Expand Down
34 changes: 9 additions & 25 deletions module-5/serving/pytriton_serving.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import logging

import numpy as np
from transformers import pipeline

from pytriton.decorators import batch
from pytriton.model_config import ModelConfig, Tensor
Expand All @@ -14,42 +13,27 @@

predictor = Predictor.default_from_model_registry()

# Labels pre-cached on server side
LABELS = [
"travel",
"cooking",
"dancing",
"sport",
"music",
"entertainment",
"festival",
"movie",
"literature",
]


@batch
def _infer_fn(sequence: np.ndarray):
sequence = np.char.decode(sequence.astype("bytes"), "utf-8")
sequence = sequence.tolist()[0]
def _infer_fn(text: np.ndarray):
text = np.char.decode(text.astype("bytes"), "utf-8")
text = text.tolist()[0]

logger.info(f"sequence = {sequence}")
results = predictor.predict(text=sequence)
logger.info(f"sequence = {text}")
results = predictor.predict(text=text)
logger.info(f"results = {results}")

result_labels = ['travel' for _ in range(len(sequence))]
return {"label": np.char.encode(result_labels, "utf-8")}
return [results]


def main():

with Triton() as triton:
logger.info("Loading BART model.")
triton.bind(
model_name="BART",
model_name="predictor_a",
infer_func=_infer_fn,
inputs=[Tensor(name="sequence", dtype=bytes, shape=(-1,)),],
outputs=[Tensor(name="label", dtype=bytes, shape=(1,)),],
inputs=[Tensor(name="text", dtype=bytes, shape=(-1,)),],
outputs=[Tensor(name="probs", dtype=np.float32, shape=(-1,)),],
config=ModelConfig(max_batch_size=1),
)
logger.info("Serving inference")
Expand Down

0 comments on commit 807c3ec

Please sign in to comment.