Skip to content

Commit

Permalink
add tracing support
Browse files Browse the repository at this point in the history
- add OpenTelemetry packages
- respect trace headers in grpc server

Signed-off-by: Prashant Gupta <prashantgupta@us.ibm.com>
  • Loading branch information
ronensc authored and dtrifiro committed Jun 27, 2024
1 parent f28cbc8 commit 6b136ae
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 4 deletions.
13 changes: 9 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,17 @@ dynamic = ["version"]
dependencies = [
"vllm>=0.5.0",
"prometheus_client==0.20.0",
"grpcio==1.64.1",
"grpcio-health-checking==1.62.1",
"grpcio-reflection==1.64.1",
"grpcio==1.62.2",
"grpcio-health-checking==1.62.2",
"grpcio-reflection==1.62.2",
"transformers",
"accelerate==0.31.0",
"hf-transfer==0.1.6"
"hf-transfer==0.1.6",
# additional dependencies for OpenTelemetry tracing
"opentelemetry-sdk",
"opentelemetry-api",
"opentelemetry-exporter-otlp",
"opentelemetry-semantic-conventions-ai"
]

[project.urls]
Expand Down
21 changes: 21 additions & 0 deletions src/vllm_tgis_adapter/grpc/grpc_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,18 @@
)
from .validation import validate_input, validate_params

try:
from vllm.tracing import (
contains_trace_headers,
extract_trace_headers,
log_tracing_disabled_warning,
)
except ImportError:
_vllm_tracing_available = False
else:
_vllm_tracing_available = True


if TYPE_CHECKING:
import argparse
from collections.abc import AsyncIterator, MutableSequence
Expand Down Expand Up @@ -224,12 +236,21 @@ async def Generate(
prompt=req.text,
prompt_token_ids=input_ids,
)
kwargs = {}
if _vllm_tracing_available:
is_tracing_enabled = await self.engine.is_tracing_enabled()
headers = dict(context.invocation_metadata())
if is_tracing_enabled:
kwargs["trace_headers"] = extract_trace_headers(headers)
elif contains_trace_headers(headers):
log_tracing_disabled_warning()
generators.append(
self.engine.generate(
inputs=inputs,
sampling_params=sampling_params,
request_id=f"{request_id}-{i}",
**adapter_kwargs,
**kwargs,
),
)

Expand Down

0 comments on commit 6b136ae

Please sign in to comment.