diff --git a/src/vllm_tgis_adapter/grpc/grpc_server.py b/src/vllm_tgis_adapter/grpc/grpc_server.py index cf779b3..b948b49 100644 --- a/src/vllm_tgis_adapter/grpc/grpc_server.py +++ b/src/vllm_tgis_adapter/grpc/grpc_server.py @@ -345,7 +345,13 @@ async def GenerateStream( prompt=request.request.text, prompt_token_ids=input_ids, ) - + kwargs = {} + is_tracing_enabled = await self.engine.is_tracing_enabled() + headers = dict(context.invocation_metadata()) + if is_tracing_enabled: + kwargs["trace_headers"] = extract_trace_headers(headers) + elif contains_trace_headers(headers): + log_tracing_disabled_warning() result_generator = self.engine.generate( # prompt is supplied for observability, the text is not # re-tokenized when `prompt_token_ids` is supplied @@ -353,6 +359,7 @@ async def GenerateStream( sampling_params=sampling_params, request_id=request_id, **adapter_kwargs, + **kwargs, ) resp_options = request.params.response