Skip to content

Commit

Permalink
Add dummy_client_grpc.py
Browse files Browse the repository at this point in the history
  • Loading branch information
ronensc committed Jun 18, 2024
1 parent 8117d60 commit b718bf4
Showing 1 changed file with 41 additions and 0 deletions.
41 changes: 41 additions & 0 deletions examples/production_monitoring/dummy_client_grpc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import grpc
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import (
OTLPSpanExporter)
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import (BatchSpanProcessor,
ConsoleSpanExporter)
from opentelemetry.trace import SpanKind, set_tracer_provider
from opentelemetry.trace.propagation.tracecontext import (
TraceContextTextMapPropagator)

from vllm.entrypoints.grpc.pb import generation_pb2, generation_pb2_grpc

# Create the SDK tracer provider and register it as the global provider so
# that spans started below are routed through the processors added here.
trace_provider = TracerProvider()
set_tracer_provider(trace_provider)

# Every finished span is handed to two batch processors: one exporting over
# OTLP/gRPC and one exporting to the console. Both exporters are built with
# no arguments, i.e. whatever configuration the library applies by default.
for _exporter_cls in (OTLPSpanExporter, ConsoleSpanExporter):
    trace_provider.add_span_processor(BatchSpanProcessor(_exporter_cls()))

# Tracer used by the client code below; "dummy-client" is the name passed to
# get_tracer for this script.
tracer = trace_provider.get_tracer("dummy-client")

# Open a plaintext (insecure) channel to the generation server on
# localhost:50051; the channel is closed when the `with` block exits.
with grpc.insecure_channel("localhost:50051") as channel:
    stub = generation_pb2_grpc.GenerationServiceStub(channel)

    # The client span is made the current span for the duration of the call,
    # so the propagator below picks up its context.
    with tracer.start_as_current_span("client-span",
                                      kind=SpanKind.CLIENT) as span:
        prompt = "San Francisco is a"
        span.set_attribute("prompt", prompt)

        # Inject the current trace context into a plain dict, then convert
        # it to the list of (key, value) pairs passed as gRPC call metadata.
        carrier = {}
        TraceContextTextMapPropagator().inject(carrier)
        rpc_metadata = list(carrier.items())

        # Generation parameters: sampling temperature 0.0 and a stopping
        # criterion of at most 10 new tokens.
        sampling = generation_pb2.SamplingParameters(temperature=0.0)
        stopping = generation_pb2.StoppingCriteria(max_new_tokens=10)
        params = generation_pb2.Parameters(sampling=sampling,
                                           stopping=stopping)

        # A batch containing a single request for the prompt above.
        batch_request = generation_pb2.BatchedGenerationRequest(
            model_id="facebook/opt-125m",
            requests=[generation_pb2.GenerationRequest(text=prompt)],
            params=params,
        )

        # Issue the RPC with the trace-context metadata attached.
        response = stub.Generate(batch_request, metadata=rpc_metadata)

0 comments on commit b718bf4

Please sign in to comment.