@@ -101,7 +101,10 @@ async def chat_completion_stream_generator(
         role = self.get_chat_request_role(request)
         for i in range(request.n):
             choice_data = ChatCompletionResponseStreamChoice(
-                index=i, delta=DeltaMessage(role=role), finish_reason=None)
+                index=i,
+                delta=DeltaMessage(role=role),
+                logprobs=None,
+                finish_reason=None)
             chunk = ChatCompletionStreamResponse(id=request_id,
                                                  object=chunk_object_type,
                                                  created=created_time,
@@ -118,6 +121,7 @@ async def chat_completion_stream_generator(
                     "content") and request.messages[-1].get(
                         "role") == role:
                 last_msg_content = request.messages[-1]["content"]
+
             if last_msg_content:
                 for i in range(request.n):
                     choice_data = ChatCompletionResponseStreamChoice(
@@ -129,6 +133,7 @@ async def chat_completion_stream_generator(
                         object=chunk_object_type,
                         created=created_time,
                         choices=[choice_data],
+                        logprobs=None,
                         model=model_name)
                     data = chunk.model_dump_json(exclude_unset=True)
                     yield f"data: {data}\n\n"
@@ -145,15 +150,29 @@ async def chat_completion_stream_generator(
                 if finish_reason_sent[i]:
                     continue

+                delta_token_ids = output.token_ids[previous_num_tokens[i]:]
+                top_logprobs = output.logprobs[
+                    previous_num_tokens[i]:] if output.logprobs else None
+
+                if request.logprobs:
+                    logprobs = self._create_logprobs(
+                        token_ids=delta_token_ids,
+                        top_logprobs=top_logprobs,
+                        num_output_top_logprobs=request.logprobs,
+                        initial_text_offset=len(previous_texts[i]),
+                    )
+                else:
+                    logprobs = None
+
                 delta_text = output.text[len(previous_texts[i]):]
                 previous_texts[i] = output.text
                 previous_num_tokens[i] = len(output.token_ids)
-
                 if output.finish_reason is None:
                     # Send token-by-token response for each request.n
                     choice_data = ChatCompletionResponseStreamChoice(
                         index=i,
                         delta=DeltaMessage(content=delta_text),
+                        logprobs=logprobs,
                         finish_reason=None)
                     chunk = ChatCompletionStreamResponse(
                         id=request_id,
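
The slicing introduced in this hunk is the heart of the streaming change: output.token_ids and output.logprobs accumulate across engine steps, so each chunk must take only the suffix past previous_num_tokens[i] before the counter is advanced. A standalone toy sketch of that bookkeeping (illustrative values only, not the PR's code):

    # Cumulative output after the current engine step.
    token_ids = [11, 22, 33, 44, 55]
    token_logprobs = [-0.1, -0.5, -0.2, -0.9, -0.3]
    previous_num_tokens = 3  # tokens already emitted in earlier chunks

    # Emit only the newly generated suffix in this chunk.
    delta_token_ids = token_ids[previous_num_tokens:]      # [44, 55]
    delta_logprobs = token_logprobs[previous_num_tokens:]  # [-0.9, -0.3]

    # Advance the cursor so the next chunk starts after these tokens.
    previous_num_tokens = len(token_ids)

The same cursor also yields initial_text_offset=len(previous_texts[i]), which keeps the text offsets in each logprobs payload aligned with the text streamed so far. The finish-reason path gets the same logprobs field:
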
@@ -174,6 +193,7 @@ async def chat_completion_stream_generator(
                     choice_data = ChatCompletionResponseStreamChoice(
                         index=i,
                         delta=DeltaMessage(content=delta_text),
+                        logprobs=logprobs,
                         finish_reason=output.finish_reason)
                     chunk = ChatCompletionStreamResponse(
                         id=request_id,
@@ -208,11 +228,25 @@ async def chat_completion_full_generator(
         assert final_res is not None

         choices = []
+
         role = self.get_chat_request_role(request)
         for output in final_res.outputs:
+            token_ids = output.token_ids
+            top_logprobs = output.logprobs
+
+            if request.logprobs:
+                logprobs = self._create_logprobs(
+                    token_ids=token_ids,
+                    top_logprobs=top_logprobs,
+                    num_output_top_logprobs=request.logprobs,
+                )
+            else:
+                logprobs = None
+
             choice_data = ChatCompletionResponseChoice(
                 index=output.index,
                 message=ChatMessage(role=role, content=output.text),
+                logprobs=logprobs,
                 finish_reason=output.finish_reason,
             )
             choices.append(choice_data)
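
With both the streaming and non-streaming generators wired up, a client can request per-token logprobs from the OpenAI-compatible chat endpoint. A minimal sketch, assuming a vLLM server on localhost:8000 and a placeholder model name; note that the diff forwards request.logprobs straight into num_output_top_logprobs, i.e. the field here is an integer count of top alternatives per token:

    import requests

    resp = requests.post(
        "http://localhost:8000/v1/chat/completions",
        json={
            "model": "my-model",  # placeholder
            "messages": [{"role": "user", "content": "Hello!"}],
            "max_tokens": 16,
            "logprobs": 2,  # top-2 logprobs per generated token
        },
    )
    for choice in resp.json()["choices"]:
        print(choice["message"]["content"])
        print(choice["logprobs"])  # per-token logprob payload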