Skip to content

Commit

Permalink
Introduce /v1/completions endpoint in new server
Browse files: browse the repository at this point in the history
  • Loading branch information
jart committed Nov 23, 2024
1 parent 0965a4b commit 241bf21
Show file tree
Hide file tree
Showing 4 changed files with 522 additions and 0 deletions.
2 changes: 2 additions & 0 deletions llamafile/server/client.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -650,6 +650,8 @@ Client::dispatcher()
return embedding();
if (p1 == "v1/embeddings")
return embedding();
if (p1 == "v1/completions")
return v1_completions();
if (p1 == "v1/chat/completions")
return v1_chat_completions();
if (p1 == "slotz")
Expand Down
4 changes: 4 additions & 0 deletions llamafile/server/client.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ struct Slot;
struct Worker;
struct TokenizeParams;
struct EmbeddingParams;
struct V1CompletionParams;
struct V1ChatCompletionParams;

struct Client
Expand Down Expand Up @@ -109,6 +110,9 @@ struct Client
bool embedding() __wur;
bool get_embedding_params(EmbeddingParams*) __wur;

bool v1_completions() __wur;
bool get_v1_completions_params(V1CompletionParams*) __wur;

bool v1_chat_completions() __wur;
bool get_v1_chat_completions_params(V1ChatCompletionParams*) __wur;

Expand Down
2 changes: 2 additions & 0 deletions llamafile/server/v1_chat_completions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,8 @@ Client::v1_chat_completions()
choice.getObject().erase("delta");
if (!send_response_chunk(response->content))
return false;
if (!send_response_chunk("data: [DONE]\n\n"))
return false;
return send_response_finish();
} else {
Json& usage = response->json["usage"];
Expand Down
Loading

0 comments on commit 241bf21

Please sign in to comment.