Skip to content

Commit

Permalink
openai_api
Browse files Browse the repository at this point in the history
  • Loading branch information
x54-729 committed Jan 24, 2024
1 parent bb3a711 commit 17469ee
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 10 deletions.
2 changes: 1 addition & 1 deletion tools/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ if __name__ == "__main__":
openai.api_base = "http://localhost:8000/internlm"
openai.api_key = "none"
for chunk in openai.ChatCompletion.create(
-model="internlm-chat-7b",
+model="internlm2-chat-7b",
messages=[
{"role": "user", "content": "你好"},
],
Expand Down
2 changes: 1 addition & 1 deletion tools/README_EN.md
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ if __name__ == "__main__":
openai.api_base = "http://localhost:8000/internlm"
openai.api_key = "none"
for chunk in openai.ChatCompletion.create(
-model="internlm-chat-7b",
+model="internlm2-chat-7b",
messages=[
{"role": "user", "content": "Hello!"},
],
Expand Down
17 changes: 9 additions & 8 deletions tools/openai_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,12 @@
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
from sse_starlette.sse import EventSourceResponse

from transformers import AutoModelForCausalLM, AutoTokenizer


@asynccontextmanager
-async def lifespan(app: FastAPI): # collects GPU memory
+async def lifespan(app: FastAPI): # collects GPU memory # pylint: disable=W0613
yield
if torch.cuda.is_available():
torch.cuda.empty_cache()
Expand Down Expand Up @@ -85,13 +86,13 @@ class ChatCompletionResponse(BaseModel):

@app.get("/internlm/models", response_model=ModelList)
async def list_models():
-model_card = ModelCard(id="internlm")
+model_card = ModelCard(id="internlm2")
return ModelList(data=[model_card])


@app.post("/internlm/chat/completions", response_model=ChatCompletionResponse)
async def create_chat_completion(request: ChatCompletionRequest):
-global model, tokenizer
+global model, tokenizer # pylint: disable=W0602

if request.messages[-1].role != "user":
raise HTTPException(status_code=400, detail="Invalid request")
Expand Down Expand Up @@ -120,11 +121,11 @@ async def create_chat_completion(request: ChatCompletionRequest):


async def predict(query: str, history: List[List[str]], model_id: str):
-global model, tokenizer
+global model, tokenizer # pylint: disable=W0602

choice_data = ChatCompletionResponseStreamChoice(index=0, delta=DeltaMessage(role="assistant"), finish_reason=None)
chunk = ChatCompletionResponse(model=model_id, choices=[choice_data], object="chat.completion.chunk")
-yield "{}".format(chunk.json(exclude_unset=True, ensure_ascii=False))
+yield "{}".format(chunk.model_dump_json(exclude_unset=True))

current_length = 0

Expand All @@ -140,16 +141,16 @@ async def predict(query: str, history: List[List[str]], model_id: str):
index=0, delta=DeltaMessage(content=new_text), finish_reason=None
)
chunk = ChatCompletionResponse(model=model_id, choices=[choice_data], object="chat.completion.chunk")
-yield "{}".format(chunk.json(exclude_unset=True, ensure_ascii=False))
+yield "{}".format(chunk.model_dump_json(exclude_unset=True))

choice_data = ChatCompletionResponseStreamChoice(index=0, delta=DeltaMessage(), finish_reason="stop")
chunk = ChatCompletionResponse(model=model_id, choices=[choice_data], object="chat.completion.chunk")
-yield "{}".format(chunk.json(exclude_unset=True, ensure_ascii=False))
+yield "{}".format(chunk.model_dump_json(exclude_unset=True))
yield "[DONE]"


if __name__ == "__main__":
-model_name = "internlm/internlm-chat-7b"
+model_name = "internlm/internlm2-chat-7b"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
model.eval()
Expand Down

0 comments on commit 17469ee

Please sign in to comment.