Feature/vllm with chat api (#467)

zzhangpurdue authored Jun 4, 2024
1 parent 569b257 commit 0f3e2a8
Showing 4 changed files with 195 additions and 14 deletions.
32 changes: 30 additions & 2 deletions modelscope_agent/llm/openai.py
@@ -18,11 +18,13 @@ def __init__(self,
                 support_stream: Optional[bool] = None,
                 **kwargs):
        super().__init__(model, model_server, is_function_call)

        api_base = kwargs.get('api_base', 'https://api.openai.com/v1').strip()
        default_api_base = os.getenv('OPENAI_API_BASE',
                                     'https://api.openai.com/v1')
        api_base = kwargs.get('api_base', default_api_base).strip()
        api_key = kwargs.get('api_key',
                             os.getenv('OPENAI_API_KEY',
                                       default='EMPTY')).strip()
        logger.info(f'client url {api_base}, client key: {api_key}')
        self.client = OpenAI(api_key=api_key, base_url=api_base)
        self.is_function_call = is_function_call
        self.is_chat = is_chat
@@ -154,3 +156,29 @@ def chat_with_functions(self,
            model=self.model, messages=messages, **kwargs)
        # TODO: error handling
        return response.choices[0].message


@register_llm('vllm')
class Vllm(BaseChatModel):

    def _chat_stream(self,
                     messages: List[Dict],
                     stop: Optional[List[str]] = None,
                     **kwargs) -> Iterator[str]:
        stop = self._update_stop_word(stop)
        logger.info(
            f'call openai api, model: {self.model}, messages: {str(messages)}, '
            f'stop: {str(stop)}, stream: True, args: {str(kwargs)}')
        response = self.client.chat.completions.create(
            model=self.model, messages=messages, stop=stop, stream=True)
        response = self.stat_last_call_token_info(response)
        # TODO: error handling
        for chunk in response:
            # vLLM sometimes returns delta.content as None; never yield None
            if len(chunk.choices) > 0 and hasattr(
                    chunk.choices[0].delta,
                    'content') and chunk.choices[0].delta.content:
                logger.info(
                    f'call openai api success, output: {chunk.choices[0].delta.content}'
                )
                yield chunk.choices[0].delta.content
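For reference, the streaming logic above is a thin wrapper over a plain OpenAI-client call against vLLM's OpenAI-compatible endpoint. A minimal standalone sketch, assuming a vLLM server is already running at `http://localhost:8000/v1` (as launched by `scripts/run_assistant_server.sh`) and serving `Qwen2-1.5B-Instruct`:

```python
from openai import OpenAI

# vLLM exposes an OpenAI-compatible API; the key is only a placeholder.
client = OpenAI(base_url='http://localhost:8000/v1', api_key='EMPTY')

stream = client.chat.completions.create(
    model='Qwen2-1.5B-Instruct',
    messages=[{'role': 'user', 'content': 'hello'}],
    stream=True,
)

for chunk in stream:
    # Like _chat_stream above, skip chunks where delta.content is None.
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end='', flush=True)
```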
83 changes: 82 additions & 1 deletion modelscope_agent_servers/README.md
@@ -39,9 +39,12 @@ cd modelscope-agent
sh scripts/run_assistant_server.sh

# start the assistant server with specified backend
sh scripts/run_assistant_server.sh dashscope
sh scripts/run_assistant_server.sh --model-server dashscope

# start the assistant server with specified model as vllm
sh scripts/run_assistant_server.sh --served-model-name Qwen2-1.5B-Instruct --model path/to/weights
```

### Use case

#### Chat
@@ -141,6 +144,84 @@ With above examples, the output should be like this:
"usage":{"prompt_tokens":267,"completion_tokens":15,"total_tokens":282}}
```

#### Chat with vllm

You can also use the chat API with vLLM as the backend, by passing `--served-model-name` and `--model`.

A usage example is shown below.

```shell
sh scripts/run_assistant_server.sh --served-model-name Qwen2-1.5B-Instruct --model /path/to/Qwen2-1___5B-Instruct
```

Then you can use `curl` to request this API, or call the Python API as shown before.

```shell
curl -X POST 'http://localhost:31512/v1/chat/completions' \
-H 'Content-Type: application/json' \
-d '{
    "tools": [{
        "type": "function",
        "function": {
            "name": "amap_weather",
            "description": "amap weather tool",
            "parameters": [{
                "name": "location",
                "type": "string",
                "description": "城市/区具体名称,如`北京市海淀区`请描述为`海淀区`",
                "required": true
            }]
        }
    }],
    "tool_choice": "auto",
    "model": "Qwen2-1.5B-Instruct",
    "messages": [
        {"content": "海淀区天气", "role": "user"}
    ]
}'

```

With the above example, the output should look like this:
```shell
{
    "request_id": "chatcmpl_3f020464-e98d-4c7b-8717-9fca56784fe6",
    "message": "",
    "output": null,
    "id": "chatcmpl_3f020464-e98d-4c7b-8717-9fca56784fe6",
    "choices": [
        {
            "index": 0,
            "message": {
                "role": "assistant",
                "content": "好的,我已经调用了amap_weather工具查询了海淀区的天气情况。现在,让我为您展示一下查询结果吧。\n\n工具调用\nAction: amap_weather\nAction Input: {\"location\": \"海淀区\"}\n",
                "tool_calls": [
                    {
                        "type": "function",
                        "function": {
                            "name": "amap_weather",
                            "arguments": "{\"location\": \"海淀区\"}"
                        }
                    }
                ]
            },
            "finish_reason": "tool_calls"
        }
    ],
    "created": 1717485704,
    "model": "Qwen2-1.5B-Instruct",
    "system_fingerprint": "chatcmpl_3f020464-e98d-4c7b-8717-9fca56784fe6",
    "object": "chat.completion",
    "usage": {
        "prompt_tokens": 237,
        "completion_tokens": 48,
        "total_tokens": 285
    }
}
```
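As an alternative to `curl`, here is a minimal Python sketch of the same request (it assumes the `requests` package; the payload simply mirrors the curl example above):

```python
import requests

url = 'http://localhost:31512/v1/chat/completions'
payload = {
    'model': 'Qwen2-1.5B-Instruct',
    'tool_choice': 'auto',
    'tools': [{
        'type': 'function',
        'function': {
            'name': 'amap_weather',
            'description': 'amap weather tool',
            'parameters': [{
                'name': 'location',
                'type': 'string',
                'description': '城市/区具体名称,如`北京市海淀区`请描述为`海淀区`',
                'required': True
            }]
        }
    }],
    'messages': [{'role': 'user', 'content': '海淀区天气'}]
}

# Send the chat completion request and print the JSON response.
response = requests.post(url, json=payload)
print(response.json())
```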



#### Assistant

To interact with the chat API, you should construct an object like `AgentRequest` on the client side, and then use the `requests` library to send it as the request body.
5 changes: 2 additions & 3 deletions modelscope_agent_servers/assistant_server/api.py
@@ -16,7 +16,6 @@
DEFAULT_KNOWLEDGE_PATH = 'knowledges'
DEFAULT_INDEX_PATH = 'index'

model_server = os.environ.get('MODEL_SERVER', 'dashscope')
app = FastAPI()


@@ -129,12 +128,12 @@ async def chat_completion(chat_request: ChatCompletionRequest,
    user = chat_request.user
    model = chat_request.model
    # remove the prefix 'Bearer ' from the authorization header
    auth = authorization[7:] if authorization else ''
    auth = authorization[7:] if authorization else 'EMPTY'

    # llm_config
    llm_config = {
        'model': model,
        'model_server': model_server,
        'model_server': os.environ.get('MODEL_SERVER', 'dashscope'),
        'api_key': auth
    }
89 changes: 81 additions & 8 deletions scripts/run_assistant_server.sh
@@ -4,18 +4,91 @@
echo "Installing dependencies from requirements.txt..."
pip3 install -r modelscope_agent_servers/requirements.txt

# Initialize optional variables with empty strings as default values
MODEL_DIR=""
MODEL_SERVER=""
MODEL_NAME=""

# Loop through arguments and process them
while [[ $# -gt 0 ]]; do
    case $1 in
        --served-model-name)
            MODEL_NAME="$2"
            shift # past argument
            shift # past value
            ;;
        --model)
            MODEL_DIR="$2"
            shift # past argument
            shift # past value
            ;;
        --model-server)
            MODEL_SERVER="$2"
            shift # past argument
            shift # past value
            ;;
        *) # unknown option
            shift # past argument
            ;;
    esac
done

# Optionally, echo variables for debugging or confirmation
echo "Model name: $MODEL_NAME"
echo "Model directory: $MODEL_DIR"
echo "Model server: $MODEL_SERVER"


# running
echo "Running fastapi assistant server at port 31512."
export PYTHONPATH=$PYTHONPATH:modelscope_agent_servers

if [ -z "$1" ]; then
export MODEL_SERVER=dashscope
else
export MODEL_SERVER=$1
fi
if [ "$MODEL_DIR" != "" ]; then
echo "Running vllm server, please make sure install vllm"
# Start the first server in the background on port 8000
python -m vllm.entrypoints.openai.api_server --served-model-name $MODEL_NAME --model $MODEL_DIR & SERVER_1_PID=$!
export MODEL_SERVER=vllm
export OPENAI_API_BASE=http://localhost:8000/v1
echo "Model server: $MODEL_SERVER"
echo "OPENAI_API_BASE: $OPENAI_API_BASE"

# Function to check if the first server is up
check_first_server() {
echo "Checking if Server 1 is up..."
for i in {1..10}; do # try up to 10 times
curl -s http://localhost:8000 > /dev/null
if [ $? -eq 0 ]; then
echo "Server 1 is up and running."
return 0
else
echo "Server 1 is not ready yet. Retrying..."
sleep 4
fi
done
return 1
}

if [ "$MODEL_SERVER" == "ollama" ]; then
ollama serve
    # Wait for the first server to be up
    if check_first_server; then
        # Start the second server on port 31512
        echo "Starting Server 2..."
        uvicorn modelscope_agent_servers.assistant_server.api:app --host 0.0.0.0 --port 31512 & SERVER_2_PID=$!
    else
        echo "Failed to start Server 1."
        exit 1
    fi
    # Kill the first server when the second server is stopped
    wait $SERVER_1_PID
    wait $SERVER_2_PID

uvicorn modelscope_agent_servers.assistant_server.api:app --host 0.0.0.0 --port 31512
elif [ -n "$MODEL_SERVER" ]; then
echo "Running specified model server: $MODEL_SERVER..."
if [ "$MODEL_SERVER" == "ollama" ]; then
ollama serve
fi
uvicorn modelscope_agent_servers.assistant_server.api:app --host 0.0.0.0 --port 31512
else
MODEL_SERVER=dashscope
echo "Running FastAPI assistant server at port 31512 as default."
uvicorn modelscope_agent_servers.assistant_server.api:app --host 0.0.0.0 --port 31512
fi
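For reference, the polling in `check_first_server` can be expressed in a few lines of Python; this is only a sketch of the same idea (same endpoint, retry count, and delay as the script, using the `requests` package):

```python
import time

import requests


def wait_for_vllm(url='http://localhost:8000', retries=10, delay=4):
    """Poll the vLLM server until it responds, mirroring check_first_server."""
    for _ in range(retries):
        try:
            # Any HTTP response counts as "up", matching the curl check.
            requests.get(url, timeout=2)
            return True
        except requests.RequestException:
            time.sleep(delay)
    return False


if wait_for_vllm():
    print('Server 1 is up and running.')
else:
    print('Failed to start Server 1.')
```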
