BUG: stop parameter leads to failure with transformers backend (#…
ChengjieLi28 authored Dec 13, 2024
1 parent 609b825 commit c6b064b
Showing 3 changed files with 23 additions and 9 deletions.
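For context on the bug being fixed: with the transformers backend, passing a stop parameter in generate_config could make the request fail outright instead of truncating at the stop string. A minimal reproduction sketch, assuming a running Xinference server and an already-launched generate-capable model (the endpoint and model UID below are illustrative):

from xinference.client import Client

# Endpoint and model UID are placeholders; adjust to your deployment.
client = Client("http://localhost:9997")
model = client.get_model("my-transformers-model")

# Before this commit, supplying "stop" could crash the request on the
# transformers backend; afterwards it truncates cleanly and the reported
# usage.completion_tokens matches the trimmed output.
result = model.generate(
    "Q: What is 1 + 1?\nA:",
    generate_config={"stop": ["\n"], "stream": False, "max_tokens": 64},
)
print(result["choices"][0]["text"])
print(result["usage"])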
6 changes: 6 additions & 0 deletions .github/workflows/python.yaml
@@ -105,6 +105,12 @@ jobs:
         run: |
           python -m pip install -U pip setuptools
+      - name: Install numpy
+        if: |
+          startsWith(matrix.os, 'macos') && matrix.python-version == '3.12'
+        run: |
+          python -m pip install "numpy<2"
       - name: Install dependencies
         env:
           MODULE: ${{ matrix.module }}
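(The workflow change above is an incidental CI fix: it pins numpy below 2.0 for macOS runners on Python 3.12, presumably to avoid packages in that job that are not yet compatible with NumPy 2.x.)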
2 changes: 1 addition & 1 deletion xinference/deploy/docker/cpu.Dockerfile
@@ -21,7 +21,7 @@ ARG PIP_INDEX=https://pypi.org/simple
 RUN python -m pip install --upgrade -i "$PIP_INDEX" pip && \
     pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu && \
     pip install -i "$PIP_INDEX" --upgrade-strategy only-if-needed -r /opt/inference/xinference/deploy/docker/requirements_cpu.txt && \
-    pip install llama-cpp-python && \
+    CMAKE_ARGS="-DLLAVA_BUILD=OFF" pip install llama-cpp-python && \
     cd /opt/inference && \
     python setup.py build_web && \
     git restore . && \
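(Another incidental build fix: setting CMAKE_ARGS="-DLLAVA_BUILD=OFF" tells the llama-cpp-python source build to skip compiling the llava multimodal components (LLAVA_BUILD is a llama.cpp CMake option), which appears intended to keep the CPU image building cleanly.)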
24 changes: 16 additions & 8 deletions xinference/model/llm/transformers/utils.py
@@ -156,6 +156,7 @@ def _get_completion(
     finish_reason: Optional[str],
     model_uid: str,
     r: InferenceRequest,
+    completion_tokens: int,
 ):
     completion_choice = CompletionChoice(
         text=output, index=0, logprobs=None, finish_reason=finish_reason
@@ -170,8 +171,8 @@
     )
     completion_usage = CompletionUsage(
         prompt_tokens=len(r.prompt_tokens),
-        completion_tokens=len(r.new_tokens),
-        total_tokens=len(r.prompt_tokens) + len(r.new_tokens),
+        completion_tokens=completion_tokens,
+        total_tokens=len(r.prompt_tokens) + completion_tokens,
     )
     completion = Completion(
         id=completion_chunk["id"],
@@ -371,7 +372,7 @@ def _batch_inference_one_step_internal(
             r.stopped = stopped
             r.finish_reason = finish_reason
 
-            if r.stopped and r not in stop_token_mapping and r not in output_mapping:
+            if r.stopped and r not in stop_token_mapping:
                 stop_token_mapping[r] = _i + 1
 
             if r.stream:
@@ -446,12 +447,14 @@
             else:
                 # last round, handle non-stream result
                 if r.stopped and _i == decode_round - 1:
-                    invalid_token_num = decode_round - stop_token_mapping[r]
+                    invalid_token_num = (
+                        (decode_round - stop_token_mapping[r] + 1)
+                        if r.finish_reason == "stop"
+                        else (decode_round - stop_token_mapping[r])
+                    )
                     outputs = (
                         tokenizer.decode(
-                            r.new_tokens[: -(invalid_token_num + 1)]
-                            if r.finish_reason == "stop"
-                            else r.new_tokens[:-invalid_token_num],
+                            r.new_tokens[:-invalid_token_num],
                             skip_special_tokens=True,
                             spaces_between_special_tokens=False,
                             clean_up_tokenization_spaces=True,
@@ -460,7 +463,12 @@
                         else output_mapping[r]
                     )
                     completion = _get_completion(
-                        outputs, r.chunk_id, r.finish_reason, model_uid, r
+                        outputs,
+                        r.chunk_id,
+                        r.finish_reason,
+                        model_uid,
+                        r,
+                        len(r.new_tokens) - invalid_token_num,
                     )
                     r.completion = [completion]
 
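Two things change in utils.py. First, dropping the `r not in output_mapping` guard ensures stop_token_mapping always records the round at which a stopped request finished; previously a request whose stop string had already been captured in output_mapping never got an entry, so the later stop_token_mapping[r] lookup could raise a KeyError, which matches the failure named in the commit title. Second, the invalid-token arithmetic is hoisted into invalid_token_num so the same count can be passed to _get_completion, making usage.completion_tokens agree with the trimmed text instead of the raw len(r.new_tokens). A standalone sketch of that arithmetic, with illustrative values and names mirroring the diff:

# Standalone sketch; all values are illustrative.
decode_round = 16      # decode steps run for the whole batch this iteration
stop_round = 10        # 1-based step at which this request stopped
new_tokens = list(range(40))   # stand-in for the accumulated token ids
finish_reason = "stop"         # "stop" = a stop string was matched

# Every step after stop_round still appended a token for this request;
# when finish_reason == "stop", the matched stop token is invalid too.
invalid_token_num = (
    decode_round - stop_round + 1
    if finish_reason == "stop"
    else decode_round - stop_round
)

valid_tokens = new_tokens[:-invalid_token_num]
completion_tokens = len(new_tokens) - invalid_token_num
assert len(valid_tokens) == completion_tokens  # usage matches the text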
