Skip to content

[bug fix] avoid returning stop tokens in response. #19

[bug fix] avoid returning stop tokens in response.

[bug fix] avoid returning stop tokens in response. #19

Workflow file for this run

name: Publish docker image

# Builds the ScaleLLM Docker images and pushes them to Docker Hub
# (https://hub.docker.com/r/vectorchai/scalellm) when a release tag is pushed.
#
# Triggered by pushed tags of the form v<major>.<minor>.<patch> followed by an
# optional suffix, e.g. v1.0.0, v1.0.0-rc1, v20.15.10-rc5.
on:
  push:
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'

jobs:
  publish_scalellm:
    runs-on: [self-hosted, linux, x64, 1gpu]
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          # Submodules are fetched recursively so the Docker build contexts
          # below contain their sources.
          submodules: recursive

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_HUB_USER }}
          password: ${{ secrets.DOCKER_HUB_TOKEN }}

      # Every image is tagged with the pushed git tag and with `latest`.
      # `github.ref_name` is the short name of the ref that triggered the run;
      # this workflow only runs on tag pushes, so it is the tag name itself
      # (e.g. v1.0.0) and no extra tag-extraction action is required.
      #
      # NOTE(review): `latest` is also moved for pre-release tags such as
      # v1.0.0-rc1, because the tag filter above accepts a suffix - confirm
      # that this is intended.
      - name: Build and push gateway
        uses: docker/build-push-action@v5
        with:
          context: ./gateway
          push: true
          no-cache: true
          tags: |
            vectorchai/scalellm-gateway:${{ github.ref_name }}
            vectorchai/scalellm-gateway:latest

      # No build-args are passed here, so the Dockerfile's own defaults apply
      # (presumably a CUDA 12.1 base image, per the step name - TODO confirm).
      - name: Build and push scalellm for cuda 12.1
        uses: docker/build-push-action@v5
        with:
          context: .
          push: true
          no-cache: true
          tags: |
            vectorchai/scalellm:${{ github.ref_name }}
            vectorchai/scalellm:latest

      # Same build context as above, with BASE_IMAGE overridden to the
      # CUDA 11.8 devel image and published under a separate repository name.
      - name: Build and push scalellm for cuda 11.8
        uses: docker/build-push-action@v5
        with:
          context: .
          push: true
          no-cache: true
          build-args: |
            BASE_IMAGE=nvcr.io/nvidia/cuda:11.8.0-devel-ubuntu22.04
          tags: |
            vectorchai/scalellm_cu118:${{ github.ref_name }}
            vectorchai/scalellm_cu118:latest