Skip to content

Commit

Permalink
Add C++ runtime for Tele-AI/TeleSpeech-ASR (#970)
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj authored Jun 4, 2024
1 parent f8dbc10 commit fd5a0d1
Show file tree
Hide file tree
Showing 52 changed files with 1,050 additions and 143 deletions.
19 changes: 10 additions & 9 deletions .github/scripts/test-dot-net.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,16 @@

cd dotnet-examples/

cd vad-non-streaming-asr-paraformer
cd ./offline-decode-files
./run-telespeech-ctc.sh
./run-nemo-ctc.sh
./run-paraformer.sh
./run-zipformer.sh
./run-hotwords.sh
./run-whisper.sh
./run-tdnn-yesno.sh

cd ../vad-non-streaming-asr-paraformer
./run.sh

cd ../offline-punctuation
Expand All @@ -22,14 +31,6 @@ cd ../online-decode-files
./run-transducer.sh
./run-paraformer.sh

cd ../offline-decode-files
./run-nemo-ctc.sh
./run-paraformer.sh
./run-zipformer.sh
./run-hotwords.sh
./run-whisper.sh
./run-tdnn-yesno.sh

cd ../offline-tts
./run-aishell3.sh
./run-piper.sh
Expand Down
33 changes: 33 additions & 0 deletions .github/scripts/test-offline-ctc.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,39 @@ echo "PATH: $PATH"

which $EXE

# Smoke test for the offline TeleSpeech CTC int8 model:
#   1. download and unpack the release archive,
#   2. decode each bundled test wave in its own invocation,
#   3. decode all test waves in a single invocation,
#   4. remove the unpacked model directory.
log "test offline TeleSpeech CTC"
url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
name=$(basename "$url")
repo=$(basename -s .tar.bz2 "$name")

curl -SL -O "$url"
tar xvf "$name"
rm "$name"
ls -lh "$repo"

test_wavs=(
  3-sichuan.wav
  4-tianjin.wav
  5-henan.wav
)

# NOTE(review): $EXE is deliberately left unquoted so this section keeps
# invoking it the same way as before; confirm that no caller relies on EXE
# holding more than one word before quoting it.
for w in "${test_wavs[@]}"; do
  time $EXE \
    --tokens="$repo/tokens.txt" \
    --telespeech-ctc="$repo/model.int8.onnx" \
    --debug=1 \
    "$repo/test_wavs/$w"
done

# Same model, all three waves passed to one invocation.
time $EXE \
  --tokens="$repo/tokens.txt" \
  --telespeech-ctc="$repo/model.int8.onnx" \
  --debug=1 \
  "$repo/test_wavs/3-sichuan.wav" \
  "$repo/test_wavs/4-tianjin.wav" \
  "$repo/test_wavs/5-henan.wav"

rm -rf "$repo"

log "-----------------------------------------------------------------"
log "Run Nemo fast conformer hybrid transducer ctc models (CTC branch)"
log "-----------------------------------------------------------------"
Expand Down
12 changes: 12 additions & 0 deletions .github/scripts/test-python.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,18 @@ log() {

export GIT_CLONE_PROTECTION_ACTIVE=false

# Smoke test for the Python API with the offline TeleSpeech CTC int8 model:
# download and unpack the release archive into the current directory, run the
# example script, then remove the unpacked model directory.
log "test offline TeleSpeech CTC"
url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
name=$(basename "$url")
repo=$(basename -s .tar.bz2 "$name")

curl -SL -O "$url"
tar xvf "$name"
rm "$name"
ls -lh "$repo"
# NOTE(review): the example script is not given the model path here;
# presumably it looks for the unpacked directory relative to the current
# working directory — confirm against the script.
python3 ./python-api-examples/offline-telespeech-ctc-decode-files.py
rm -rf "$repo"

log "test online NeMo CTC"

url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms.tar.bz2
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/build-wheels-macos-arm64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ jobs:
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
run: |
python3 -m pip install --upgrade pip
python3 -m pip install wheel twine setuptools
python3 -m pip install --break-system-packages --upgrade pip
python3 -m pip install --break-system-packages wheel twine setuptools
twine upload ./wheelhouse/*.whl
90 changes: 90 additions & 0 deletions .github/workflows/build-wheels-macos-universal2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# Builds macOS universal2 (arm64 + x86_64) Python wheels with cibuildwheel,
# one matrix job per CPython version, then uploads them as workflow
# artifacts, to a Hugging Face repo (cp38 job only) and to PyPI.
name: build-wheels-macos-universal2

# Runs on pushes to the `wheel` branch, on any tag, or when started manually.
on:
  push:
    branches:
      - wheel
    tags:
      - '*'
  workflow_dispatch:

env:
  SHERPA_ONNX_IS_IN_GITHUB_ACTIONS: 1

# A newer run for the same ref cancels the one still in progress.
concurrency:
  group: build-wheels-macos-universal2-${{ github.ref }}
  cancel-in-progress: true

jobs:
  build_wheels_macos_universal2:
    name: ${{ matrix.python-version }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest]
        python-version: ["cp38", "cp39", "cp310", "cp311", "cp312"]

    steps:
      - uses: actions/checkout@v4

      - name: Build wheels
        uses: pypa/cibuildwheel@v2.15.0
        env:
          # Restrict the build to the CPython version of this matrix entry.
          CIBW_BUILD: "${{ matrix.python-version}}-* "
          # Ask CMake for both architectures so the binaries are fat.
          CIBW_ENVIRONMENT: SHERPA_ONNX_CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES='arm64;x86_64'"
          CIBW_ARCHS: "universal2"
          CIBW_BUILD_VERBOSITY: 3

          # Don't repair macOS wheels
          CIBW_REPAIR_WHEEL_COMMAND_MACOS: ""

      - name: Display wheels
        shell: bash
        run: |
          ls -lh ./wheelhouse/

      - uses: actions/upload-artifact@v4
        with:
          name: wheel-${{ matrix.python-version }}
          path: ./wheelhouse/*.whl

      # Only the cp38 job pushes to Hugging Face. The whole command is
      # retried up to 20 times, each attempt limited to 200 seconds.
      - name: Publish to huggingface
        if: matrix.python-version == 'cp38'
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"
            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false
            git clone https://huggingface.co/csukuangfj/sherpa-onnx-wheels huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main
            cp -v ../wheelhouse/*.whl .
            git status
            git add .
            git commit -m "add more wheels"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-wheels main

      # Every matrix job uploads the wheels found in its own ./wheelhouse.
      - name: Publish wheels to PyPI
        env:
          TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
        run: |
          python3 -m pip install --break-system-packages --upgrade pip
          python3 -m pip install --break-system-packages wheel twine setuptools
          twine upload ./wheelhouse/*.whl
4 changes: 2 additions & 2 deletions .github/workflows/build-wheels-macos-x64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ jobs:
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
run: |
python3 -m pip install --upgrade pip
python3 -m pip install wheel twine setuptools
python3 -m pip install --break-system-packages --upgrade pip
python3 -m pip install --break-system-packages wheel twine setuptools
twine upload ./wheelhouse/*.whl
46 changes: 46 additions & 0 deletions .github/workflows/export-telespeech-ctc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,49 @@ jobs:
repo_name: k2-fsa/sherpa-onnx
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
tag: asr-models

# Copies the exported float32 model directory into a fresh clone of its
# Hugging Face repo and pushes it. The commit and the push are both followed
# by `|| true`, so neither one fails the step; the local clone is removed
# at the end.
- name: Publish float32 model to huggingface
  shell: bash
  env:
    HF_TOKEN: ${{ secrets.HF_TOKEN }}
  run: |
    src=scripts/tele-speech/sherpa-onnx-telespeech-ctc-zh-2024-06-04
    git config --global user.email "csukuangfj@gmail.com"
    git config --global user.name "Fangjun Kuang"
    export GIT_CLONE_PROTECTION_ACTIVE=false
    GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-telespeech-ctc-zh-2024-06-04 hf
    cp -a $src/* hf/
    cd hf
    git lfs track "*.pdf"
    git lfs track "*.onnx"
    git add .
    git commit -m 'add model files' || true
    git status
    ls -lh
    git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-telespeech-ctc-zh-2024-06-04 main || true
    rm -rf hf
# Copies the exported int8 model directory into a fresh clone of its Hugging
# Face repo and pushes it. Any existing `hf` directory is removed before
# cloning. The commit and the push are both followed by `|| true`, so
# neither one fails the step.
- name: Publish int8 model to huggingface
  shell: bash
  env:
    HF_TOKEN: ${{ secrets.HF_TOKEN }}
  run: |
    src=scripts/tele-speech/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04
    git config --global user.email "csukuangfj@gmail.com"
    git config --global user.name "Fangjun Kuang"
    export GIT_CLONE_PROTECTION_ACTIVE=false
    rm -rf hf
    GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04 hf
    cp -a $src/* hf/
    cd hf
    git lfs track "*.pdf"
    git lfs track "*.onnx"
    git add .
    git commit -m 'add model files' || true
    git status
    ls -lh
    git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04 main || true
16 changes: 8 additions & 8 deletions .github/workflows/linux.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -130,34 +130,34 @@ jobs:
name: release-${{ matrix.build_type }}-with-shared-lib-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }}
path: install/*

- name: Test online transducer
- name: Test offline CTC
shell: bash
run: |
du -h -d1 .
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx
export EXE=sherpa-onnx-offline
.github/scripts/test-online-transducer.sh
.github/scripts/test-offline-ctc.sh
du -h -d1 .
- name: Test online transducer (C API)
- name: Test online transducer
shell: bash
run: |
du -h -d1 .
export PATH=$PWD/build/bin:$PATH
export EXE=decode-file-c-api
export EXE=sherpa-onnx
.github/scripts/test-online-transducer.sh
du -h -d1 .
- name: Test offline CTC
- name: Test online transducer (C API)
shell: bash
run: |
du -h -d1 .
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline
export EXE=decode-file-c-api
.github/scripts/test-offline-ctc.sh
.github/scripts/test-online-transducer.sh
du -h -d1 .
- name: Test spoken language identification (C++ API)
Expand Down
14 changes: 8 additions & 6 deletions .github/workflows/macos.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,14 @@ jobs:
otool -L build/bin/sherpa-onnx
otool -l build/bin/sherpa-onnx
- name: Test offline CTC
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline
.github/scripts/test-offline-ctc.sh
- name: Test offline transducer
shell: bash
run: |
Expand Down Expand Up @@ -192,13 +200,7 @@ jobs:
.github/scripts/test-offline-whisper.sh
- name: Test offline CTC
shell: bash
run: |
export PATH=$PWD/build/bin:$PATH
export EXE=sherpa-onnx-offline
.github/scripts/test-offline-ctc.sh
- name: Test online transducer
shell: bash
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/swift.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [macos-13]
os: [macos-latest, macos-14]

steps:
- uses: actions/checkout@v4
Expand Down
18 changes: 10 additions & 8 deletions .github/workflows/test-go.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,12 @@ concurrency:

jobs:
test-go:
name: ${{ matrix.os }} ${{matrix.arch }}
name: ${{ matrix.os }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
include:
- os: macos-latest
arch: amd64
os: [macos-latest, macos-14]

steps:
- uses: actions/checkout@v4
Expand All @@ -47,7 +45,7 @@ jobs:
- name: ccache
uses: hendrikmuhs/ccache-action@v1.2
with:
key: ${{ matrix.os }}-${{ matrix.arch }}
key: ${{ matrix.os }}-go

- uses: actions/setup-go@v5
with:
Expand Down Expand Up @@ -109,8 +107,6 @@ jobs:
go build
ls -lh
git lfs install
echo "Test vits-ljs"
./run-vits-ljs.sh
rm -rf vits-ljs
Expand Down Expand Up @@ -144,7 +140,13 @@ jobs:
go build
ls -lh
git lfs install
echo "Test telespeech ctc"
./run-telespeech-ctc.sh
rm -rf sherpa-onnx-telespeech-ctc-*
echo "Test transducer"
./run-transducer.sh
rm -rf sherpa-onnx-zipformer-en-2023-06-26
echo "Test transducer"
./run-transducer.sh
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test-piper-phonemize.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ jobs:
mkdir build
cd build
cmake -DCMAKE_VERBOSE_MAKEFILE=ON -D SHERPA_ONNX_ENABLE_TESTS=ON -D CMAKE_BUILD_TYPE=${{ matrix.build_type }} -D BUILD_SHARED_LIBS=${{ matrix.shared_lib }} -DCMAKE_INSTALL_PREFIX=./install ..
cmake -DSHERPA_ONNX_ENABLE_EPSEAK_NG_EXE=ON -DBUILD_ESPEAK_NG_EXE=ON -DCMAKE_VERBOSE_MAKEFILE=ON -D SHERPA_ONNX_ENABLE_TESTS=ON -D CMAKE_BUILD_TYPE=${{ matrix.build_type }} -D BUILD_SHARED_LIBS=${{ matrix.shared_lib }} -DCMAKE_INSTALL_PREFIX=./install ..
- name: Build
shell: bash
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -106,3 +106,4 @@ node_modules
package-lock.json
sherpa-onnx-nemo-*
sherpa-onnx-vits-*
sherpa-onnx-telespeech-ctc-*
Loading

0 comments on commit fd5a0d1

Please sign in to comment.