diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
new file mode 100644
index 0000000..a76165e
--- /dev/null
+++ b/.github/workflows/build.yaml
@@ -0,0 +1,81 @@
+# Reusable workflow: builds each matrix target and uploads the result as an artifact.
+name: build
+on: [workflow_call]
+
+jobs:
+  build:
+    runs-on: ubuntu-24.04
+    strategy:
+      fail-fast: false
+      matrix:
+        # `name` is passed to make as-is (so it may carry variable assignments);
+        # `artifact` is the output file name under build/ and the artifact name.
+        target:
+          - name: ENABLE_CUDA=true stt-linux-amd64
+            artifact: stt-linux-amd64-cuda
+          - name: stt-linux-amd64
+            artifact: stt-linux-amd64-cpu
+          - name: ENABLE_CUDA=true sttd-linux-amd64
+            artifact: sttd-linux-amd64-cuda
+          - name: sttd-linux-amd64
+            artifact: sttd-linux-amd64-cpu
+          - name: ENABLE_CUDA=true subtitleswindow-linux-amd64
+            artifact: subtitleswindow-linux-amd64-cuda
+          - name: subtitleswindow-linux-amd64
+            artifact: subtitleswindow-linux-amd64-cpu
+          - name: ENABLE_CUDA=true subtitleswindow-windows-amd64
+            artifact: subtitleswindow-windows-amd64-cuda
+          - name: subtitleswindow-windows-amd64
+            artifact: subtitleswindow-windows-amd64-cpu
+    name: build
+    steps:
+      - uses: actions/checkout@v4
+      - name: setup go
+        uses: actions/setup-go@v5
+        with:
+          go-version: '1.23'
+          check-latest: true
+      - name: add ffmpeg7 repo
+        run: sudo add-apt-repository -y ppa:ubuntuhandbook1/ffmpeg7
+      - name: add nvidia repo
+        run: |
+          wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \
+          sudo dpkg -i cuda-keyring_1.1-1_all.deb && \
+          sudo apt-get update
+      - name: install fyne
+        run: go install fyne.io/fyne/v2/cmd/fyne@latest
+      - name: install libtinfo5
+        run: |
+          wget http://security.ubuntu.com/ubuntu/pool/universe/n/ncurses/libtinfo5_6.3-2ubuntu0.1_amd64.deb && \
+          sudo apt install -y ./libtinfo5_6.3-2ubuntu0.1_amd64.deb
+      - name: apt install
+        run: |
+          sudo apt install -fy \
+            libavcodec-dev \
+            libavformat-dev \
+            libavfilter-dev \
+            libavdevice-dev \
+            libswscale-dev \
+            libsrt-openssl-dev \
+            libssl-dev \
+            libasound2-dev \
+            libxxf86vm-dev \
+            make \
+            cmake \
+            nvidia-cuda-toolkit \
+            cuda-toolkit-12-2 \
+            libxcursor-dev \
+            libxrandr-dev \
+            libxinerama-dev \
+            libxi-dev \
+            gcc-mingw-w64-x86-64-win32 \
+            g++-mingw-w64-x86-64-win32
+      - name: apt clean
+        run: sudo apt clean
+      - name: make ${{ matrix.target.name }}
+        run: make ${{ matrix.target.name }} INSTALL_DEST=build/${{ matrix.target.artifact }}
+      - name: upload-artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ matrix.target.artifact }}
+          path: build/${{ matrix.target.artifact }}
diff --git a/.github/workflows/rolling_release.yaml b/.github/workflows/rolling_release.yaml
new file mode 100644
index 0000000..e5b9c16
--- /dev/null
+++ b/.github/workflows/rolling_release.yaml
@@ -0,0 +1,42 @@
+# Publishes a pre-release with the freshly built binaries on every push to the branches below.
+name: rolling-release
+
+on:
+  push:
+    branches:
+      - main
+      - test/ci
+
+jobs:
+  build:
+    uses: ./.github/workflows/build.yaml
+  rolling-release:
+    name: rolling-release
+    needs: [build]
+    runs-on: ubuntu-latest
+    steps:
+      # No `name` is given, so every artifact of this run is fetched; with
+      # merge-multiple they are all extracted into the working directory.
+      - name: download all build artifacts
+        uses: actions/download-artifact@v4
+        with:
+          merge-multiple: true
+      - name: get the timestamp
+        id: date
+        # `::set-output` is deprecated; step outputs go through $GITHUB_OUTPUT.
+        run: echo "date=$(date +'%Y-%m-%d_%H%M%S')" >> "$GITHUB_OUTPUT"
+      - uses: "marvinpinto/action-automatic-releases@latest"
+        with:
+          repo_token: "${{ secrets.GITHUB_TOKEN }}"
+          prerelease: true
+          automatic_release_tag: unstable-${{ steps.date.outputs.date }}
+          # Must match the `artifact` names of the matrix in build.yaml.
+          files: |
+            stt-linux-amd64-cuda
+            stt-linux-amd64-cpu
+            sttd-linux-amd64-cuda
+            sttd-linux-amd64-cpu
+            subtitleswindow-linux-amd64-cuda
+            subtitleswindow-linux-amd64-cpu
+            subtitleswindow-windows-amd64-cuda
+            subtitleswindow-windows-amd64-cpu
diff --git a/Makefile b/Makefile
index 4faefff..f881d93 100644
--- a/Makefile
+++ b/Makefile
@@ -67,7 +67,7 @@ BUILD_DATE_STRING?=$(shell date +%s)
 
 LINKER_FLAGS?=-X=github.com/xaionaro-go/buildvars.GitCommit=$(GIT_COMMIT) -X=github.com/xaionaro-go/buildvars.Version=$(VERSION_STRING) -X=github.com/xaionaro-go/buildvars.BuildDateString=$(BUILD_DATE_STRING)
 
-WINDOWS_EXTLINKER_FLAGS?=-L$(PWD)/thirdparty/windows/portaudio-binaries/ -L$(PWD)/thirdparty/windows/amd64/ffmpeg-n7.0-21-gfb8f0ea7b3-win64-gpl-shared-7.0/lib
+WINDOWS_EXTLINKER_FLAGS?=-L$(PWD)/thirdparty/windows/portaudio-binaries/ -L$(PWD)/thirdparty/windows/amd64/ffmpeg-n7.0-21-gfb8f0ea7b3-win64-gpl-shared-7.0/lib -L$(PWD)/thirdparty/windows/amd64/cuda_12.2/libcublas/cublas_dev/lib/x64/ -L$(PWD)/thirdparty/windows/amd64/cuda_12.2/cuda_cudart/cudart/lib/x64/ -L$(PWD)/thirdparty/windows/amd64/whisper/ -lwhisper
 
 LINKER_FLAGS_ANDROID?=$(LINKER_FLAGS)
 LINKER_FLAGS_DARWIN?=$(LINKER_FLAGS)
@@ -111,7 +111,10 @@ thirdparty/portaudio/include/portaudio.h:
 
 thirdparty/windows/amd64/ready:
 	mkdir -p thirdparty/windows/amd64
-	sh -c 'cd thirdparty/windows/amd64 && wget https://github.com/BtbN/FFmpeg-Builds/releases/download/autobuild-2024-04-30-12-51/ffmpeg-n7.0-21-gfb8f0ea7b3-win64-gpl-shared-7.0.zip && unzip -o ffmpeg-n7.0-21-gfb8f0ea7b3-win64-gpl-shared-7.0.zip && rm -f ffmpeg-n7.0-21-gfb8f0ea7b3-win64-gpl-shared-7.0.zip'
+# The steps below can be re-run after a partial failure: an already downloaded archive is kept (wget -nc) and extraction overwrites (unzip -o, 7z -y, mkdir -p).
+	sh -c 'cd thirdparty/windows/amd64 && wget -nc -q --show-progress https://github.com/BtbN/FFmpeg-Builds/releases/download/autobuild-2024-04-30-12-51/ffmpeg-n7.0-21-gfb8f0ea7b3-win64-gpl-shared-7.0.zip && unzip -o ffmpeg-n7.0-21-gfb8f0ea7b3-win64-gpl-shared-7.0.zip && rm -f ffmpeg-n7.0-21-gfb8f0ea7b3-win64-gpl-shared-7.0.zip'
+	sh -c 'cd thirdparty/windows/amd64 && wget -nc -q --show-progress https://developer.download.nvidia.com/compute/cuda/12.2.0/local_installers/cuda_12.2.0_536.25_windows.exe && 7z x -y -ocuda_12.2 cuda_12.2.0_536.25_windows.exe && rm -f cuda_12.2.0_536.25_windows.exe'
+	sh -c 'cd thirdparty/windows/amd64 && wget -nc -q --show-progress https://github.com/xaionaro/whisper-prebuilds/releases/download/99b011a9f5e63f71/whisper-cublas-12.2.0-bin-x64.zip && mkdir -p whisper && cd whisper && unzip -o ../whisper-cublas-12.2.0-bin-x64.zip && rm -f ../whisper-cublas-12.2.0-bin-x64.zip'
 	touch thirdparty/windows/amd64/ready
 
 pkg/speech/speechtotext/implementations/whisper/pkgconfig/libwhisper.pc:
@@ -147,7 +150,7 @@ subtitleswindow-linux-amd64: build deps
 
 subtitleswindow-windows-amd64: build deps windows-deps
 	$(eval INSTALL_DEST?=build/subtitleswindow-windows-amd64.exe)
-	PKG_CONFIG_PATH=$(PKG_CONFIG_PATH) CGO_ENABLED=1 CGO_LDFLAGS="-static" CGO_CFLAGS="$(WINDOWS_CGO_FLAGS)" CC=x86_64-w64-mingw32-gcc GOOS=windows go build $(GOBUILD_FLAGS) -ldflags "$(LINKER_FLAGS_WINDOWS)" -o "$(INSTALL_DEST)" ./cmd/subtitleswindow
+	PKG_CONFIG_PATH=$(PKG_CONFIG_PATH) CGO_ENABLED=1 CGO_LDFLAGS="" CGO_CFLAGS="$(WINDOWS_CGO_FLAGS)" CC=x86_64-w64-mingw32-gcc GOOS=windows go build $(GOBUILD_FLAGS) -ldflags "$(LINKER_FLAGS_WINDOWS)" -o "$(INSTALL_DEST)" ./cmd/subtitleswindow
 	$(eval undefine INSTALL_DEST)
 
 example-stt: stt-$(shell go env GOOS)-$(shell go env GOARCH)
diff --git a/pkg/subtitleswindow/speech_recognizer.go b/pkg/subtitleswindow/speech_recognizer.go
index 92e1436..7e1ddde 100644
--- a/pkg/subtitleswindow/speech_recognizer.go
+++ b/pkg/subtitleswindow/speech_recognizer.go
@@ -11,9 +11,11 @@ import (
 	"fyne.io/fyne/v2/widget"
 	"github.com/facebookincubator/go-belt/tool/logger"
 	"github.com/hashicorp/go-multierror"
+	syswhisper "github.com/mutablelogic/go-whisper/sys/whisper"
 	"github.com/xaionaro-go/observability"
 	"github.com/xaionaro-go/speech/pkg/speech"
 	"github.com/xaionaro-go/speech/pkg/speech/speechtotext/client"
+	"github.com/xaionaro-go/speech/pkg/speech/speechtotext/implementations/whisper"
 	"github.com/xaionaro-go/speech/pkg/speech/speechtotext/implementations/whisper/types"
 	"github.com/xaionaro-go/speech/pkg/speech/speechtotext/server/goconv"
 	"github.com/xaionaro-go/speech/pkg/speech/speechtotext/server/proto/go/speechtotext_grpc"
@@ -295,3 +297,30 @@ func (r *speechRecognizer) Close() error {
 	})
 	return mErr.ErrorOrNil()
 }
+
+// initLocalSTT builds a whisper-backed speech.ToText from the given model.
+// A non-negative gpu is passed on as the GPU device ID; a negative value
+// adds no GPU option at all.
+func initLocalSTT(
+	ctx context.Context,
+	gpu int,
+	whisperModel []byte,
+	language speech.Language,
+	shouldTranslate bool,
+	vadThreshold float64,
+) (speech.ToText, error) {
+	var opts whisper.Options
+	if gpu >= 0 {
+		opts = append(opts, whisper.OptionGPUDeviceID(gpu))
+	}
+	return whisper.New(
+		ctx,
+		whisperModel,
+		language,
+		types.SamplingStrategyBreamSearch,
+		shouldTranslate,
+		syswhisper.AlignmentAheadsPresetNone,
+		vadThreshold,
+		opts...,
+	)
+}
diff --git a/pkg/subtitleswindow/speech_recognizer_notwindows.go b/pkg/subtitleswindow/speech_recognizer_notwindows.go
deleted file mode 100644
index b3892c1..0000000
--- a/pkg/subtitleswindow/speech_recognizer_notwindows.go
+++ /dev/null
@@ -1,37 +0,0 @@
-//go:build !windows
-// +build !windows
-
-package subtitleswindow
-
-import (
-	"context"
-
-	syswhisper "github.com/mutablelogic/go-whisper/sys/whisper"
-	"github.com/xaionaro-go/speech/pkg/speech"
-	"github.com/xaionaro-go/speech/pkg/speech/speechtotext/implementations/whisper"
-	"github.com/xaionaro-go/speech/pkg/speech/speechtotext/implementations/whisper/types"
-)
-
-func initLocalSTT(
-	ctx context.Context,
-	gpu int,
-	whisperModel []byte,
-	language speech.Language,
-	shouldTranslate bool,
-	vadThreshold float64,
-) (speech.ToText, error) {
-	var opts whisper.Options
-	if gpu >= 0 {
-		opts = append(opts, whisper.OptionGPUDeviceID(gpu))
-	}
-	return whisper.New(
-		ctx,
-		whisperModel,
-		language,
-		types.SamplingStrategyBreamSearch,
-		shouldTranslate,
-		syswhisper.AlignmentAheadsPresetNone,
-		vadThreshold,
-		opts...,
-	)
-}
diff --git a/pkg/subtitleswindow/speech_recognizer_windows.go b/pkg/subtitleswindow/speech_recognizer_windows.go
deleted file mode 100644
index 401a6d4..0000000
--- a/pkg/subtitleswindow/speech_recognizer_windows.go
+++ /dev/null
@@ -1,22 +0,0 @@
-//go:build windows
-// +build windows
-
-package subtitleswindow
-
-import (
-	"context"
-	"fmt"
-
-	"github.com/xaionaro-go/speech/pkg/speech"
-)
-
-func initLocalSTT(
-	ctx context.Context,
-	gpu int,
-	whisperModel []byte,
-	language speech.Language,
-	shouldTranslate bool,
-	vadThreshold float64,
-) (speech.ToText, error) {
-	return nil, fmt.Errorf("local speech-to-text is not implemented for windows, yet")
-}