Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add whisper.cpp (server) support to llamafile #517

Merged
merged 1 commit into from
Jul 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ include build/rules.mk
include llamafile/BUILD.mk
include llama.cpp/BUILD.mk
include stable-diffusion.cpp/BUILD.mk
include whisper.cpp/BUILD.mk
include double-conversion/BUILD.mk
include stb/BUILD.mk

Expand All @@ -22,6 +23,8 @@ include stb/BUILD.mk
# Aggregate target for the selected MODE: asking for o/$(MODE)/ asks for
# every prerequisite listed below, which includes o/$(MODE)/whisper.cpp.
# Declared phony, so it is always considered out of date.
.PHONY: o/$(MODE)/
o/$(MODE)/: o/$(MODE)/llamafile \
o/$(MODE)/llama.cpp \
o/$(MODE)/stable-diffusion.cpp \
o/$(MODE)/whisper.cpp \
o/$(MODE)/stb \
o/$(MODE)/depend.test

Expand Down
9 changes: 9 additions & 0 deletions llama.cpp/ggml-backend.c
Original file line number Diff line number Diff line change
Expand Up @@ -1867,6 +1867,15 @@ int ggml_backend_sched_get_n_copies(ggml_backend_sched_t sched) {
return sched->n_copies;
}

// Returns the scheduler's backend count (the value of sched->n_backends).
// This is the exclusive upper bound that ggml_backend_sched_get_backend()
// asserts its index against.
int ggml_backend_sched_get_n_backends(ggml_backend_sched_t sched) {
return sched->n_backends;
}

// Looks up the backend stored at position i of the scheduler's backend array.
//
// i must satisfy 0 <= i < sched->n_backends; the range is enforced with
// GGML_ASSERT before the array is touched.
ggml_backend_t ggml_backend_sched_get_backend(ggml_backend_sched_t sched, int i) {
    GGML_ASSERT(i >= 0 && i < sched->n_backends);
    ggml_backend_t selected = sched->backends[i];
    return selected;
}

size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend) {
int backend_index = ggml_backend_sched_backend_id(sched, backend);
GGML_ASSERT(backend_index >= 0 && backend_index < sched->n_backends);
Expand Down
3 changes: 3 additions & 0 deletions llama.cpp/ggml-backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,9 @@ extern "C" {
// Initialize backend buffers from a measure graph
GGML_API bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph);

// Get the number of backends held by the scheduler, and the backend at index i
// (i must be in the range [0, n_backends); this is asserted)
GGML_API int ggml_backend_sched_get_n_backends(ggml_backend_sched_t sched);
GGML_API ggml_backend_t ggml_backend_sched_get_backend(ggml_backend_sched_t sched, int i);

// Get the number of splits of the last graph
GGML_API int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched);
GGML_API int ggml_backend_sched_get_n_copies(ggml_backend_sched_t sched);
Expand Down
12 changes: 11 additions & 1 deletion llamafile/llamafile.c
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ struct llamafile *llamafile_open_gguf(const char *fname, const char *mode) {
errno = EIO;
return 0;
}
if (ZIP_READ32(buf) == ZIP_READ32("GGUF")) {
if (ZIP_READ32(buf) == ZIP_READ32("GGUF") || ZIP_READ32(buf) == ZIP_READ32("ggml")) {
errno = EINVAL;
return file;
}
Expand All @@ -312,6 +312,16 @@ size_t llamafile_size(struct llamafile *file) {
return file->size;
}

// Returns the position value stored in the llamafile object (file->position).
// NOTE(review): this reads the stored field only; whether it tracks the stdio
// stream offset when file->fp is set is not visible here — confirm.
size_t llamafile_position(struct llamafile *file) {
return file->position;
}

// Reports whether the file has reached its end.
//
// When the object has no stdio stream (file->fp is null), end-of-file means
// file->position has reached or passed file->size. Otherwise the answer is
// whatever feof() reports for the stream.
bool llamafile_eof(struct llamafile *file) {
    if (!file->fp)
        return file->position >= file->size;
    return feof(file->fp);
}

// Returns the content pointer stored in the llamafile object (file->content).
// NOTE(review): when this pointer is non-null is decided elsewhere — confirm
// with the open path before dereferencing.
void *llamafile_content(struct llamafile *file) {
return file->content;
}
Expand Down
2 changes: 2 additions & 0 deletions llamafile/llamafile.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ bool llamafile_seek(struct llamafile *, size_t, int);
void *llamafile_content(struct llamafile *);
size_t llamafile_tell(struct llamafile *);
size_t llamafile_size(struct llamafile *);
// Returns the position value stored in the llamafile object.
size_t llamafile_position(struct llamafile *);
// True at end-of-file: feof() on the stdio stream when there is one,
// otherwise stored position >= stored size.
bool llamafile_eof(struct llamafile *file);
FILE *llamafile_fp(struct llamafile *);
void llamafile_ref(struct llamafile *);
void llamafile_unref(struct llamafile *);
Expand Down
40 changes: 40 additions & 0 deletions whisper.cpp/BUILD.mk
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#-*-mode:makefile-gmake;indent-tabs-mode:t;tab-width:8;coding:utf-8-*-┐
#── vi: set noet ft=make ts=8 sw=8 fenc=utf-8 :vi ────────────────────┘

# Add this package's name to PKGS (PKGS is consumed elsewhere in the build;
# its use is not visible in this file).
PKGS += WHISPER_CPP

# Every entry directly under whisper.cpp/ whose name contains a dot. The glob
# is evaluated once, at parse time, because of the := assignment.
WHISPER_CPP_FILES := $(wildcard whisper.cpp/*.*)
# Header files: all *.h entries first, followed by all *.hpp entries.
WHISPER_CPP_HDRS = $(filter %.h,$(WHISPER_CPP_FILES)) \
$(filter %.hpp,$(WHISPER_CPP_FILES))
# Textual include fragments (*.inc).
WHISPER_CPP_INCS = $(filter %.inc,$(WHISPER_CPP_FILES))
# C and C++ translation units, kept separately and then combined.
WHISPER_CPP_SRCS_C = $(filter %.c,$(WHISPER_CPP_FILES))
WHISPER_CPP_SRCS_CPP = $(filter %.cpp,$(WHISPER_CPP_FILES))
WHISPER_CPP_SRCS = $(WHISPER_CPP_SRCS_C) $(WHISPER_CPP_SRCS_CPP)

# Objects belonging to this package: the llamafile objects, followed by one
# o/$(MODE)/<source>.o per C source and per C++ source found in whisper.cpp/.
# Recursively expanded, so $(MODE) is resolved at the point of use.
WHISPER_CPP_OBJS = \
	$(LLAMAFILE_OBJS) \
	$(patsubst %.c,o/$(MODE)/%.o,$(WHISPER_CPP_SRCS_C)) \
	$(patsubst %.cpp,o/$(MODE)/%.o,$(WHISPER_CPP_SRCS_CPP))

# The package archive depends on every object above. No recipe is given here.
# NOTE(review): presumably a generic archive rule defined elsewhere builds it — confirm.
o/$(MODE)/whisper.cpp/whisper.cpp.a: $(WHISPER_CPP_OBJS)

# Target-specific flag additions for every object in WHISPER_CPP_OBJS (that
# list also contains $(LLAMAFILE_OBJS)). The `private` modifier stops the
# additions from being inherited by those targets' own prerequisites.
$(WHISPER_CPP_OBJS): private CCFLAGS += -DGGML_MULTIPLATFORM

# C++-only additions: enable RTTI and silence two warning classes.
$(WHISPER_CPP_OBJS): private CXXFLAGS += \
	-frtti \
	-Wno-alloc-size-larger-than \
	-Wno-deprecated-declarations

# The whisper server program depends on its own object, this package's
# archive, and the llama.cpp archive, in that order. No recipe is given here.
# NOTE(review): presumably a generic link rule defined elsewhere consumes these — confirm.
o/$(MODE)/whisper.cpp/server: \
	o/$(MODE)/whisper.cpp/server.o \
	o/$(MODE)/whisper.cpp/whisper.cpp.a \
	o/$(MODE)/llama.cpp/llama.cpp.a

# Editing this build file makes every object in WHISPER_CPP_OBJS out of date.
$(WHISPER_CPP_OBJS): whisper.cpp/BUILD.mk

# Phony aggregate target for the package; it currently asks only for the server.
.PHONY: o/$(MODE)/whisper.cpp
o/$(MODE)/whisper.cpp: o/$(MODE)/whisper.cpp/server
Loading