Fixed to compile with the latest ggml from llama.cpp.

Fixed a bug in the tokenizer (it aborted on the input word "illustration"). Fixed handling of non-square images (width != height).
aagdev · Sep 7, 2024 · 3248e31 · 3248e31
1 parent 07d0180
commit 3248e31
Show file tree

Hide file tree

Showing 22 changed files with 361 additions and 207 deletions.
diff --git a/Makefile b/Makefile
@@ -24,8 +24,8 @@ cflags += -Wno-pedantic
 ldlibs += -lggml
 ldflags += -L$(GGML_LIB_PATH) -Wl,-rpath,$(GGML_LIB_PATH)
 
-## ggml scheduler is need for incomplete backends (e.g. Vulkan)
-ifndef MLIS_NO_GGML_SCHED
+## ggml scheduler is needed for incomplete backends (no longer needed for Vulkan)
+ifdef MLIS_GGML_SCHED
 mlimgsynth: cppflags += -DUSE_GGML_SCHED=1
 endif
 
@@ -51,8 +51,8 @@ endif
 ### Binary targets
 rng-test: $(common) rng_philox.o rng-test.o
 
-st-util: $(common) ids.o tensorstore.o safetensors.o st-util.o
+st-util: $(common) tensorstore.o safetensors.o st-util.o
 
-mlimgsynth: $(common) ids.o localtensor.o tensorstore.o safetensors.o \
+mlimgsynth: $(common) localtensor.o tensorstore.o safetensors.o \
 	ggml_extend.o mlblock.o mlblock_nn.o rng_philox.o tae.o vae.o clip.o unet.o \
 	solvers.o util.o mlimgsynth.o
diff --git a/src/ccommon/alloc.h b/src/ccommon/alloc.h
@@ -86,7 +86,7 @@ void * alloc_alloc(Allocator* a, size_t sz) {
 // May be larger than the requested size. The additional space can be used normally.
 static inline
 size_t alloc_size(const Allocator* a, const void* p) {
-	assert(a->flags & ALLOC_F_HAS_SIZE4);
+	assert(!a || a->flags & ALLOC_F_HAS_SIZE4);
 	return p ? ((uint32_t*)p)[-1] & ALLOC_SIZE_MASK : 0;
 }
 

diff --git a/src/ccommon/base.mk b/src/ccommon/base.mk
@@ -13,12 +13,12 @@
 objdir = obj
 depdir = .d
 
-flags = $(FLAGS)
-cflags = -std=c99 -Wall -pedantic $(CFLAGS)
+flags    = $(FLAGS)
 cppflags = $(CPPFLAGS)
+cflags   = -std=c99 -Wall -pedantic $(CFLAGS)
 cxxflags = $(CXXFLAGS)
-ldlibs = $(LDLIBS)
-ldflags = $(LDFLAGS)
+ldlibs   = $(LDLIBS)
+ldflags  = $(LDFLAGS)
 
 depflags = -MT $@ -MMD -MP -MF $(depdir)/$*.d
 
@@ -60,8 +60,8 @@ endif
 .SECONDARY:
 
 ### Commands
-COMPILE_C = $(CC) $(depflags) $(flags) $(cppflags) $(cflags) -c -o $@ $<
-COMPILE_CPP = $(CXX) $(depflags) $(flags) $(cppflags) $(cxxflags) -c -o $@ $<
+COMPILE_C   = $(CC)  $(depflags) $(flags) $(cppflags) $(cflags)   -c -o $@ $<
+COMPILE_CXX = $(CXX) $(depflags) $(flags) $(cppflags) $(cxxflags) -c -o $@ $<
 #LINK = $(CC) $(flags) $(ldflags) -o $@ $^ $(ldlibs)
 LINK = $(CC) $(flags) $(ldflags) -o $@ \
 	$(addprefix $(objdir)/,$(filter %.o,$^)) $(ldlibs)
@@ -124,10 +124,10 @@ endif
 
 $(objdir)/%.o: %.cpp
 ifdef verbose
-	$(COMPILE_CPP)
+	$(COMPILE_CXX)
 else
-	@echo "CC $@"
-	@$(COMPILE_CPP)
+	@echo "CXX $@"
+	@$(COMPILE_CXX)
 endif
 
 ###

diff --git a/src/ccommon/logging.h b/src/ccommon/logging.h
@@ -85,6 +85,7 @@ __attribute__((format(printf, 2, 3)))
 void log_logf(int level, const char format[], ...);
 
 // Low level interface, no checking
+//TODO: join line_begin and level_check
 void log_line_begin(int level);
 
 void log_line_str(const char* str);

diff --git a/src/ccommon/stream.c b/src/ccommon/stream.c
@@ -283,11 +283,19 @@ int stream_read_prep(Stream* S, size_t nbytes)
 	IFFALSESET(ncheck, STREAM_BUFFER_SIZE/2);
 
 	// Read more data
-	if (!(S->cursor+ncheck <= S->cursor_end) && S->cls->read)
-	{
-		TRYR( stream_ibuffer_increase(S, nbytes) );
-		stream_read_buffer_reposition(S);
-		TRYR( stream_read_buffer_fill(S) );
+	if (S->cls->read) {
+		if (!(S->cursor+ncheck <= S->cursor_end))
+		{
+			TRYR( stream_ibuffer_increase(S, nbytes) );
+			stream_read_buffer_reposition(S);
+			TRYR( stream_read_buffer_fill(S) );
+		}
+	}
+	else {
+		if (S->cursor == S->cursor_end)
+			S->flags |= SF_END;
+		else
+			S->flags &= ~SF_END;
 	}
 
 	if (nbytes && !(S->cursor+nbytes <= S->cursor_end))

diff --git a/src/ccommon/stream.h b/src/ccommon/stream.h
@@ -235,7 +235,7 @@ long stream_write(Stream*restrict S, size_t nbytes, const void*restrict buffer);
  * Returns 0 on success.
  */
 static inline
-int stream_write_chk(Stream*restrict S, size_t nbytes, void*restrict buffer);
+int stream_write_chk(Stream*restrict S, size_t nbytes, const void*restrict buffer);
 
 /* Changes the position of the stream cursor if posible.
  *
@@ -332,6 +332,15 @@ enum {
 	STREAM_STD_ERR = 2,
 };
 
+/* Return true if the stream is fully memory mapped.
+ * In that case, the buffers returned are not invalidated by any operation
+ * except closing the stream.
+ */
+static inline
+bool stream_mmap_is(Stream* S) {
+	return !S->cls->read && !S->cls->write && !S->cls->seek;
+}
+
 /* Loads a file completely in a memory stream.
  * Returns 0 on success, a negative error code on failure.
  */
@@ -486,7 +495,7 @@ long stream_write(Stream*restrict S, size_t nbytes, const void*restrict buffer)
 }
 
 static inline
-int stream_write_chk(Stream*restrict S, size_t nbytes, void*restrict buffer)
+int stream_write_chk(Stream*restrict S, size_t nbytes, const void*restrict buffer)
 {
 	long r = stream_write(S, nbytes, buffer);
 	if (r < 0) return r;

diff --git a/src/ccommon/vector.h b/src/ccommon/vector.h
@@ -32,6 +32,7 @@
 #include <string.h>
 
 //TODO: use alloc_size for capacity ?
+//TODO: vec_push(P, ...) to allow commas in the value ?
 
 #ifndef VECTOR_DEF_ALLOC
 #define VECTOR_DEF_ALLOC  g_allocator_dopt

diff --git a/src/clip.c b/src/clip.c
@@ -140,6 +140,7 @@ void str_lower(DynStr str) {
 int clip_tokr_tokenize(ClipTokenizer* S, const char* cur, int32_t** pout)
 {
 	int R=1;
+	DynStr word=NULL, bigram=NULL;
 	const char *end = cur + strlen(cur);
 
 	while (1) {
@@ -172,8 +173,6 @@ int clip_tokr_tokenize(ClipTokenizer* S, const char* cur, int32_t** pout)
 		}
 
 		size_t len = cur-beg;
-		DynStr word=dstr_stack(32);
-		if (len >= vec_capacity(word)) ERROR_LOG(-1, "word too long");
 		dstr_copy(word, len, beg);
 		str_lower(word);
 		dstr_appendz(word, "</w>");
@@ -196,7 +195,6 @@ int clip_tokr_tokenize(ClipTokenizer* S, const char* cur, int32_t** pout)
 
 		// BPE (byte pair encoding)
 		unsigned nvocab = strsto_count(&S->vocab);
-		DynStr bigram=dstr_stack(16);
 		while (vec_count(tokens) >= 2) {
 			StringInt best_iv=nvocab;
 			unsigned best_ib=vec_count(breaks);
@@ -224,6 +222,8 @@ int clip_tokr_tokenize(ClipTokenizer* S, const char* cur, int32_t** pout)
 
 end:
 	if (R<0) log_error("CLIP tokenizer");
+	dstr_free(bigram);
+	dstr_free(word);
 	return R;
 }
 
@@ -234,11 +234,12 @@ MLTensor* mlb_clip_embeddings(MLCtx* C, MLTensor* x, MLTensor* tw,
 	mlctx_block_begin(C);
 	// x: [N, n_token]
 
-	if (tw)
+	if (tw) {
 		GGML_ASSERT(tw->ne[0] == d_embed);
-	else
+	} else {
 		tw = MLN("token_embedding.weight",
 			ggml_new_tensor_2d(C->cp, C->c.wtype, d_embed, n_vocab));
+	}
 
 	pw = MLN("position_embedding.weight",
 		ggml_new_tensor_2d(C->cp, GGML_TYPE_F32, d_embed, n_token));
@@ -370,15 +371,15 @@ int clip_text_encode(MLCtx* C, const ClipParams* P,
 	// Prepare computation
 	mlctx_begin(C, "CLIP text encode");
 
-	MLTensor *input = mlctx_input_add(C, "tokens", GGML_TYPE_I32, P->n_token,1,1,1);
+	MLTensor *input = mlctx_input_new(C, "tokens", GGML_TYPE_I32, P->n_token,1,1,1);
 	MLTensor *t_embed = mlb_clip_text(C, input, NULL, P, clip_skip, norm);
 
 	MLTensor *result=t_embed, *t_feat=NULL;
 	if (feat)
 		result = t_feat = mlb_clip_text_proj(C, t_embed, ntok+1);
 
 	mlctx_tensor_add(C, "text", result);
-	TRY( mlctx_prep(C, result) );
+	TRY( mlctx_prep(C) );
 
 	// Set input
 	ggml_backend_tensor_set(input, tokens, 0, vec_bytesize(tokens));

diff --git a/src/ids.c b/src/ids.c
diff --git a/src/ids.h b/src/ids.h
@@ -49,3 +49,22 @@ static inline int id_fromsl(const StrSlice sl) {
 static inline int id_fromz(const char* str) {
 	return str && str[0] ? strsto_add(&g_ss, strsl_fromz(str)) : 0;
 }
+
+/* Implementation */
+#ifdef IDS_IMPLEMENTATION
+#undef IDS_IMPLEMENTATION
+StringStore g_ss;
+
+const char * ids_str[ID__END] = {
+#define S(X)  #X,
+#define S2(X,S) S,
+	LIST_OF_IDS
+#undef S2
+#undef S
+};
+
+void ids_init() {
+	for (unsigned i=0; i<ID__END; ++i)	
+		strsto_add2(&g_ss, strsl_fromz(ids_str[i]), i, true);
+}
+#endif
diff --git a/src/localtensor.c b/src/localtensor.c
@@ -83,11 +83,11 @@ void ltensor_from_image(LocalTensor* S, const Image* img)
 {
 	int n0=img->w, n1=img->h, n2=img->bypp;
 	ltensor_resize(S, n0, n1, n2, 1);
-	for (int y=0; y<n0; ++y) {
-		for (int x=0; x<n1; ++x) {
+	for (int y=0; y<n1; ++y) {
+		for (int x=0; x<n0; ++x) {
 			for (int c=0; c<n2; ++c) {
 				float v = IMG_INDEX3(*img, x, y, c) / 255.0f;
-				S->d[n0*n1*c +n1*y +x] = v;
+				S->d[n0*n1*c +n0*y +x] = v;
 			}
 		}
 	}
@@ -98,10 +98,10 @@ void ltensor_to_image(const LocalTensor* S, Image* img)
 	int n0=S->s[0], n1=S->s[1], n2=S->s[2];
 	assert(S->s[2] == 3 && S->s[3] == 1);
 	img_resize(img, n0, n1, IMG_FORMAT_RGB, 0);
-	for (int y=0; y<n0; ++y) {
-		for (int x=0; x<n1; ++x) {
+	for (int y=0; y<n1; ++y) {
+		for (int x=0; x<n0; ++x) {
 			for (int c=0; c<n2; ++c) {
-				float v = S->d[n0*n1*c +n1*y +x];
+				float v = S->d[n0*n1*c +n0*y +x];
 				ccCLAMP(v, 0, 1);
 				IMG_INDEX3(*img, x, y, c) = v * 255.0f;
 			}