Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Introduce new GGUFValueType.OBJ virtual type🌠 #5143

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -854,3 +854,6 @@ tests/test-model-load-cancel: tests/test-model-load-cancel.cpp ggml.o llama.o te
# Build tests/test-autorelease in two steps: first compile the test source ($<)
# to the object path produced by GET_OBJ_FILE, then link that object with the
# remaining prerequisites (headers and the test source itself are filtered out of $^).
tests/test-autorelease: tests/test-autorelease.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS)
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

# Build tests/test-gguf-meta the same way as the neighbouring test targets:
# compile the test source ($<) to the object path produced by GET_OBJ_FILE,
# then link that object with the remaining prerequisites (headers and the test
# source itself are filtered out of $^).
tests/test-gguf-meta: tests/test-gguf-meta.cpp ggml.o llama.o tests/get-model.cpp $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
61 changes: 4 additions & 57 deletions examples/llava/clip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"
#include "llama.h"

#ifdef GGML_USE_CUBLAS
#include "ggml-cuda.h"
Expand Down Expand Up @@ -148,24 +149,6 @@ static std::string get_ftype(int ftype) {
return ggml_type_name(static_cast<ggml_type>(ftype));
}

// Render element `i` of the typed buffer `data` as text.
// Numeric element types go through std::to_string, booleans become
// "true"/"false", and any other type yields an "unknown type" message.
static std::string gguf_data_to_str(enum gguf_type type, const void * data, int i) {
    if (type == GGUF_TYPE_BOOL) {
        return static_cast<const bool *>(data)[i] ? "true" : "false";
    }

    switch (type) {
        case GGUF_TYPE_UINT8:   return std::to_string(static_cast<const uint8_t  *>(data)[i]);
        case GGUF_TYPE_INT8:    return std::to_string(static_cast<const int8_t   *>(data)[i]);
        case GGUF_TYPE_UINT16:  return std::to_string(static_cast<const uint16_t *>(data)[i]);
        case GGUF_TYPE_INT16:   return std::to_string(static_cast<const int16_t  *>(data)[i]);
        case GGUF_TYPE_UINT32:  return std::to_string(static_cast<const uint32_t *>(data)[i]);
        case GGUF_TYPE_INT32:   return std::to_string(static_cast<const int32_t  *>(data)[i]);
        case GGUF_TYPE_UINT64:  return std::to_string(static_cast<const uint64_t *>(data)[i]);
        case GGUF_TYPE_INT64:   return std::to_string(static_cast<const int64_t  *>(data)[i]);
        case GGUF_TYPE_FLOAT32: return std::to_string(static_cast<const float    *>(data)[i]);
        case GGUF_TYPE_FLOAT64: return std::to_string(static_cast<const double   *>(data)[i]);
        default:                break;
    }

    return format("unknown type %d", type);
}


static void replace_all(std::string & s, const std::string & search, const std::string & replace) {
std::string result;
for (size_t pos = 0; ; pos += search.length()) {
Expand All @@ -180,43 +163,6 @@ static void replace_all(std::string & s, const std::string & search, const std::
s = std::move(result);
}

// Convert the value of KV pair `i` in `ctx_gguf` to a printable string.
//  - string values are returned as-is
//  - arrays are rendered as "[a, b, ...]"; string elements are quoted with
//    backslashes and double quotes escaped, nested arrays are shown as "???"
//  - every other type is formatted by gguf_data_to_str
static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) {
    const enum gguf_type kv_type = gguf_get_kv_type(ctx_gguf, i);

    if (kv_type == GGUF_TYPE_STRING) {
        return gguf_get_val_str(ctx_gguf, i);
    }
    if (kv_type != GGUF_TYPE_ARRAY) {
        return gguf_data_to_str(kv_type, gguf_get_val_data(ctx_gguf, i), 0);
    }

    const enum gguf_type elem_type = gguf_get_arr_type(ctx_gguf, i);
    const int            n_elems   = gguf_get_arr_n(ctx_gguf, i);
    const void *         elems     = gguf_get_arr_data(ctx_gguf, i);

    std::stringstream out;
    out << "[";
    for (int idx = 0; idx < n_elems; ++idx) {
        // separator goes in front of every element except the first
        if (idx > 0) {
            out << ", ";
        }
        switch (elem_type) {
            case GGUF_TYPE_STRING:
                {
                    std::string text = gguf_get_arr_str(ctx_gguf, i, idx);
                    // escape quotes
                    replace_all(text, "\\", "\\\\");
                    replace_all(text, "\"", "\\\"");
                    out << '"' << text << '"';
                } break;
            case GGUF_TYPE_ARRAY:
                out << "???";
                break;
            default:
                out << gguf_data_to_str(elem_type, elems, idx);
                break;
        }
    }
    out << "]";
    return out.str();
}

static void print_tensor_info(const ggml_tensor* tensor, const char* prefix = "") {
size_t tensor_size = ggml_nbytes(tensor);
printf("%s: n_dims = %d, name = %s, tensor_size=%zu, shape:[%" PRId64 ", %" PRId64 ", %" PRId64 ", %" PRId64 "], type = %s\n",
Expand Down Expand Up @@ -784,11 +730,12 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
const char * name = gguf_get_key(ctx, i);
const enum gguf_type type = gguf_get_kv_type(ctx, i);
const std::string type_name =
type == GGUF_TYPE_ARRAY
type == GGUF_TYPE_ARRAY || type == GGUF_TYPE_OBJ
? format("%s[%s,%d]", gguf_type_name(type), gguf_type_name(gguf_get_arr_type(ctx, i)), gguf_get_arr_n(ctx, i))
: gguf_type_name(type);

std::string value = gguf_kv_to_str(ctx, i);
char * v = gguf_kv_to_c_str(ctx, i, name);
std::string value = v;
const size_t MAX_VALUE_LEN = 40;
if (value.size() > MAX_VALUE_LEN) {
value = format("%s...", value.substr(0, MAX_VALUE_LEN - 3).c_str());
Expand Down
70 changes: 60 additions & 10 deletions ggml.c
Original file line number Diff line number Diff line change
Expand Up @@ -19284,8 +19284,9 @@ static const size_t GGUF_TYPE_SIZE[GGUF_TYPE_COUNT] = {
[GGUF_TYPE_INT64] = sizeof(int64_t),
[GGUF_TYPE_FLOAT64] = sizeof(double),
[GGUF_TYPE_ARRAY] = 0, // undefined
[GGUF_TYPE_OBJ] = 0, // undefined
};
static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");
static_assert(GGUF_TYPE_COUNT == 14, "GGUF_TYPE_COUNT != 14");

static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = {
[GGUF_TYPE_UINT8] = "u8",
Expand All @@ -19301,8 +19302,9 @@ static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = {
[GGUF_TYPE_UINT64] = "u64",
[GGUF_TYPE_INT64] = "i64",
[GGUF_TYPE_FLOAT64] = "f64",
[GGUF_TYPE_OBJ] = "obj",
};
static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");
static_assert(GGUF_TYPE_COUNT == 14, "GGUF_TYPE_COUNT != 14");

union gguf_value {
uint8_t uint8;
Expand Down Expand Up @@ -19525,6 +19527,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
case GGUF_TYPE_FLOAT64: ok = ok && gguf_fread_el (file, &kv->value.float64, sizeof(kv->value.float64), &offset); break;
case GGUF_TYPE_BOOL: ok = ok && gguf_fread_el (file, &kv->value.bool_, sizeof(kv->value.bool_), &offset); break;
case GGUF_TYPE_STRING: ok = ok && gguf_fread_str(file, &kv->value.str, &offset); break;
case GGUF_TYPE_OBJ:
case GGUF_TYPE_ARRAY:
{
ok = ok && gguf_fread_el(file, &kv->value.arr.type, sizeof(kv->value.arr.type), &offset);
Expand Down Expand Up @@ -19571,7 +19574,8 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
ok = ok && gguf_fread_str(file, &((struct gguf_str *) kv->value.arr.data)[j], &offset);
}
} break;
case GGUF_TYPE_ARRAY:
case GGUF_TYPE_OBJ:
case GGUF_TYPE_ARRAY: break;
default: GGML_ASSERT(false && "invalid type"); break;
}
} break;
Expand Down Expand Up @@ -19778,7 +19782,7 @@ void gguf_free(struct gguf_context * ctx) {
}
}

if (kv->type == GGUF_TYPE_ARRAY) {
if (kv->type == GGUF_TYPE_ARRAY || kv->type == GGUF_TYPE_OBJ) {
if (kv->value.arr.data) {
if (kv->value.arr.type == GGUF_TYPE_STRING) {
for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
Expand Down Expand Up @@ -19863,7 +19867,7 @@ enum gguf_type gguf_get_kv_type(const struct gguf_context * ctx, int key_id) {

// Return the element type of the array-like KV pair at `key_id`.
// Aborts via GGML_ASSERT when `key_id` is out of range or the pair is neither
// GGUF_TYPE_ARRAY nor GGUF_TYPE_OBJ.
enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int key_id) {
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
    // a single check covering both kinds; an additional ARRAY-only assert here
    // would reject OBJ pairs before this one is reached
    GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY || ctx->kv[key_id].type == GGUF_TYPE_OBJ);
    return ctx->kv[key_id].value.arr.type;
}

Expand All @@ -19875,15 +19879,15 @@ const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id) {

// Return the character data of element `i` of the array-like KV pair at `key_id`.
// Aborts via GGML_ASSERT when `key_id` is out of range or the pair is neither
// GGUF_TYPE_ARRAY nor GGUF_TYPE_OBJ.
// NOTE(review): the element type is not checked and `i` is not bounds-checked
// here — presumably the caller guarantees string elements and a valid index.
const char * gguf_get_arr_str(const struct gguf_context * ctx, int key_id, int i) {
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
    // a single check covering both kinds; an additional ARRAY-only assert here
    // would reject OBJ pairs before this one is reached
    GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY || ctx->kv[key_id].type == GGUF_TYPE_OBJ);
    struct gguf_kv * kv = &ctx->kv[key_id];
    struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[i];
    return str->data;
}

// Return the element count of the array-like KV pair at `key_id`.
// Aborts via GGML_ASSERT when `key_id` is out of range or the pair is neither
// GGUF_TYPE_ARRAY nor GGUF_TYPE_OBJ.
int gguf_get_arr_n(const struct gguf_context * ctx, int key_id) {
    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
    // a single check covering both kinds; an additional ARRAY-only assert here
    // would reject OBJ pairs before this one is reached
    GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY || ctx->kv[key_id].type == GGUF_TYPE_OBJ);
    return ctx->kv[key_id].value.arr.n;
}

Expand Down Expand Up @@ -19962,6 +19966,7 @@ const char * gguf_get_val_str(const struct gguf_context * ctx, int key_id) {
// Return a pointer to the value storage of the KV pair at `key_id`.
// Aborts via GGML_ASSERT when `key_id` is out of range or when the pair is of
// type ARRAY, OBJ or STRING; those kinds are rejected by the checks below.
const void * gguf_get_val_data(const struct gguf_context * ctx, int key_id) {
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
GGML_ASSERT(ctx->kv[key_id].type != GGUF_TYPE_ARRAY);
GGML_ASSERT(ctx->kv[key_id].type != GGUF_TYPE_OBJ);
GGML_ASSERT(ctx->kv[key_id].type != GGUF_TYPE_STRING);
// address of the value union itself, not of any particular member
return &ctx->kv[key_id].value;
}
Expand Down Expand Up @@ -20106,6 +20111,10 @@ void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_ty
ctx->kv[idx].type = GGUF_TYPE_ARRAY;
ctx->kv[idx].value.arr.type = type;
ctx->kv[idx].value.arr.n = n;
if (data == NULL) {
ctx->kv[idx].value.arr.data = NULL;
return;
}
ctx->kv[idx].value.arr.data = GGML_MALLOC(n*gguf_type_size(type));
memcpy(ctx->kv[idx].value.arr.data, data, n*gguf_type_size(type));
}
Expand All @@ -20124,6 +20133,38 @@ void gguf_set_arr_str(struct gguf_context * ctx, const char * key, const char **
}
}

void gguf_set_arr_obj(struct gguf_context * ctx, const char * key, int n) {
const int idx = gguf_get_or_add_key(ctx, key);

ctx->kv[idx].type = GGUF_TYPE_ARRAY;
ctx->kv[idx].value.arr.type = GGUF_TYPE_OBJ;
ctx->kv[idx].value.arr.n = n;
ctx->kv[idx].value.arr.data = NULL;
}

void gguf_set_arr_arr(struct gguf_context * ctx, const char * key, int n) {
const int idx = gguf_get_or_add_key(ctx, key);

ctx->kv[idx].type = GGUF_TYPE_ARRAY;
ctx->kv[idx].value.arr.type = GGUF_TYPE_ARRAY;
ctx->kv[idx].value.arr.n = n;
ctx->kv[idx].value.arr.data = NULL;
}

void gguf_set_obj_str(struct gguf_context * ctx, const char * key, const char ** data, int n) {
const int idx = gguf_get_or_add_key(ctx, key);

ctx->kv[idx].type = GGUF_TYPE_OBJ;
ctx->kv[idx].value.arr.type = GGUF_TYPE_STRING;
ctx->kv[idx].value.arr.n = n;
ctx->kv[idx].value.arr.data = GGML_MALLOC(n*sizeof(struct gguf_str));
for (int i = 0; i < n; i++) {
struct gguf_str * str = &((struct gguf_str *)ctx->kv[idx].value.arr.data)[i];
str->n = strlen(data[i]);
str->data = strdup(data[i]);
}
}

// set or add KV pairs from another context
void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
for (uint32_t i = 0; i < src->header.n_kv; i++) {
Expand All @@ -20140,6 +20181,15 @@ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
case GGUF_TYPE_FLOAT64: gguf_set_val_f64 (ctx, src->kv[i].key.data, src->kv[i].value.float64); break;
case GGUF_TYPE_BOOL: gguf_set_val_bool(ctx, src->kv[i].key.data, src->kv[i].value.bool_); break;
case GGUF_TYPE_STRING: gguf_set_val_str (ctx, src->kv[i].key.data, src->kv[i].value.str.data); break;
case GGUF_TYPE_OBJ:
{
const char ** data = malloc(src->kv[i].value.arr.n*sizeof(char *));
for (uint32_t j = 0; j < src->kv[i].value.arr.n; j++) {
data[j] = ((struct gguf_str *)src->kv[i].value.arr.data)[j].data;
}
gguf_set_obj_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n);
free((void *)data);
} break;
case GGUF_TYPE_ARRAY:
{
if (src->kv[i].value.arr.type == GGUF_TYPE_STRING) {
Expand All @@ -20149,8 +20199,6 @@ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
}
gguf_set_arr_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n);
GGML_FREE((void *)data);
} else if (src->kv[i].value.arr.type == GGUF_TYPE_ARRAY) {
GGML_ASSERT(false && "nested arrays not supported");
} else {
gguf_set_arr_data(ctx, src->kv[i].key.data, src->kv[i].value.arr.type, src->kv[i].value.arr.data, src->kv[i].value.arr.n);
}
Expand Down Expand Up @@ -20304,6 +20352,7 @@ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf *
case GGUF_TYPE_FLOAT64: gguf_bwrite_el (buf, &kv->value.float64, sizeof(kv->value.float64)); break;
case GGUF_TYPE_BOOL: gguf_bwrite_el (buf, &kv->value.bool_, sizeof(kv->value.bool_) ); break;
case GGUF_TYPE_STRING: gguf_bwrite_str(buf, &kv->value.str ); break;
case GGUF_TYPE_OBJ:
case GGUF_TYPE_ARRAY:
{
gguf_bwrite_el(buf, &kv->value.arr.type, sizeof(kv->value.arr.type));
Expand All @@ -20330,7 +20379,8 @@ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf *
gguf_bwrite_str(buf, &((struct gguf_str *) kv->value.arr.data)[j]);
}
} break;
case GGUF_TYPE_ARRAY:
case GGUF_TYPE_OBJ:
case GGUF_TYPE_ARRAY: break;
default: GGML_ASSERT(false && "invalid type"); break;
}
} break;
Expand Down
4 changes: 4 additions & 0 deletions ggml.h
Original file line number Diff line number Diff line change
Expand Up @@ -2141,6 +2141,7 @@ extern "C" {
GGUF_TYPE_UINT64 = 10,
GGUF_TYPE_INT64 = 11,
GGUF_TYPE_FLOAT64 = 12,
GGUF_TYPE_OBJ = 13,
GGUF_TYPE_COUNT, // marks the end of the enum
};

Expand Down Expand Up @@ -2212,6 +2213,9 @@ extern "C" {
GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);
GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n);
GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, int n);
GGML_API void gguf_set_arr_obj (struct gguf_context * ctx, const char * key, int n);
GGML_API void gguf_set_arr_arr (struct gguf_context * ctx, const char * key, int n);
GGML_API void gguf_set_obj_str (struct gguf_context * ctx, const char * key, const char ** data, int n);

// set or add KV pairs from another context
GGML_API void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src);
Expand Down
59 changes: 59 additions & 0 deletions gguf-py/gguf/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import sys
from enum import Enum, IntEnum, auto
from typing import Any
import numpy as np

#
# constants
Expand Down Expand Up @@ -550,6 +551,64 @@ class GGUFValueType(IntEnum):
UINT64 = 10
INT64 = 11
FLOAT64 = 12
OBJ = 13

@staticmethod
def get_type_ex(val: Any) -> GGUFValueType:
    """Infer the GGUF value type for ``val``, including integer and float widths.

    ``str``/``bytes``/``bytearray`` map to STRING, ``list`` to ARRAY and
    ``dict`` to OBJ. NumPy scalars map to the GGUF type of the same width.
    A builtin ``float`` maps to FLOAT32 and ``bool`` to BOOL. A builtin
    ``int`` maps to the narrowest unsigned type that holds it when it is
    non-negative, otherwise to the narrowest signed type.

    For an unsupported type, or an integer outside the 64-bit range, a
    message is written to stderr and the process exits with status 1.
    """
    unsigned_types = (
        (np.uint8,  GGUFValueType.UINT8),
        (np.uint16, GGUFValueType.UINT16),
        (np.uint32, GGUFValueType.UINT32),
        (np.uint64, GGUFValueType.UINT64),
    )
    signed_types = (
        (np.int8,  GGUFValueType.INT8),
        (np.int16, GGUFValueType.INT16),
        (np.int32, GGUFValueType.INT32),
        (np.int64, GGUFValueType.INT64),
    )

    if isinstance(val, (str, bytes, bytearray)):
        return GGUFValueType.STRING
    if isinstance(val, list):
        return GGUFValueType.ARRAY

    # NumPy floats are tested before the builtin float because np.float64
    # is itself a subclass of float.
    if isinstance(val, np.float32):
        return GGUFValueType.FLOAT32
    if isinstance(val, np.float64):
        return GGUFValueType.FLOAT64
    if isinstance(val, float):
        return GGUFValueType.FLOAT32

    # bool is a subclass of int, so it must be recognised before the int branch.
    if isinstance(val, bool):
        return GGUFValueType.BOOL

    for np_type, gguf_type in unsigned_types + signed_types:
        if isinstance(val, np_type):
            return gguf_type

    if isinstance(val, int):
        # Non-negative values prefer unsigned types; negative values need signed ones.
        candidates = unsigned_types if val >= 0 else signed_types
        for np_type, gguf_type in candidates:
            limits = np.iinfo(np_type)
            if limits.min <= val <= limits.max:
                return gguf_type
        print("The integer exceed limit:", val, file=sys.stderr)
        # a bare sys.exit() would report success (status 0) to the caller
        sys.exit(1)

    if isinstance(val, dict):
        return GGUFValueType.OBJ

    print("Unknown type:", type(val), file=sys.stderr)
    # a bare sys.exit() would report success (status 0) to the caller
    sys.exit(1)

@staticmethod
def get_type(val: Any) -> GGUFValueType:
Expand Down
Loading
Loading