Skip to content

Commit

Permalink
Merge 'origin/master' into hipblas
Browse files Browse the repository at this point in the history
  • Loading branch information
SlyEcho committed Jul 2, 2023
2 parents 04419f1 + 46088f7 commit 15db19a
Show file tree
Hide file tree
Showing 21 changed files with 580 additions and 251 deletions.
5 changes: 0 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -419,11 +419,6 @@ if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES
if (MSVC)
# TODO: arm msvc?
else()
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64")
# Apple M1, M2, etc.
# Raspberry Pi 3, 4, Zero 2 (64-bit)
add_compile_options(-mcpu=native)
endif()
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6")
# Raspberry Pi 1, Zero
add_compile_options(-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access)
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ as the main playground for developing new features for the [ggml](https://github
- [X] [OpenBuddy 🐶 (Multilingual)](https://github.com/OpenBuddy/OpenBuddy)
- [X] [Pygmalion 7B / Metharme 7B](#using-pygmalion-7b--metharme-7b)
- [X] [WizardLM](https://github.com/nlpxucan/WizardLM)
- [X] [Baichuan-7B](https://huggingface.co/baichuan-inc/baichuan-7B)

**Bindings:**

Expand Down
41 changes: 36 additions & 5 deletions convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def find_n_mult(n_ff: int, n_embd: int) -> int:
calc_ff = (((8*n_embd) // 3 + n_mult - 1) // n_mult)*n_mult
if calc_ff == n_ff:
return n_mult
return 1
raise Exception(f"failed to find n_mult for (n_ff={n_ff}, n_embd={n_embd}).")

@dataclass
class Params:
Expand Down Expand Up @@ -321,6 +321,10 @@ def astype(self, data_type: DataType) -> 'Tensor': ...
@abstractmethod
def permute(self, n_head: int) -> 'Tensor': ...
@abstractmethod
def permute_part(self, n_part: int, n_head: int) -> 'UnquantizedTensor': ...
@abstractmethod
def part(self, n_part: int) -> 'UnquantizedTensor': ...
@abstractmethod
def to_ggml(self) -> 'GGMLCompatibleTensor': ...


Expand All @@ -345,6 +349,14 @@ def astype(self, data_type: DataType) -> Tensor:
def to_ggml(self) -> 'UnquantizedTensor':
return self

def permute_part(self, n_part: int, n_head: int) -> 'UnquantizedTensor':
r = self.ndarray.shape[0] // 3
return UnquantizedTensor(permute(self.ndarray[r * n_part : r * n_part + r, ...], n_head))

def part(self, n_part: int) -> 'UnquantizedTensor':
r = self.ndarray.shape[0] // 3
return UnquantizedTensor(self.ndarray[r * n_part : r * n_part + r, ...])

def permute(self, n_head: int) -> 'UnquantizedTensor':
return UnquantizedTensor(permute(self.ndarray, n_head))

Expand Down Expand Up @@ -642,6 +654,19 @@ def load() -> Tensor:
return lazy_tensor.load().permute(n_head)
return LazyTensor(load, lazy_tensor.shape, lazy_tensor.data_type, f'permute({n_head}) ' + lazy_tensor.description)

def permute_part_lazy(lazy_tensor: LazyTensor, n_part: int, n_head: int) -> LazyTensor:
def load() -> Tensor:
return lazy_tensor.load().permute_part(n_part, n_head)
s = lazy_tensor.shape.copy()
s[0] = s[0] // 3
return LazyTensor(load, s, lazy_tensor.data_type, f'permute({n_head}) ' + lazy_tensor.description)

def part_lazy(lazy_tensor: LazyTensor, n_part: int) -> LazyTensor:
def load() -> Tensor:
return lazy_tensor.load().part(n_part)
s = lazy_tensor.shape.copy()
s[0] = s[0] // 3
return LazyTensor(load, s, lazy_tensor.data_type, 'part ' + lazy_tensor.description)

def convert_transformers_to_orig(model: LazyModel, params: Params) -> LazyModel:
out: LazyModel = {}
Expand All @@ -650,11 +675,17 @@ def convert_transformers_to_orig(model: LazyModel, params: Params) -> LazyModel:
out["output.weight"] = model["lm_head.weight"]

for i in itertools.count():
if f"model.layers.{i}.self_attn.q_proj.weight" not in model:
if f"model.layers.{i}.self_attn.q_proj.weight" in model:
out[f"layers.{i}.attention.wq.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.q_proj.weight"], params.n_head)
out[f"layers.{i}.attention.wk.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.k_proj.weight"], params.n_head)
out[f"layers.{i}.attention.wv.weight"] = model[f"model.layers.{i}.self_attn.v_proj.weight"]
elif f"model.layers.{i}.self_attn.W_pack.weight" in model:
out[f"layers.{i}.attention.wq.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 0, params.n_head)
out[f"layers.{i}.attention.wk.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 1, params.n_head)
out[f"layers.{i}.attention.wv.weight"] = part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 2)
else:
break
out[f"layers.{i}.attention.wq.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.q_proj.weight"], params.n_head)
out[f"layers.{i}.attention.wk.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.k_proj.weight"], params.n_head)
out[f"layers.{i}.attention.wv.weight"] = model[f"model.layers.{i}.self_attn.v_proj.weight"]

out[f"layers.{i}.attention.wo.weight"] = model[f"model.layers.{i}.self_attn.o_proj.weight"]

out[f"layers.{i}.feed_forward.w1.weight"] = model[f"model.layers.{i}.mlp.gate_proj.weight"]
Expand Down
2 changes: 1 addition & 1 deletion examples/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
invalid_param = true;
break;
}
params.seed = std::stoi(argv[i]);
params.seed = std::stoul(argv[i]);
} else if (arg == "-t" || arg == "--threads") {
if (++i >= argc) {
invalid_param = true;
Expand Down
2 changes: 1 addition & 1 deletion examples/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
int32_t get_num_physical_cores();

struct gpt_params {
int32_t seed = -1; // RNG seed
uint32_t seed = -1; // RNG seed
int32_t n_threads = get_num_physical_cores();
int32_t n_predict = -1; // new tokens to predict
int32_t n_ctx = 512; // context size
Expand Down
9 changes: 6 additions & 3 deletions examples/embd-input/embd-input-lib.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -210,9 +210,12 @@ llama_token sampling_id(struct MyModel* mymodel) {
const char * sampling(struct MyModel * mymodel) {
llama_context * ctx = mymodel->ctx;
int id = sampling_id(mymodel);
std::string ret;
if (id == llama_token_eos()) ret = "</s>";
else ret = llama_token_to_str(ctx, id);
static std::string ret;
if (id == llama_token_eos()) {
ret = "</s>";
} else {
ret = llama_token_to_str(ctx, id);
}
eval_id(mymodel, id);
return ret.c_str();
}
Expand Down
4 changes: 1 addition & 3 deletions examples/embd-input/embd-input.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
#include "llama.h"
#include "build-info.h"


extern "C" {

typedef struct MyModel {
Expand All @@ -14,14 +13,13 @@ typedef struct MyModel {
int n_past = 0;
} MyModel;


struct MyModel* create_mymodel(int argc, char ** argv);

bool eval_float(void* model, float* input, int N);
bool eval_tokens(void* model, std::vector<llama_token> tokens);
bool eval_id(struct MyModel* mymodel, int id);
bool eval_string(struct MyModel* mymodel, const char* str);
const char* sampling(struct MyModel* mymodel);
const char * sampling(struct MyModel* mymodel);
llama_token sampling_id(struct MyModel* mymodel);
void free_mymodel(struct MyModel* mymodel);

Expand Down
4 changes: 2 additions & 2 deletions examples/embedding/embedding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,11 @@ int main(int argc, char ** argv) {

fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT);

if (params.seed < 0) {
if (params.seed == LLAMA_DEFAULT_SEED) {
params.seed = time(NULL);
}

fprintf(stderr, "%s: seed = %d\n", __func__, params.seed);
fprintf(stderr, "%s: seed = %u\n", __func__, params.seed);

std::mt19937 rng(params.seed);
if (params.random_prompt) {
Expand Down
2 changes: 1 addition & 1 deletion examples/main/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ Example usage: `--logit-bias 29905-inf`

### RNG Seed

- `-s SEED, --seed SEED`: Set the random number generator (RNG) seed (default: -1, < 0 = random seed).
- `-s SEED, --seed SEED`: Set the random number generator (RNG) seed (default: -1, -1 = random seed).

The RNG seed is used to initialize the random number generator that influences the text generation process. By setting a specific seed value, you can obtain consistent and reproducible results across multiple runs with the same input and settings. This can be helpful for testing, debugging, or comparing the effects of different options on the generated text to see when they diverge. If the seed is set to a value less than 0, a random seed will be used, which will result in different outputs on each run.

Expand Down
4 changes: 2 additions & 2 deletions examples/main/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,11 +94,11 @@ int main(int argc, char ** argv) {

fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT);

if (params.seed < 0) {
if (params.seed == LLAMA_DEFAULT_SEED) {
params.seed = time(NULL);
}

fprintf(stderr, "%s: seed = %d\n", __func__, params.seed);
fprintf(stderr, "%s: seed = %u\n", __func__, params.seed);

std::mt19937 rng(params.seed);
if (params.random_prompt) {
Expand Down
4 changes: 2 additions & 2 deletions examples/perplexity/perplexity.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,11 +136,11 @@ int main(int argc, char ** argv) {

fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT);

if (params.seed < 0) {
if (params.seed == LLAMA_DEFAULT_SEED) {
params.seed = time(NULL);
}

fprintf(stderr, "%s: seed = %d\n", __func__, params.seed);
fprintf(stderr, "%s: seed = %u\n", __func__, params.seed);

std::mt19937 rng(params.seed);
if (params.random_prompt) {
Expand Down
2 changes: 1 addition & 1 deletion examples/server/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ node .

`mirostat_eta`: Set the Mirostat learning rate, parameter eta (default: 0.1).

`seed`: Set the random number generator (RNG) seed (default: -1, < 0 = random seed).
`seed`: Set the random number generator (RNG) seed (default: -1, -1 = random seed).

`ignore_eos`: Ignore end of stream token and continue generating (default: false).

Expand Down
9 changes: 5 additions & 4 deletions examples/train-text-from-scratch/train-text-from-scratch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2671,7 +2671,8 @@ struct train_params {
const char * fn_checkpoint_out;
const char * fn_model_out;

int seed;
uint32_t seed;

int n_ctx;
int n_embd;
int n_mult;
Expand Down Expand Up @@ -2768,7 +2769,7 @@ void train_print_usage(int /*argc*/, char ** argv, const struct train_params * p
fprintf(stderr, " --checkpoint-in FNAME path from which to load training checkpoint (default '%s')\n", params->fn_checkpoint_in);
fprintf(stderr, " --checkpoint-out FNAME path to save training checkpoint (default '%s')\n", params->fn_checkpoint_out);
fprintf(stderr, " --model-out FNAME path to save ggml model (default '%s')\n", params->fn_model_out);
fprintf(stderr, " -s SEED, --seed SEED RNG seed (default: -1, use random seed for < 0)\n");
fprintf(stderr, " -s SEED, --seed SEED RNG seed (default: -1, use random seed for -1)\n");
fprintf(stderr, " -c N, --ctx N Context size used during training (default %d)\n", params->n_ctx);
fprintf(stderr, " --embd N Embedding size used for new models (default %d)\n", params->n_embd);
fprintf(stderr, " --mult N Mult size used for new models, influences feedforward size. (default %d)\n", params->n_mult);
Expand Down Expand Up @@ -3034,10 +3035,10 @@ int main(int argc, char ** argv) {
return 1;
}

if (params.seed < 0) {
if (params.seed == LLAMA_DEFAULT_SEED) {
params.seed = time(NULL);
}
printf("%s: seed: %d\n", __func__, params.seed);
printf("%s: seed: %u\n", __func__, params.seed);
srand(params.seed);

struct llama_context_params llama_params = llama_context_default_params();
Expand Down
Loading

0 comments on commit 15db19a

Please sign in to comment.