Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH Use type aliases for nested std::vector types #31

Merged
merged 1 commit into from
Aug 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 24 additions & 24 deletions bark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -522,7 +522,7 @@ bool fine_gpt_eval(
const gpt_model & model,
const int n_threads,
const int codebook_ix,
const std::vector<std::vector<bark_vocab::id>> & embd_inp,
const bark_codes & embd_inp,
std::vector<std::vector<float>> & logits,
size_t & mem_per_token) {
// embd_inp: (n_channels, seq_length)
Expand Down Expand Up @@ -854,7 +854,7 @@ bool gpt_eval(
const int n_threads,
const int n_past,
const bool merge_ctx,
const std::vector<bark_vocab::id> & embd_inp,
const bark_sequence & embd_inp,
std::vector<float> & embd_w,
size_t & mem_per_token) {
int N = embd_inp.size();
Expand Down Expand Up @@ -1227,20 +1227,20 @@ bark_vocab::id gpt_sample(
return gpt_multinomial_sample(logits, rng, temp, eos_p);
}

std::vector<bark_vocab::id> bark_forward_text_encoder(
const std::vector<bark_vocab::id> & tokens,
bark_sequence bark_forward_text_encoder(
const bark_sequence & tokens,
const gpt_model model,
std::mt19937 & rng,
const int n_threads,
const float temp,
const bool early_stop,
const float min_eos_p) {

std::vector<bark_vocab::id> out;
bark_sequence out;
int n_past = 0;
float eos_p = 0;

std::vector<bark_vocab::id> input = tokens;
bark_sequence input = tokens;
std::vector<float> logits;

// dry run to estimate mem_per_token
Expand Down Expand Up @@ -1280,16 +1280,16 @@ std::vector<bark_vocab::id> bark_forward_text_encoder(
return out;
}

std::vector<std::vector<bark_vocab::id>> bark_forward_coarse_encoder(
const std::vector<bark_vocab::id> & tokens,
bark_codes bark_forward_coarse_encoder(
const bark_sequence & tokens,
const gpt_model model,
std::mt19937 & rng,
const int n_threads,
const float temp,
const int max_coarse_history,
const int sliding_window_size) {
std::vector<std::vector<bark_vocab::id>> out_coarse(N_COARSE_CODEBOOKS);
std::vector<bark_vocab::id> out;
bark_codes out_coarse(N_COARSE_CODEBOOKS);
bark_sequence out;

float semantic_to_coarse_ratio = COARSE_RATE_HZ / SEMANTIC_RATE_HZ * N_COARSE_CODEBOOKS;
int max_semantic_history = floorf(max_coarse_history / semantic_to_coarse_ratio);
Expand All @@ -1302,7 +1302,7 @@ std::vector<std::vector<bark_vocab::id>> bark_forward_coarse_encoder(

int n_window_steps = ceilf(static_cast<float>(n_steps) / sliding_window_size);

std::vector<bark_vocab::id> input = tokens;
bark_sequence input = tokens;
std::vector<float> logits;

// dry run to estimate mem_per_token
Expand All @@ -1312,7 +1312,7 @@ std::vector<std::vector<bark_vocab::id>> bark_forward_coarse_encoder(
for(int i = 0; i < n_window_steps; i++) {
int semantic_ix = roundf(n_steps / semantic_to_coarse_ratio);

std::vector<bark_vocab::id> input_in(
bark_sequence input_in(
input.begin() + std::max(semantic_ix-max_semantic_history, 0),
input.end()
);
Expand Down Expand Up @@ -1377,13 +1377,13 @@ std::vector<std::vector<bark_vocab::id>> bark_forward_coarse_encoder(
return out_coarse;
}

std::vector<std::vector<bark_vocab::id>> bark_forward_fine_encoder(
const std::vector<std::vector<bark_vocab::id>> & tokens,
bark_codes bark_forward_fine_encoder(
const bark_codes & tokens,
const gpt_model model,
std::mt19937 & rng,
const int n_threads,
const float temp) {
std::vector<std::vector<bark_vocab::id>> input = tokens;
bark_codes input = tokens;
std::vector<std::vector<float>> logits;

size_t mem_per_token = 0;
Expand All @@ -1394,7 +1394,7 @@ std::vector<std::vector<bark_vocab::id>> bark_forward_fine_encoder(

// channel padding
for(int i = N_COARSE_CODEBOOKS; i < N_FINE_CODEBOOKS; i++) {
std::vector<bark_vocab::id> tmp(original_seq_len, CODEBOOK_SIZE);
bark_sequence tmp(original_seq_len, CODEBOOK_SIZE);
input.push_back(tmp);
}

Expand All @@ -1413,23 +1413,23 @@ std::vector<std::vector<bark_vocab::id>> bark_forward_fine_encoder(

int n_loops = std::max(0, (int) ceilf((input[0].size() - 1024)/512.f)) + 1;

std::vector<std::vector<bark_vocab::id>> in_arr = input;
bark_codes in_arr = input;

for (int n = 0; n < n_loops; n++) {
int start_ix = std::min(n * 512, (int) in_arr[0].size() - 1024);
int start_fill_ix = std::min(n * 512, (int) in_arr[0].size() - 512);
int rel_start_fill_ix = start_fill_ix - start_ix;

std::vector<std::vector<bark_vocab::id>> in_buffer(in_arr.size());
bark_codes in_buffer(in_arr.size());
for (int ix = 0; ix < (int) in_buffer.size(); ix++) {
std::vector<bark_vocab::id> buf(in_arr[ix].begin() + start_ix, in_arr[ix].begin() + start_ix + 1024);
bark_sequence buf(in_arr[ix].begin() + start_ix, in_arr[ix].begin() + start_ix + 1024);
in_buffer[ix] = buf;
}

for (int nn = n_coarse; nn < N_FINE_CODEBOOKS; nn++) {
fine_gpt_eval(model, n_threads, nn, in_buffer, logits, mem_per_token);

std::vector<bark_vocab::id> predictions(CODEBOOK_SIZE - rel_start_fill_ix);
bark_sequence predictions(CODEBOOK_SIZE - rel_start_fill_ix);

for (int i = 0; i < (int) logits.size(); i++) {
logits[i].resize(CODEBOOK_SIZE);
Expand Down Expand Up @@ -1462,7 +1462,7 @@ bool bark_generate_audio(
const bark_vocab& vocab,
const char * text,
const int n_threads) {
std::vector<bark_vocab::id> tokens;
bark_sequence tokens;

// TODO move into params
// const int top_k = 10;
Expand Down Expand Up @@ -1519,15 +1519,15 @@ bool bark_generate_audio(
printf("\n\n");

// encode text (text model)
std::vector<bark_vocab::id> out_semantic = bark_forward_text_encoder(
bark_sequence out_semantic = bark_forward_text_encoder(
tokens, model.text_model, rng, n_threads, temp, early_stop, min_eos_p);

// coarse encoding (coarse model)
std::vector<std::vector<bark_vocab::id>> out_coarse = bark_forward_coarse_encoder(
bark_codes out_coarse = bark_forward_coarse_encoder(
out_semantic, model.coarse_model, rng, n_threads, temp, max_coarse_history, sliding_window_size);

// fine encoding (fine model)
std::vector<std::vector<bark_vocab::id>> out_fine = bark_forward_fine_encoder(
bark_codes out_fine = bark_forward_fine_encoder(
out_coarse, model.fine_model, rng, n_threads, fine_temp);

return true;
Expand Down
15 changes: 9 additions & 6 deletions bark.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ struct bark_vocab {
std::map<id, token> id_to_subword_token;
};

typedef std::vector<bark_vocab::id> bark_sequence;
typedef std::vector<std::vector<bark_vocab::id>> bark_codes;

struct gpt_layer {
// normalization
struct ggml_tensor * ln_1_g;
Expand Down Expand Up @@ -120,7 +123,7 @@ bool gpt_eval(
const int n_threads,
const int n_past,
const bool merge_ctx,
const std::vector<bark_vocab::id> & embd_inp,
const bark_sequence & embd_inp,
std::vector<float> & embd_w,
size_t & mem_per_token);

Expand All @@ -147,16 +150,16 @@ bool bark_generate_audio(
const char * text,
const int n_threads);

std::vector<bark_vocab::id> bark_forward_text_encoder(
const std::vector<bark_vocab::id> & tokens,
bark_sequence bark_forward_text_encoder(
const bark_sequence & tokens,
const gpt_model model,
std::mt19937 & rng,
const int n_threads,
const float temp,
const bool early_stop,
const float min_eos_p);

std::vector<std::vector<bark_vocab::id>> bark_forward_coarse_encoder(
bark_codes bark_forward_coarse_encoder(
const std::vector<bark_vocab::id> & tokens,
const gpt_model model,
std::mt19937 & rng,
Expand All @@ -165,8 +168,8 @@ std::vector<std::vector<bark_vocab::id>> bark_forward_coarse_encoder(
const int max_coarse_history,
const int sliding_window_size);

std::vector<std::vector<bark_vocab::id>> bark_forward_fine_encoder(
const std::vector<std::vector<bark_vocab::id>> & tokens,
bark_codes bark_forward_fine_encoder(
const bark_codes & tokens,
const gpt_model model,
std::mt19937 & rng,
const int n_threads,
Expand Down
18 changes: 9 additions & 9 deletions tests/test-coarse-encoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,17 @@
#include <random>
#include <vector>

static const std::map<std::vector<bark_vocab::id>, std::vector<std::vector<bark_vocab::id>>> & k_tests()
static const std::map<bark_sequence, bark_codes> & k_tests()
{
static const std::vector<bark_vocab::id> seq1 = { 215, 1988, 3275, 1898, 1898, 1898, 9372, 9372, 222, 334, 8568, 8568, 7963, 222, 8568, 55, 7963, 1270, 55, 1283, 1283, 222, 1283, 1283, 1283, 55, 1283, 5960, 5960, 5960, 5960, 5960, 5960, 5960, 231, 5960, 5960, 5960, 5960, 5960, 5960, 5960, 5960, 5960, 5960, 5960, 5960, 5960, 340, 5960, 5960, 5960, 5960, 1374, 4193, 4193, 9323, 1374, 1374, 1374, 1374, 4193, 1374, 4193, 1374, 1374, 4193, 1374, 231, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 8328, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 9318, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374 };
static const std::vector<bark_vocab::id> seq2 = { 59, 28, 28, 107, 7409, 1999, 7695, 6486, 6486, 5836, 5836, 5836, 873, 2585, 92, 92, 59, 28, 28, 107, 315, 5623, 1025, 10, 173, 125, 7385, 147, 147, 3689, 302, 9600, 6876, 6876, 321, 41, 164, 1367, 739, 41, 10, 140, 140, 6202, 6051, 6051, 4071, 9804, 8583, 677, 3, 17, 113, 9414, 5419, 5419, 3831, 3663, 3663, 3663, 2224, 2224, 2224, 73, 9144, 9144, 1667, 1997, 1957, 1093, 825, 175, 175, 1087, 736, 1233, 230, 147, 147, 230, 230, 230, 230, 230, 528, 528, 528, 528, 528, 528, 528, 528, 528, 528, 528, 528, 528, 528, 528, 1613, 528, 1613, 1613, 1613, 1613, 1613, 1613, 1613, 1613, 1613, 1613, 1613, 2009, 2009 };
static const std::vector<bark_vocab::id> seq3 = { 10, 10, 560, 10, 9602, 10, 10, 10, 302, 2363, 2919, 6860, 5127, 7134, 7134, 3934, 3934, 3352, 3352, 3507, 50, 10, 27, 27, 3320, 6107, 9891, 9891, 9891, 321, 41, 4287, 5667, 6152, 6152, 557, 1228, 12, 12, 200, 59, 28, 28, 28, 28, 1133, 9569, 5920, 1424, 1424, 51, 51, 682, 3820, 2107, 6059, 348, 210, 10, 10, 5, 2187, 7842, 988, 1728, 1728, 438, 366, 50, 27, 27, 181, 181, 7352, 9725, 4431, 6445, 2428, 41, 41, 41, 5119, 6557, 4212, 3963, 26, 26, 934, 1025, 1024, 173, 10, 41, 5467, 6684, 6684, 6684, 4958, 41, 298, 5982, 5982, 526, 3219, 122, 181, 10, 10, 884, 3446, 2599, 4478, 4478, 2549 };
static const bark_sequence seq1 = { 215, 1988, 3275, 1898, 1898, 1898, 9372, 9372, 222, 334, 8568, 8568, 7963, 222, 8568, 55, 7963, 1270, 55, 1283, 1283, 222, 1283, 1283, 1283, 55, 1283, 5960, 5960, 5960, 5960, 5960, 5960, 5960, 231, 5960, 5960, 5960, 5960, 5960, 5960, 5960, 5960, 5960, 5960, 5960, 5960, 5960, 340, 5960, 5960, 5960, 5960, 1374, 4193, 4193, 9323, 1374, 1374, 1374, 1374, 4193, 1374, 4193, 1374, 1374, 4193, 1374, 231, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 8328, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 9318, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374, 1374 };
static const bark_sequence seq2 = { 59, 28, 28, 107, 7409, 1999, 7695, 6486, 6486, 5836, 5836, 5836, 873, 2585, 92, 92, 59, 28, 28, 107, 315, 5623, 1025, 10, 173, 125, 7385, 147, 147, 3689, 302, 9600, 6876, 6876, 321, 41, 164, 1367, 739, 41, 10, 140, 140, 6202, 6051, 6051, 4071, 9804, 8583, 677, 3, 17, 113, 9414, 5419, 5419, 3831, 3663, 3663, 3663, 2224, 2224, 2224, 73, 9144, 9144, 1667, 1997, 1957, 1093, 825, 175, 175, 1087, 736, 1233, 230, 147, 147, 230, 230, 230, 230, 230, 528, 528, 528, 528, 528, 528, 528, 528, 528, 528, 528, 528, 528, 528, 528, 1613, 528, 1613, 1613, 1613, 1613, 1613, 1613, 1613, 1613, 1613, 1613, 1613, 2009, 2009 };
static const bark_sequence seq3 = { 10, 10, 560, 10, 9602, 10, 10, 10, 302, 2363, 2919, 6860, 5127, 7134, 7134, 3934, 3934, 3352, 3352, 3507, 50, 10, 27, 27, 3320, 6107, 9891, 9891, 9891, 321, 41, 4287, 5667, 6152, 6152, 557, 1228, 12, 12, 200, 59, 28, 28, 28, 28, 1133, 9569, 5920, 1424, 1424, 51, 51, 682, 3820, 2107, 6059, 348, 210, 10, 10, 5, 2187, 7842, 988, 1728, 1728, 438, 366, 50, 27, 27, 181, 181, 7352, 9725, 4431, 6445, 2428, 41, 41, 41, 5119, 6557, 4212, 3963, 26, 26, 934, 1025, 1024, 173, 10, 41, 5467, 6684, 6684, 6684, 4958, 41, 298, 5982, 5982, 526, 3219, 122, 181, 10, 10, 884, 3446, 2599, 4478, 4478, 2549 };

static const std::vector<std::vector<bark_vocab::id>> ans1 = { {}, {} };
static const std::vector<std::vector<bark_vocab::id>> ans2 = { {}, {} };
static const std::vector<std::vector<bark_vocab::id>> ans3 = { {}, {} };
static const bark_codes ans1 = { {}, {} };
static const bark_codes ans2 = { {}, {} };
static const bark_codes ans3 = { {}, {} };

static std::map<std::vector<bark_vocab::id>, std::vector<std::vector<bark_vocab::id>>> _k_tests = {
static std::map<bark_sequence, bark_codes> _k_tests = {
// { seq1, ans1 }, // hello world
// { seq2, ans2 }, // this is an audio
{ seq3, ans3 }, // You cannot, sir, take from me anything
Expand Down Expand Up @@ -48,7 +48,7 @@ int main(int argc, char** argv) {
}

for (const auto & test_kv : k_tests()) {
std::vector<std::vector<bark_vocab::id>> res = bark_forward_coarse_encoder(
bark_codes res = bark_forward_coarse_encoder(
test_kv.first, model, rng, n_threads, temp, max_coarse_history, sliding_window_size);

bool correct = res.size() == test_kv.second.size();
Expand Down
18 changes: 9 additions & 9 deletions tests/test-fine-encoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,17 @@
#include <random>
#include <vector>

static const std::map<std::vector<std::vector<bark_vocab::id>>, std::vector<std::vector<bark_vocab::id>>> & k_tests()
static const std::map<bark_codes, bark_codes> & k_tests()
{
static const std::vector<std::vector<bark_vocab::id>> seq1 = {};
static const std::vector<std::vector<bark_vocab::id>> seq2 = {};
static const std::vector<std::vector<bark_vocab::id>> seq3 = {};
static const bark_codes seq1 = {};
static const bark_codes seq2 = {};
static const bark_codes seq3 = {};

static const std::vector<std::vector<bark_vocab::id>> ans1 = { {}, {} };
static const std::vector<std::vector<bark_vocab::id>> ans2 = { {}, {} };
static const std::vector<std::vector<bark_vocab::id>> ans3 = { {}, {} };
static const bark_codes ans1 = { {}, {} };
static const bark_codes ans2 = { {}, {} };
static const bark_codes ans3 = { {}, {} };

static std::map<std::vector<std::vector<bark_vocab::id>>, std::vector<std::vector<bark_vocab::id>>> _k_tests = {
static std::map<bark_codes, bark_codes> _k_tests = {
// { seq1, ans1 }, // hello world
// { seq2, ans2 }, // this is an audio
{ seq3, ans3 }, // You cannot, sir, take from me anything
Expand Down Expand Up @@ -45,7 +45,7 @@ int main(int argc, char** argv) {
}

for (const auto & test_kv : k_tests()) {
std::vector<std::vector<bark_vocab::id>> res = bark_forward_fine_encoder(
bark_codes res = bark_forward_fine_encoder(
test_kv.first, model, rng, n_threads, temp);

bool correct = res.size() == test_kv.second.size();
Expand Down
Loading