Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove unused n_parts parameter #1509

Merged
merged 1 commit into from
May 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 0 additions & 8 deletions examples/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -321,12 +321,6 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
invalid_param = true;
break;
}
} else if (arg == "--n-parts") {
if (++i >= argc) {
invalid_param = true;
break;
}
params.n_parts = std::stoi(argv[i]);
} else if (arg == "-h" || arg == "--help") {
gpt_print_usage(argc, argv, default_params);
exit(0);
Expand Down Expand Up @@ -418,7 +412,6 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
fprintf(stderr, " --no-penalize-nl do not penalize newline token\n");
fprintf(stderr, " --memory-f32 use f32 instead of f16 for memory key+value\n");
fprintf(stderr, " --temp N temperature (default: %.1f)\n", (double)params.temp);
fprintf(stderr, " --n-parts N number of model parts (default: -1 = determine from dimensions)\n");
fprintf(stderr, " -b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch);
fprintf(stderr, " --perplexity compute perplexity over the prompt\n");
fprintf(stderr, " --keep number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep);
Expand Down Expand Up @@ -473,7 +466,6 @@ struct llama_context * llama_init_from_gpt_params(const gpt_params & params) {
auto lparams = llama_context_default_params();

lparams.n_ctx = params.n_ctx;
lparams.n_parts = params.n_parts;
lparams.n_gpu_layers = params.n_gpu_layers;
lparams.seed = params.seed;
lparams.f16_kv = params.memory_f16;
Expand Down
1 change: 0 additions & 1 deletion examples/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ struct gpt_params {
int32_t seed = -1; // RNG seed
int32_t n_threads = get_num_physical_cores();
int32_t n_predict = -1; // new tokens to predict
int32_t n_parts = -1; // amount of model parts (-1 = determine from model dimensions)
int32_t n_ctx = 512; // context size
int32_t n_batch = 512; // batch size for prompt processing (must be >=32 to use BLAS)
int32_t n_keep = 0; // number of tokens to keep from initial prompt
Expand Down
1 change: 0 additions & 1 deletion examples/quantize-stats/quantize-stats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,6 @@ int main(int argc, char ** argv) {
auto lparams = llama_context_default_params();

lparams.n_ctx = 256;
lparams.n_parts = 1;
lparams.seed = 1;
lparams.f16_kv = false;
lparams.use_mlock = false;
Expand Down
1 change: 0 additions & 1 deletion examples/save-load-state/save-load-state.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ int main(int argc, char ** argv) {
auto lparams = llama_context_default_params();

lparams.n_ctx = params.n_ctx;
lparams.n_parts = params.n_parts;
lparams.seed = params.seed;
lparams.f16_kv = params.memory_f16;
lparams.use_mmap = params.use_mmap;
Expand Down
1 change: 0 additions & 1 deletion llama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -812,7 +812,6 @@ static bool kv_cache_init(
struct llama_context_params llama_context_default_params() {
struct llama_context_params result = {
/*.n_ctx =*/ 512,
/*.n_parts =*/ -1,
/*.gpu_layers =*/ 0,
/*.seed =*/ -1,
/*.f16_kv =*/ false,
Expand Down
1 change: 0 additions & 1 deletion llama.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ extern "C" {

struct llama_context_params {
int n_ctx; // text context
int n_parts; // -1 for default
int n_gpu_layers; // number of layers to store in VRAM
int seed; // RNG seed, -1 for random

Expand Down