Skip to content

Commit

Permalink
Merge branch 'upstream' into concedo_experimental
Browse files Browse the repository at this point in the history
# Conflicts:
#	.ecrc
#	CMakePresets.json
#	ci/run.sh
#	docs/backend/SYCL.md
#	ggml/src/CMakeLists.txt
#	src/llama.cpp
#	tests/test-backend-ops.cpp
#	tests/test-sampling.cpp
  • Loading branch information
LostRuins committed Aug 27, 2024
2 parents a22a666 + 2e59d61 commit b2c1ff7
Show file tree
Hide file tree
Showing 30 changed files with 8,034 additions and 7,257 deletions.
82 changes: 69 additions & 13 deletions common/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,41 @@

using json = nlohmann::ordered_json;

//
// Environment variable utils
//

// String overload: copies the environment variable `name` into `target`.
// When the variable is not set, `target` keeps its current value.
template<typename T>
static typename std::enable_if<std::is_same<T, std::string>::value, void>::type
get_env(std::string name, T & target) {
    const char * raw = std::getenv(name.c_str());
    if (raw != nullptr) {
        target = raw;
    }
}

// Integral overload (excluding bool): parses the environment variable `name`
// as a decimal integer and stores it in `target`.
//
// - When the variable is not set, `target` keeps its current value.
// - The text is parsed at `long long` width, so 64-bit targets are not capped
//   at the range of `int`.
// - Throws std::invalid_argument when no number can be parsed, and
//   std::out_of_range when the parsed value does not survive conversion to T
//   (instead of being truncated silently).
template<typename T>
static typename std::enable_if<!std::is_same<T, bool>::value && std::is_integral<T>::value, void>::type
get_env(std::string name, T & target) {
    const char * value = std::getenv(name.c_str());
    if (value == nullptr) {
        return; // variable not set: keep the caller's default
    }

    const long long parsed = std::stoll(value);
    const T narrowed = static_cast<T>(parsed);

    // round-trip check: a value that changes when converted to T does not fit
    if (static_cast<long long>(narrowed) != parsed) {
        throw std::out_of_range("get_env: value of " + name + " is out of range");
    }

    target = narrowed;
}

// Floating-point overload: parses the environment variable `name` with
// std::stof and stores the result in `target`.
// When the variable is not set, `target` keeps its current value.
// Note: parsing is done in single precision regardless of T.
template<typename T>
static typename std::enable_if<std::is_floating_point<T>::value, void>::type
get_env(std::string name, T & target) {
    const char * raw = std::getenv(name.c_str());
    if (raw == nullptr) {
        return;
    }
    target = std::stof(raw);
}

// Boolean overload: sets `target` to true when the environment variable
// `name` is exactly "1" or "true", and to false for any other value.
// When the variable is not set, `target` keeps its current value.
template<typename T>
static typename std::enable_if<std::is_same<T, bool>::value, void>::type
get_env(std::string name, T & target) {
    const char * raw = std::getenv(name.c_str());
    if (raw == nullptr) {
        return;
    }
    const std::string text(raw);
    target = (text == "1") || (text == "true");
}

//
// CPU utils
//
Expand Down Expand Up @@ -221,12 +256,6 @@ int32_t cpu_get_num_math() {
// CLI argument parsing
//

// Fills params.hf_token from the HF_TOKEN environment variable, but only when
// params.hf_token is currently empty and the variable is set.
void gpt_params_handle_hf_token(gpt_params & params) {
    if (!params.hf_token.empty()) {
        return;
    }
    const char * env_token = std::getenv("HF_TOKEN");
    if (env_token) {
        params.hf_token = env_token;
    }
}

void gpt_params_handle_model_default(gpt_params & params) {
if (!params.hf_repo.empty()) {
// short-hand to avoid specifying --hf-file -> default it to --model
Expand Down Expand Up @@ -274,7 +303,9 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {

gpt_params_handle_model_default(params);

gpt_params_handle_hf_token(params);
if (params.hf_token.empty()) {
get_env("HF_TOKEN", params.hf_token);
}

if (params.escape) {
string_process_escapes(params.prompt);
Expand All @@ -294,6 +325,25 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
return true;
}

// Overrides a subset of `params` from LLAMA_ARG_* environment variables.
// Every field is written through get_env(name, field), one environment
// variable per field; get_env leaves a field untouched when its variable
// is not set.
void gpt_params_parse_from_env(gpt_params & params) {
// we only care about server-related params for now
get_env("LLAMA_ARG_MODEL", params.model);
get_env("LLAMA_ARG_THREADS", params.n_threads);
get_env("LLAMA_ARG_CTX_SIZE", params.n_ctx);
get_env("LLAMA_ARG_N_PARALLEL", params.n_parallel);
get_env("LLAMA_ARG_BATCH", params.n_batch);
get_env("LLAMA_ARG_UBATCH", params.n_ubatch);
get_env("LLAMA_ARG_N_GPU_LAYERS", params.n_gpu_layers);
get_env("LLAMA_ARG_THREADS_HTTP", params.n_threads_http);
get_env("LLAMA_ARG_CHAT_TEMPLATE", params.chat_template);
get_env("LLAMA_ARG_N_PREDICT", params.n_predict);
get_env("LLAMA_ARG_ENDPOINT_METRICS", params.endpoint_metrics);
get_env("LLAMA_ARG_ENDPOINT_SLOTS", params.endpoint_slots);
get_env("LLAMA_ARG_EMBEDDINGS", params.embedding);
get_env("LLAMA_ARG_FLASH_ATTN", params.flash_attn);
get_env("LLAMA_ARG_DEFRAG_THOLD", params.defrag_thold);
}

bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
const auto params_org = params; // the example can modify the default params

Expand Down Expand Up @@ -852,7 +902,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
}
return true;
}
if (arg == "-ngld" || arg == "--gpu-layers-draft" || arg == "--gpu-layers-draft") {
if (arg == "-ngld" || arg == "--gpu-layers-draft" || arg == "--n-gpu-layers-draft") {
CHECK_ARG
params.n_gpu_layers_draft = std::stoi(argv[i]);
if (!llama_supports_gpu_offload()) {
Expand Down Expand Up @@ -1812,13 +1862,19 @@ std::string string_get_sortable_timestamp() {

// Replaces every non-overlapping occurrence of `search` in `s` with `replace`.
//
// The result is assembled in a separate buffer in a single left-to-right pass,
// so text inserted by `replace` is never scanned again (replacing "a" with
// "aa" turns "a" into "aa", not "aaaa").
// An empty `search` leaves `s` unchanged.
void string_replace_all(std::string & s, const std::string & search, const std::string & replace) {
    if (search.empty()) {
        return; // an empty pattern matches at every position and would never advance
    }

    std::string builder;
    builder.reserve(s.length());

    size_t pos = 0;
    size_t last_pos = 0;
    while ((pos = s.find(search, last_pos)) != std::string::npos) {
        // copy the untouched text before the match, then the replacement
        builder.append(s, last_pos, pos - last_pos);
        builder.append(replace);
        last_pos = pos + search.length();
    }

    // copy whatever follows the last match
    builder.append(s, last_pos, std::string::npos);
    s = std::move(builder);
}

void string_process_escapes(std::string & input) {
Expand Down
2 changes: 1 addition & 1 deletion common/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ struct gpt_params {
std::string lora_outfile = "ggml-lora-merged-f16.gguf";
};

void gpt_params_handle_hf_token(gpt_params & params);
void gpt_params_parse_from_env(gpt_params & params);
void gpt_params_handle_model_default(gpt_params & params);

bool gpt_params_parse_ex (int argc, char ** argv, gpt_params & params);
Expand Down
Loading

0 comments on commit b2c1ff7

Please sign in to comment.