Add attention and final logit soft-capping, update scaling factor to Gemma2 #8197

Merged
merged 10 commits on Jun 30, 2024
Add default value for attention and final logit softcap value
abetlen committed Jun 29, 2024
commit bb7159927dfcd94fd550dc673d03ce02e392aed4
8 changes: 4 additions & 4 deletions src/llama.cpp
@@ -2103,8 +2103,8 @@ struct llama_hparams {
float f_norm_eps;
float f_norm_rms_eps;

- float f_attn_logit_softcapping;
- float f_final_logit_softcapping;
+ float f_attn_logit_softcapping = 50.0f;
+ float f_final_logit_softcapping = 30.0f;

float rope_attn_factor = 1.0f;
float rope_freq_base_train;
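For context: soft-capping squashes a logit x into the range (-cap, cap) via cap * tanh(x / cap), so with these defaults attention scores are bounded at ±50 and final output logits at ±30 whenever a model file does not override them. Below is a minimal standalone sketch of that formula (not the llama.cpp kernel itself, which applies the operation inside the compute graph):

```cpp
// Minimal sketch of Gemma 2-style logit soft-capping: a logit x is squashed
// into (-cap, +cap) via cap * tanh(x / cap), so extreme scores are bounded
// while small scores pass through almost unchanged.
#include <cmath>
#include <cstdio>

static float softcap(float x, float cap) {
    return cap * tanhf(x / cap);
}

int main() {
    const float f_attn_logit_softcapping  = 50.0f; // attention-score cap (default above)
    const float f_final_logit_softcapping = 30.0f; // output-logit cap (default above)

    // A very large attention logit is pulled back under the cap;
    // a small final logit is left nearly untouched.
    printf("softcap(120.0, 50.0) = %f\n", softcap(120.0f, f_attn_logit_softcapping));
    printf("softcap(  5.0, 30.0) = %f\n", softcap(5.0f,   f_final_logit_softcapping));
    return 0;
}
```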
@@ -4710,8 +4710,8 @@ static void llm_load_hparams(
case LLM_ARCH_GEMMA2:
{
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
- ml.get_key(LLM_KV_ATTN_LOGIT_SOFTCAPPING, hparams.f_attn_logit_softcapping);
- ml.get_key(LLM_KV_FINAL_LOGIT_SOFTCAPPING, hparams.f_final_logit_softcapping);
+ ml.get_key(LLM_KV_ATTN_LOGIT_SOFTCAPPING, hparams.f_attn_logit_softcapping, false);
+ ml.get_key(LLM_KV_FINAL_LOGIT_SOFTCAPPING, hparams.f_final_logit_softcapping, false);
hparams.attn_soft_cap = true;

switch (hparams.n_layer) {
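The extra `false` argument marks these GGUF metadata keys as optional, so model files converted before the soft-capping keys existed still load and simply keep the 50.0f / 30.0f defaults now set in `llama_hparams`. A rough sketch of that required-vs-optional lookup pattern, using a hypothetical `get_key` helper and illustrative key names rather than the real `llama_model_loader` API:

```cpp
// Hypothetical stand-in for a metadata lookup with a "required" flag:
// when required == false and the key is missing, the destination keeps
// whatever default value it already holds.
#include <map>
#include <stdexcept>
#include <string>

static bool get_key(const std::map<std::string, float> & kv,
                    const std::string & key, float & dst, bool required = true) {
    auto it = kv.find(key);
    if (it == kv.end()) {
        if (required) {
            throw std::runtime_error("key not found in model file: " + key);
        }
        return false; // optional key absent: keep the default already stored in dst
    }
    dst = it->second;
    return true;
}

int main() {
    std::map<std::string, float> gguf_kv; // pretend this file lacks the softcap keys

    float f_attn_logit_softcapping  = 50.0f;
    float f_final_logit_softcapping = 30.0f;

    // With required = false, missing keys silently fall back to the defaults.
    get_key(gguf_kv, "gemma2.attn_logit_softcapping",  f_attn_logit_softcapping,  false);
    get_key(gguf_kv, "gemma2.final_logit_softcapping", f_final_logit_softcapping, false);
    return 0;
}
```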