diff --git a/llama.cpp b/llama.cpp index 7b261b73e2210..d8c244bda43bf 100644 --- a/llama.cpp +++ b/llama.cpp @@ -7034,6 +7034,7 @@ void llama_sample_typical(struct llama_context * ctx, llama_token_data_array * c // Replace the data in candidates with the new_candidates data std::copy(new_candidates.begin(), new_candidates.end(), candidates->data); candidates->size = new_candidates.size(); + candidates->sorted = false; if (ctx) { ctx->t_sample_us += ggml_time_us() - t_start_sample_us;