Skip to content

Commit

Permalink
clear kv_cache
Browse files Browse the repository at this point in the history
This should fix a big bug affecting long-term running of the LLM daemon!
  • Loading branch information
Chris Kennedy committed Mar 23, 2024
1 parent 310755a commit 3f80a03
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 2 deletions.
2 changes: 1 addition & 1 deletion scripts/twitch.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ MODEL_ID=7b-it
MAX_TOKENS=200
ALIGNMENT=right
TEMPERATURE=1.0
CONTINUOUS=0
CONTINUOUS=1
POLL_INTERVAL=0
PIPELINE_CONCURRENCY=3
TWITCH_LLM_CONCURRENCY=3
Expand Down
1 change: 1 addition & 0 deletions src/candle_gemma.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ impl TextGeneration {
async fn run(&mut self, prompt: &str, sample_len: usize) -> Result<()> {
let verbose_prompt: bool = false;

self.model.clear_kv_cache();
self.tokenizer.clear();
let mut tokens = self
.tokenizer
Expand Down
5 changes: 4 additions & 1 deletion src/candle_mistral.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,10 @@ impl TextGeneration {

async fn run(&mut self, prompt: &str, sample_len: usize) -> Result<()> {
let verbose_prompt: bool = false;
match &mut self.model {
Model::Mistral(m) => m.clear_kv_cache(),
Model::Quantized(m) => m.clear_kv_cache(),
};
self.tokenizer.clear();
let mut tokens = self
.tokenizer
Expand Down Expand Up @@ -103,7 +107,6 @@ impl TextGeneration {
let start_pos = tokens.len().saturating_sub(context_size);
let ctxt = &tokens[start_pos..];
let input = Tensor::new(ctxt, &self.device)?.unsqueeze(0)?;
//Model::Mistral7binstructV02(m) => m.forward(&input, start_pos)?,
let logits = match &mut self.model {
Model::Mistral(m) => match m.forward(&input, start_pos) {
Ok(logits) => logits,
Expand Down

0 comments on commit 3f80a03

Please sign in to comment.