[Performance] e2e overheads reduction: Small followup diff (vllm-proj…
alexm-neuralmagic authored Aug 9, 2024
1 parent 67abdbb commit fc7b8d1
Showing 2 changed files with 5 additions and 2 deletions.
4 changes: 2 additions & 2 deletions vllm/core/block_manager_v1.py
@@ -336,9 +336,9 @@ def allocate(self, seq_group: SequenceGroup) -> None:

         # Assign the self-attention block tables for each sequence.
         if len(wait_seqs) == 1:
-            self.block_tables[wait_seqs[0].seq_id] = block_table
+            self.block_tables[seq.seq_id] = block_table
         else:
-            for seq in seq_group.get_seqs(status=SequenceStatus.WAITING):
+            for seq in wait_seqs:
                 self.block_tables[seq.seq_id] = block_table.copy()

         # Allocate encoder sequence
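For context, allocate() already fetches the WAITING sequences once into wait_seqs and binds seq to its first element earlier in the method, so this change reuses those locals instead of re-indexing wait_seqs[0] and calling seq_group.get_seqs() a second time in the loop header. Below is a minimal, self-contained sketch of the resulting assignment logic; Seq and the helper function are simplified stand-ins for illustration, not vLLM's real classes or methods.

```python
from typing import Dict, List


class Seq:
    """Simplified stand-in for vllm.sequence.Sequence (illustrative only)."""

    def __init__(self, seq_id: int) -> None:
        self.seq_id = seq_id


def assign_self_attn_block_tables(
    block_tables: Dict[int, List[int]],
    wait_seqs: List[Seq],
    block_table: List[int],
) -> None:
    # wait_seqs is the list of WAITING sequences fetched once by the caller;
    # in vLLM's allocate(), seq is already bound to wait_seqs[0] further up.
    seq = wait_seqs[0]
    if len(wait_seqs) == 1:
        # Common single-sequence case: assign the block table directly.
        block_tables[seq.seq_id] = block_table
    else:
        # Iterate the cached wait_seqs instead of querying the sequence
        # group again for its WAITING sequences.
        for s in wait_seqs:
            block_tables[s.seq_id] = list(block_table)  # per-sequence copy


if __name__ == "__main__":
    tables: Dict[int, List[int]] = {}
    assign_self_attn_block_tables(tables, [Seq(0), Seq(1)], [3, 7])
    print(tables)  # {0: [3, 7], 1: [3, 7]}
```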
3 changes: 3 additions & 0 deletions vllm/sequence.py
@@ -655,6 +655,9 @@ def get_unfinished_seqs(self) -> List[Sequence]:
         return [seq for seq in self.seqs if not seq.is_finished()]

     def get_finished_seqs(self) -> List[Sequence]:
+        if self.is_single_seq:
+            return self.seqs if self.seqs[0].is_finished() else []
+
         return [seq for seq in self.seqs if seq.is_finished()]

     def update_num_computed_tokens(self, num_new_computed_tokens: int):
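This hunk adds a fast path for the common single-sequence request: when is_single_seq is set, get_finished_seqs() skips building a new list via the comprehension and checks the one sequence directly. A minimal sketch of the pattern, using stripped-down stand-ins for Sequence and SequenceGroup rather than vLLM's real classes:

```python
from typing import List


class Sequence:
    """Stripped-down stand-in for vllm.sequence.Sequence."""

    def __init__(self, finished: bool = False) -> None:
        self._finished = finished

    def is_finished(self) -> bool:
        return self._finished


class SequenceGroup:
    """Stripped-down stand-in for vllm.sequence.SequenceGroup."""

    def __init__(self, seqs: List[Sequence]) -> None:
        self.seqs = seqs
        # Precomputed flag: most requests carry exactly one sequence.
        self.is_single_seq = len(seqs) == 1

    def get_finished_seqs(self) -> List[Sequence]:
        # Fast path: for a single-sequence group, avoid allocating a new
        # list through the comprehension on every call.
        if self.is_single_seq:
            return self.seqs if self.seqs[0].is_finished() else []
        return [seq for seq in self.seqs if seq.is_finished()]


if __name__ == "__main__":
    group = SequenceGroup([Sequence(finished=True)])
    print(len(group.get_finished_seqs()))  # 1
```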
