Skip to content

Commit

Permalink
[core][misc] improve free_finished_seq_groups (vllm-project#6865)
Browse files Browse the repository at this point in the history
Co-authored-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
  • Loading branch information
2 people authored and kylesayrs committed Aug 17, 2024
1 parent 2d43e96 commit ed6c218
Showing 1 changed file with 12 additions and 7 deletions.
19 changes: 12 additions & 7 deletions vllm/core/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,7 @@ def __init__(
# Request ids of the sequence groups that finished since the last step iteration.
# It lets the model know that any state associated with these requests
# can and must be released after the current step.
# This is used to evict the finished requests from the Mamba cache.
self._finished_requests_ids: List[str] = list()
# Time at previous scheduling step
self.prev_time = 0.0
Expand Down Expand Up @@ -374,6 +375,7 @@ def abort_seq_group(self, request_id: Union[str, Iterable[str]]) -> None:
for aborted_group in aborted_groups:
# Remove the sequence group from the state queue.
state_queue.remove(aborted_group)
# Remove the aborted request from the Mamba cache.
self._finished_requests_ids.append(aborted_group.request_id)
for seq in aborted_group.get_seqs():
if seq.is_finished():
Expand Down Expand Up @@ -1057,13 +1059,16 @@ def free_seq(self, seq: Sequence) -> None:
self.block_manager.free(seq)

def free_finished_seq_groups(self) -> None:
# Drops finished sequence groups from self.running and records their
# request ids in self._finished_requests_ids.
# NOTE(review): this is a rendered diff hunk with the +/- markers and the
# indentation stripped. The next 7 lines look like the pre-commit body
# (the count matches the "7 deletions" in the commit summary) -- confirm
# against the commit before treating them as live code.
for queue in [self.running, self.swapped, self.waiting]:
self._finished_requests_ids += [
seq_group.request_id for seq_group in queue
if seq_group.is_finished()
]
self.running = deque(seq_group for seq_group in self.running
if not seq_group.is_finished())
# NOTE(review): the lines below look like the post-commit body. It makes a
# single pass over self.running only, so finished groups sitting in
# self.swapped or self.waiting are not scanned here -- confirm that this
# narrowing is intended.
remaining: Deque[SequenceGroup] = deque()
for seq_group in self.running:
if seq_group.is_finished():
# Add the finished requests to the finished requests list.
# This list will be used to update the Mamba cache in the
# next step.
self._finished_requests_ids.append(seq_group.request_id)
else:
# Still-unfinished groups are carried over in their original order.
remaining.append(seq_group)
self.running = remaining

def _allocate_and_set_running(self, seq_group: SequenceGroup) -> None:
self.block_manager.allocate(seq_group)
Expand Down

0 comments on commit ed6c218

Please sign in to comment.