diff --git a/vllm/attention/backends/flash_attn.py b/vllm/attention/backends/flash_attn.py
index 300bab72877b8..1c48e2a0bb33d 100644
--- a/vllm/attention/backends/flash_attn.py
+++ b/vllm/attention/backends/flash_attn.py
@@ -83,7 +83,7 @@ class FlashAttentionMetadata(AttentionMetadata):
     # |---------------- N iteration ---------------------|
     # |- tokenA -|......................|-- newTokens ---|
     # |---------- context_len ----------|
-    # |-------------------- seq_len ----------------------|
+    # |-------------------- seq_len ---------------------|
     #                                   |-- query_len ---|
 
     # Maximum query length in the batch. None for decoding.
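
The patch only shortens the `seq_len` bar by one dash so it lines up with the `N iteration` bar above it. For reference, the diagram being fixed encodes the invariant that, for each sequence in the batch, `seq_len = context_len + query_len`: `context_len` covers tokens already computed in earlier iterations, and `query_len` covers the new tokens of the current iteration. Below is a minimal, self-contained sketch of that relationship; the names `ToySequenceMetadata` and `max_query_len_of` are hypothetical, for illustration only, and are not vLLM's API.

```python
# Illustration only: hypothetical names, not vLLM's FlashAttentionMetadata.
from dataclasses import dataclass
from typing import List, Optional


@dataclass
class ToySequenceMetadata:
    context_len: int  # tokens already computed in earlier iterations ("tokenA ...")
    query_len: int    # new tokens attended to in this iteration ("newTokens")

    @property
    def seq_len(self) -> int:
        # The full bar in the diagram: previously computed context plus new tokens.
        return self.context_len + self.query_len


def max_query_len_of(batch: List[ToySequenceMetadata]) -> Optional[int]:
    """Largest query_len in the batch, or None for an empty batch.

    (Per the comment in the diff, the real metadata's max_query_len is
    None for decoding.)
    """
    return max((s.query_len for s in batch), default=None)


if __name__ == "__main__":
    prefill = ToySequenceMetadata(context_len=0, query_len=16)  # first iteration
    decode = ToySequenceMetadata(context_len=16, query_len=1)   # a later step
    assert prefill.seq_len == 16
    assert decode.seq_len == 17
    print(max_query_len_of([prefill, decode]))  # -> 16
```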