From c1cc0efeca509c2cd03ea7f6a10e78783cb1f2e3 Mon Sep 17 00:00:00 2001
From: Aaron Pham
Date: Fri, 13 Sep 2024 14:02:25 -0400
Subject: [PATCH] chore: ignore SIM rules

Signed-off-by: Aaron Pham
---
 format.sh                                          | 4 ++--
 pyproject.toml                                     | 8 +++-----
 tests/multimodal/test_base.py                      | 2 +-
 tests/test_logger.py                               | 4 ++--
 vllm/attention/ops/triton_flash_attention.py       | 4 ++--
 vllm/engine/async_llm_engine.py                    | 6 +++---
 vllm/engine/llm_engine.py                          | 6 +++---
 .../guided_decoding/outlines_logits_processors.py  | 4 ++--
 8 files changed, 18 insertions(+), 20 deletions(-)

diff --git a/format.sh b/format.sh
index 2204b3ba59498..6563d89b192ea 100755
--- a/format.sh
+++ b/format.sh
@@ -159,7 +159,7 @@ echo 'vLLM codespell: Done'
 
 # Lint specified files
 lint() {
-    ruff "$@"
+    ruff check "$@"
 }
 
 # Lint files that differ from main branch. Ignores dirs that are not slated
@@ -175,7 +175,7 @@ lint_changed() {
 
     if ! git diff --diff-filter=ACM --quiet --exit-code "$MERGEBASE" -- '*.py' '*.pyi' &>/dev/null; then
         git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.py' '*.pyi' | xargs \
             ruff
+            ruff check
     fi
 }
 
diff --git a/pyproject.toml b/pyproject.toml
index 39809e2f58dc8..29fb5fd8eb923 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,8 +12,8 @@ requires = [
 build-backend = "setuptools.build_meta"
 
 [tool.ruff]
-# Allow lines to be as long as 80.
-line-length = 80
+# Allow lines to be as long as 119.
+line-length = 119
 exclude = [
     # External file, leaving license intact
     "examples/fp8/quantizer/quantize.py"
@@ -26,11 +26,9 @@ select = [
     # Pyflakes
     "F",
     # pyupgrade
-    "UP",
+    # "UP",
     # flake8-bugbear
     "B",
-    # flake8-simplify
-    "SIM",
     # isort
     # "I",
     "G",
diff --git a/tests/multimodal/test_base.py b/tests/multimodal/test_base.py
index e9562d2048f06..9784c29cc19f5 100644
--- a/tests/multimodal/test_base.py
+++ b/tests/multimodal/test_base.py
@@ -5,7 +5,7 @@
 
 def assert_nested_tensors_equal(expected: NestedTensors,
                                 actual: NestedTensors):
-    assert type(expected) == type(actual)
+    assert isinstance(expected, type(actual))
     if isinstance(expected, torch.Tensor):
         assert torch.equal(expected, actual)
     else:
diff --git a/tests/test_logger.py b/tests/test_logger.py
index 8f3d218416870..837b4d86a569b 100644
--- a/tests/test_logger.py
+++ b/tests/test_logger.py
@@ -111,7 +111,7 @@ def test_an_error_is_raised_when_custom_logging_config_file_does_not_exist():
     configuration occurs."""
     with pytest.raises(RuntimeError) as ex_info:
         _configure_vllm_root_logger()
-    assert ex_info.type == RuntimeError
+    assert ex_info.type is RuntimeError
     assert "File does not exist" in str(ex_info)
 
 
@@ -152,7 +152,7 @@ def test_an_error_is_raised_when_custom_logging_config_is_unexpected_json(
                logging_config_file.name):
         with pytest.raises(ValueError) as ex_info:
             _configure_vllm_root_logger()
-        assert ex_info.type == ValueError
+        assert ex_info.type is ValueError
         assert "Invalid logging config. Expected Dict, got" in str(ex_info)
 
 
diff --git a/vllm/attention/ops/triton_flash_attention.py b/vllm/attention/ops/triton_flash_attention.py
index f94211116a746..61c83091a5451 100644
--- a/vllm/attention/ops/triton_flash_attention.py
+++ b/vllm/attention/ops/triton_flash_attention.py
@@ -126,7 +126,7 @@ def _attn_fwd_inner(
         # We start from end of seqlen_k so only the first iteration would need
         # to be checked for padding if it is not a multiple of block_n
        # TODO: This can be optimized to only be true for the padded block.
-        if MASK_STEPS:  # noqa: SIM102
+        if MASK_STEPS:
             # If this is the last block / iteration, we want to
             # mask if the sequence length is not a multiple of block size
             # a solution is to always do BLOCK_M // BLOCK_N + 1 steps
@@ -621,7 +621,7 @@ def attn_fwd(
         start_m_idx = start_m * BLOCK_M
         causal_start_idx = seqlen_q - seqlen_k
         acc = acc.to(Out.type.element_ty)
-        if IS_CAUSAL:  # noqa: SIM102
+        if IS_CAUSAL:
             if causal_start_idx > start_m_idx and causal_start_idx < end_m_idx:
                 out_mask_boundary = tl.full((BLOCK_DMODEL, ),
                                             causal_start_idx,
diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py
index 8a07ce1c965e1..dbca92e418508 100644
--- a/vllm/engine/async_llm_engine.py
+++ b/vllm/engine/async_llm_engine.py
@@ -806,7 +806,7 @@ async def generate(
             request_id: The unique id of the request.
             lora_request: LoRA request to use for generation, if any.
             trace_headers: OpenTelemetry trace headers.
-            prompt_adapter_request: Prompt Adapter request to use 
+            prompt_adapter_request: Prompt Adapter request to use
                                             for generation, if any.
 
         Yields:
@@ -1022,7 +1022,7 @@ def remove_logger(self, logger_name: str) -> None:
     async def start_profile(self) -> None:
         # using type instead of isinstance to check to avoid capturing
         # inherited classes
-        if type(self.engine.model_executor) == GPUExecutorAsync:
+        if type(self.engine.model_executor) is GPUExecutorAsync:
             self.engine.model_executor.start_profile()
         else:
             self.engine.model_executor._run_workers("start_profile")
@@ -1030,7 +1030,7 @@ async def start_profile(self) -> None:
     async def stop_profile(self) -> None:
         # using type instead of isinstance to check to avoid capturing
         # inherited classes
-        if type(self.engine.model_executor) == GPUExecutorAsync:
+        if type(self.engine.model_executor) is GPUExecutorAsync:
             self.engine.model_executor.stop_profile()
         else:
             self.engine.model_executor._run_workers("stop_profile")
diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index dfdbc22ef00e1..6b562bd4abde0 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -144,7 +144,7 @@ class LLMEngine:
            decoding.
         executor_class: The model executor class for managing distributed
             execution.
-        prompt_adapter_config (Optional): The configuration related to serving 
+        prompt_adapter_config (Optional): The configuration related to serving
             prompt adapters.
         log_stats: Whether to log statistics.
        usage_context: Specified entry point, used for usage info collection.
@@ -1600,7 +1600,7 @@ def check_health(self) -> None:
     def start_profile(self) -> None:
         # using type instead of isinstance to check to avoid capturing
         # inherited classes (MultiprocessingGPUExecutor)
-        if type(self.model_executor) == GPUExecutor:
+        if type(self.model_executor) is GPUExecutor:
             self.model_executor.start_profile()
         else:
             self.model_executor._run_workers("start_profile")
@@ -1608,7 +1608,7 @@ def start_profile(self) -> None:
     def stop_profile(self) -> None:
         # using type instead of isinstance to check to avoid capturing
         # inherited classes (MultiprocessingGPUExecutor)
-        if type(self.model_executor) == GPUExecutor:
+        if type(self.model_executor) is GPUExecutor:
             self.model_executor.stop_profile()
         else:
             self.model_executor._run_workers("stop_profile")
diff --git a/vllm/model_executor/guided_decoding/outlines_logits_processors.py b/vllm/model_executor/guided_decoding/outlines_logits_processors.py
index 554dcc0ed43ed..be80d901d899d 100644
--- a/vllm/model_executor/guided_decoding/outlines_logits_processors.py
+++ b/vllm/model_executor/guided_decoding/outlines_logits_processors.py
@@ -67,9 +67,9 @@ def __call__(self, input_ids: List[int],
         instruction = self._guide.get_next_instruction(
             state=self._fsm_state[seq_id])
 
-        if type(instruction) == Generate:
+        if isinstance(instruction, Generate):
             allowed_tokens = instruction.tokens
-        elif type(instruction) == Write:
+        elif isinstance(instruction, Write):
             # TODO: support fast forward tokens
             allowed_tokens = [instruction.tokens[0]]
         else:
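
Note (not part of the patch): the executor hunks above keep an exact type check, type(...) is GPUExecutor, rather than switching to isinstance, because the surrounding comments say the intent is to not match inheriting executors such as MultiprocessingGPUExecutor. A minimal sketch of the difference, using made-up stand-in classes rather than the real vLLM executor classes:

    # Illustrative stand-ins only; BaseExecutor/ChildExecutor are not vLLM classes.
    class BaseExecutor:
        pass

    class ChildExecutor(BaseExecutor):
        pass

    executor = ChildExecutor()

    print(isinstance(executor, BaseExecutor))  # True: isinstance also matches subclasses
    print(type(executor) is BaseExecutor)      # False: exact type check excludes subclasses
    print(type(executor) is ChildExecutor)     # True

For the Generate/Write dispatch in outlines_logits_processors.py the two instructions are distinct classes, so isinstance is a safe and idiomatic replacement there.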