diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 307ada611a859..c102a53214786 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -89,8 +89,8 @@ steps:
   mirror_hardwares: [amd]
 
   commands:
-  - pytest -v -s entrypoints -m llm
-  - pytest -v -s entrypoints -m openai
+  - pytest -v -s entrypoints/llm
+  - pytest -v -s entrypoints/openai
 
 - label: Examples Test
   working_dir: "/vllm-workspace/examples"
diff --git a/pyproject.toml b/pyproject.toml
index 4958aae02594a..790e013620286 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -69,7 +69,5 @@ skip_gitignore = true
 [tool.pytest.ini_options]
 markers = [
     "skip_global_cleanup",
-    "llm: run tests for vLLM API only",
-    "openai: run tests for OpenAI API only",
     "vlm: run tests for vision language models only",
 ]
diff --git a/tests/entrypoints/llm/__init__.py b/tests/entrypoints/llm/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/tests/entrypoints/test_llm_encode.py b/tests/entrypoints/llm/test_encode.py
similarity index 98%
rename from tests/entrypoints/test_llm_encode.py
rename to tests/entrypoints/llm/test_encode.py
index 12a0a1a269ede..7c02a6e3d8d64 100644
--- a/tests/entrypoints/test_llm_encode.py
+++ b/tests/entrypoints/llm/test_encode.py
@@ -10,7 +10,7 @@
     pytest.skip("TEST_ENTRYPOINTS=DISABLE, skipping entrypoints group",
                 allow_module_level=True)
 
-from ..conftest import cleanup
+from ...conftest import cleanup
 
 MODEL_NAME = "intfloat/e5-mistral-7b-instruct"
 
@@ -30,8 +30,6 @@
     [1000, 1003, 1001, 1002],
 ]
 
-pytestmark = pytest.mark.llm
-
 
 @pytest.fixture(scope="module")
 def llm():
diff --git a/tests/entrypoints/test_llm_generate.py b/tests/entrypoints/llm/test_generate.py
similarity index 98%
rename from tests/entrypoints/test_llm_generate.py
rename to tests/entrypoints/llm/test_generate.py
index 96b47fb5e170b..4f257e10310f2 100644
--- a/tests/entrypoints/test_llm_generate.py
+++ b/tests/entrypoints/llm/test_generate.py
@@ -6,7 +6,7 @@
 from tests.nm_utils.utils_skip import should_skip_test_group
 from vllm import LLM, RequestOutput, SamplingParams
 
-from ..conftest import cleanup
+from ...conftest import cleanup
 
 if should_skip_test_group(group_name="TEST_ENTRYPOINTS"):
     pytest.skip("TEST_ENTRYPOINTS=DISABLE, skipping entrypoints group",
@@ -28,8 +28,6 @@
     [0, 3, 1, 2],
 ]
 
-pytestmark = pytest.mark.llm
-
 
 @pytest.fixture(scope="module")
 def llm():
diff --git a/tests/entrypoints/test_llm_generate_multiple_loras.py b/tests/entrypoints/llm/test_generate_multiple_loras.py
similarity index 97%
rename from tests/entrypoints/test_llm_generate_multiple_loras.py
rename to tests/entrypoints/llm/test_generate_multiple_loras.py
index 743aba44060c1..cbe1a79779a40 100644
--- a/tests/entrypoints/test_llm_generate_multiple_loras.py
+++ b/tests/entrypoints/llm/test_generate_multiple_loras.py
@@ -9,7 +9,7 @@
 from vllm import LLM
 from vllm.lora.request import LoRARequest
 
-from ..conftest import cleanup
+from ...conftest import cleanup
 
 if should_skip_test_group(group_name="TEST_ENTRYPOINTS"):
     pytest.skip("TEST_ENTRYPOINTS=DISABLE, skipping entrypoints group",
@@ -26,8 +26,6 @@
 
 LORA_NAME = "typeof/zephyr-7b-beta-lora"
 
-pytestmark = pytest.mark.llm
-
 
 @pytest.fixture(scope="module")
 def llm():
diff --git a/tests/entrypoints/openai/__init__.py b/tests/entrypoints/openai/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/tests/entrypoints/test_openai_chat.py b/tests/entrypoints/openai/test_chat.py
similarity index 99%
rename from tests/entrypoints/test_openai_chat.py
rename to tests/entrypoints/openai/test_chat.py
index 52e647170f6af..f4c0af1adfdf9 100644
--- a/tests/entrypoints/test_openai_chat.py
+++ b/tests/entrypoints/openai/test_chat.py
@@ -14,7 +14,7 @@
 from huggingface_hub import snapshot_download
 from openai import BadRequestError
 
-from ..utils import RemoteOpenAIServer
+from ...utils import RemoteOpenAIServer
 
 # any model with a chat template should work here
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
@@ -69,8 +69,6 @@
     "Swift", "Kotlin"
 ]
 
-pytestmark = pytest.mark.openai
-
 
 @pytest.fixture(scope="module")
 def zephyr_lora_files():
diff --git a/tests/entrypoints/test_openai_completion.py b/tests/entrypoints/openai/test_completion.py
similarity index 99%
rename from tests/entrypoints/test_openai_completion.py
rename to tests/entrypoints/openai/test_completion.py
index da5de3666be50..b05035713d7be 100644
--- a/tests/entrypoints/test_openai_completion.py
+++ b/tests/entrypoints/openai/test_completion.py
@@ -16,7 +16,7 @@
 
 from vllm.transformers_utils.tokenizer import get_tokenizer
 
-from ..utils import RemoteOpenAIServer
+from ...utils import RemoteOpenAIServer
 
 # any model with a chat template should work here
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
@@ -71,8 +71,6 @@
     "Swift", "Kotlin"
 ]
 
-pytestmark = pytest.mark.openai
-
 
 @pytest.fixture(scope="module")
 def zephyr_lora_files():
diff --git a/tests/entrypoints/test_openai_embedding.py b/tests/entrypoints/openai/test_embedding.py
similarity index 92%
rename from tests/entrypoints/test_openai_embedding.py
rename to tests/entrypoints/openai/test_embedding.py
index 1930c46f3d915..f35baece6cf4f 100644
--- a/tests/entrypoints/test_openai_embedding.py
+++ b/tests/entrypoints/openai/test_embedding.py
@@ -2,21 +2,15 @@
 import pytest
 import ray
 
-<<<<<<< HEAD
 from tests.nm_utils.utils_skip import should_skip_test_group
-
-from ..utils import VLLM_PATH, RemoteOpenAIServer
-=======
-from ..utils import RemoteOpenAIServer
->>>>>>> dd793d1d ([Hardware][AMD][CI/Build][Doc] Upgrade to ROCm 6.1, Dockerfile improvements, test fixes (#5422))
+from ...utils import RemoteOpenAIServer
 
 if should_skip_test_group(group_name="TEST_ENTRYPOINTS"):
     pytest.skip("TEST_ENTRYPOINTS=DISABLE, skipping entrypoints group",
                 allow_module_level=True)
 
-EMBEDDING_MODEL_NAME = "intfloat/e5-mistral-7b-instruct"
 
-pytestmark = pytest.mark.openai
+EMBEDDING_MODEL_NAME = "intfloat/e5-mistral-7b-instruct"
 
 
 @pytest.fixture(scope="module")
diff --git a/tests/entrypoints/test_guided_processors.py b/tests/entrypoints/openai/test_guided_processors.py
similarity index 99%
rename from tests/entrypoints/test_guided_processors.py
rename to tests/entrypoints/openai/test_guided_processors.py
index fe5c709b1db21..88688a8c2eac2 100644
--- a/tests/entrypoints/test_guided_processors.py
+++ b/tests/entrypoints/openai/test_guided_processors.py
@@ -57,8 +57,6 @@
 TEST_REGEX = (r"((25[0-5]|(2[0-4]|1\d|[1-9]|)\d)\.){3}"
               r"(25[0-5]|(2[0-4]|1\d|[1-9]|)\d)")
 
-pytestmark = pytest.mark.openai
-
 
 def test_guided_logits_processors():
     """Basic unit test for RegexLogitsProcessor and JSONLogitsProcessor."""
diff --git a/tests/entrypoints/test_openai_server.py b/tests/entrypoints/openai/test_models.py
similarity index 96%
rename from tests/entrypoints/test_openai_server.py
rename to tests/entrypoints/openai/test_models.py
index 35fc74749d26f..358728146089a 100644
--- a/tests/entrypoints/test_openai_server.py
+++ b/tests/entrypoints/openai/test_models.py
@@ -7,7 +7,7 @@
 from huggingface_hub import snapshot_download
 
 from tests.nm_utils.utils_skip import should_skip_test_group
-from ..utils import RemoteOpenAIServer
+from ...utils import RemoteOpenAIServer
 
 if should_skip_test_group(group_name="TEST_ENTRYPOINTS"):
     pytest.skip("TEST_ENTRYPOINTS=DISABLE, skipping entrypoints group",
@@ -19,8 +19,6 @@
 # generation quality here
 LORA_NAME = "typeof/zephyr-7b-beta-lora"
 
-pytestmark = pytest.mark.openai
-
 
 @pytest.fixture(scope="module")
 def zephyr_lora_files():
diff --git a/tests/entrypoints/test_server_oot_registration.py b/tests/entrypoints/openai/test_oot_registration.py
similarity index 98%
rename from tests/entrypoints/test_server_oot_registration.py
rename to tests/entrypoints/openai/test_oot_registration.py
index 394594fcbf085..2263d620d0803 100644
--- a/tests/entrypoints/test_server_oot_registration.py
+++ b/tests/entrypoints/openai/test_oot_registration.py
@@ -15,8 +15,6 @@
     pytest.skip("TEST_ENTRYPOINTS=DISABLE, skipping entrypoints group",
                 allow_module_level=True)
 
-pytestmark = pytest.mark.openai
-
 
 class MyOPTForCausalLM(OPTForCausalLM):
 
diff --git a/tests/entrypoints/test_openai_run_batch.py b/tests/entrypoints/openai/test_run_batch.py
similarity index 100%
rename from tests/entrypoints/test_openai_run_batch.py
rename to tests/entrypoints/openai/test_run_batch.py
diff --git a/tests/entrypoints/openai/test_serving_chat.py b/tests/entrypoints/openai/test_serving_chat.py
index e3168f67e001f..18060e86c08d9 100644
--- a/tests/entrypoints/openai/test_serving_chat.py
+++ b/tests/entrypoints/openai/test_serving_chat.py
@@ -13,8 +13,6 @@
 MODEL_NAME = "openai-community/gpt2"
 CHAT_TEMPLATE = "Dummy chat template for testing {}"
 
-pytestmark = pytest.mark.openai
-
 
 @dataclass
 class MockModelConfig:
diff --git a/tests/entrypoints/test_openai_vision.py b/tests/entrypoints/openai/test_vision.py
similarity index 97%
rename from tests/entrypoints/test_openai_vision.py
rename to tests/entrypoints/openai/test_vision.py
index d3e24a61a1f3e..5f3475eb7f945 100644
--- a/tests/entrypoints/test_openai_vision.py
+++ b/tests/entrypoints/openai/test_vision.py
@@ -1,4 +1,3 @@
-from pathlib import Path
 from typing import Dict, List
 
 import openai
@@ -9,16 +8,16 @@
 from tests.nm_utils.utils_skip import should_skip_test_group
 from vllm.multimodal.utils import ImageFetchAiohttp, encode_image_base64
 
-from ..utils import RemoteOpenAIServer
+from ...utils import VLLM_PATH, RemoteOpenAIServer
 
 if should_skip_test_group(group_name="TEST_ENTRYPOINTS"):
     pytest.skip("TEST_ENTRYPOINTS=DISABLE, skipping entrypoints group",
                 allow_module_level=True)
 
 MODEL_NAME = "llava-hf/llava-1.5-7b-hf"
-LLAVA_CHAT_TEMPLATE = (Path(__file__).parent.parent.parent /
-                       "examples/template_llava.jinja")
+LLAVA_CHAT_TEMPLATE = VLLM_PATH / "examples/template_llava.jinja"
 assert LLAVA_CHAT_TEMPLATE.exists()
+
 # Test different image extensions (JPG/PNG) and formats (gray/RGB/RGBA)
 TEST_IMAGE_URLS = [
     "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
@@ -27,8 +26,6 @@
     "https://upload.wikimedia.org/wikipedia/commons/0/0b/RGBA_comp.png",
 ]
 
-pytestmark = pytest.mark.openai
-
 
 @pytest.fixture(scope="module")
 def ray_ctx():
@@ -284,7 +281,3 @@ async def test_multi_image_input(client: openai.AsyncOpenAI, model_name: str,
     )
     completion = completion.choices[0].text
     assert completion is not None and len(completion) >= 0
-
-
-if __name__ == "__main__":
-    pytest.main([__file__])
diff --git a/tests/utils.py b/tests/utils.py
index 2a5f82b91c42c..09107b5e7e2b7 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -4,7 +4,8 @@
 import time
 import warnings
 from contextlib import contextmanager
-from typing import Dict, List
+from pathlib import Path
+from typing import Any, Dict, List
 
 import openai
 import ray
@@ -40,8 +41,8 @@ def _nvml():
             nvmlShutdown()
 
 
-# Path to root of repository so that utilities can be imported by ray workers
-VLLM_PATH = os.path.abspath(os.path.join(__file__, os.pardir, os.pardir))
+VLLM_PATH = Path(__file__).parent.parent
+"""Path to root of the vLLM repository."""
 
 
 class RemoteOpenAIServer:
@@ -153,10 +154,12 @@ def init_test_distributed_environment(
 def multi_process_parallel(
     tp_size: int,
     pp_size: int,
-    test_target,
+    test_target: Any,
 ) -> None:
     # Using ray helps debugging the error when it failed
     # as compared to multiprocessing.
+    # NOTE: working_dir must point at the repository root for distributed
+    # tests; otherwise ray workers may hit import errors.
     ray.init(runtime_env={"working_dir": VLLM_PATH})
 
     distributed_init_port = get_open_port()