diff --git a/python/ray/serve/tests/conftest.py b/python/ray/serve/tests/conftest.py
index 4e48f8b2badfd..b86c7f04a353e 100644
--- a/python/ray/serve/tests/conftest.py
+++ b/python/ray/serve/tests/conftest.py
@@ -10,6 +10,7 @@
 from ray import serve
 from ray._private.test_utils import wait_for_condition
 from ray._private.usage import usage_lib
+from ray.cluster_utils import AutoscalingCluster, Cluster
 from ray.serve.context import _get_global_client
 from ray.serve.tests.common.utils import TELEMETRY_ROUTE_PREFIX, check_ray_stopped
 from ray.tests.conftest import propagate_logs, pytest_runtest_makereport  # noqa
@@ -29,6 +30,39 @@ def ray_shutdown():
     ray.shutdown()
 
 
+@pytest.fixture
+def ray_cluster():
+    """Yield a ``Cluster`` for the test to use, then tear everything down.
+
+    After the test finishes, Serve is shut down first, then Ray, and finally
+    the cluster that was handed to the test.
+    """
+    cluster = Cluster()
+    # Yield the very instance that is shut down below. Yielding a second
+    # ``Cluster()`` here would hand the test a cluster that teardown never
+    # touches.
+    yield cluster
+    serve.shutdown()
+    ray.shutdown()
+    cluster.shutdown()
+
+
+@pytest.fixture
+def ray_autoscaling_cluster(request):
+    """Start an ``AutoscalingCluster`` for the test, then tear everything down.
+
+    ``request.param`` is unpacked as the keyword arguments of
+    ``AutoscalingCluster``, so use this fixture through indirect
+    parametrization. Nothing is yielded: the test receives ``None``.
+    """
+    cluster = AutoscalingCluster(**request.param)
+    cluster.start()
+    yield
+    serve.shutdown()
+    ray.shutdown()
+    cluster.shutdown()
+
+
 @pytest.fixture
 def ray_start(scope="module"):
     port = random.randint(MIN_DYNAMIC_PORT, MAX_DYNAMIC_PORT)
diff --git a/python/ray/serve/tests/test_cluster.py b/python/ray/serve/tests/test_cluster.py
index 67d50ed28d0a3..642af88e4d511 100644
--- a/python/ray/serve/tests/test_cluster.py
+++ b/python/ray/serve/tests/test_cluster.py
@@ -19,15 +19,6 @@
 from ray.serve.handle import RayServeHandle
 
 
-@pytest.fixture
-def ray_cluster():
-    cluster = Cluster()
-    yield cluster
-    serve.shutdown()
-    ray.shutdown()
-    cluster.shutdown()
-
-
 def get_pids(expected, deployment_name="D", app_name="default", timeout=30):
     handle = serve.get_deployment_handle(deployment_name, app_name)
     refs = []
diff --git a/python/ray/serve/tests/test_grpc.py b/python/ray/serve/tests/test_grpc.py
index 1a20dfc9a5090..9b6ae282ec536 100644
--- a/python/ray/serve/tests/test_grpc.py
+++ b/python/ray/serve/tests/test_grpc.py
@@ -26,15 +26,6 @@
 from ray.serve.tests.test_config_files.grpc_deployment import g, g2
 
 
-@pytest.fixture
-def ray_cluster():
-    cluster = Cluster()
-    yield Cluster()
-    serve.shutdown()
-    ray.shutdown()
-    cluster.shutdown()
-
-
 def test_serving_request_through_grpc_proxy(ray_cluster):
     """Test serving request through gRPC proxy.
 
diff --git a/python/ray/serve/tests/test_max_replicas_per_node.py b/python/ray/serve/tests/test_max_replicas_per_node.py
index 7b59bf02d6c8d..98d87408b2390 100644
--- a/python/ray/serve/tests/test_max_replicas_per_node.py
+++ b/python/ray/serve/tests/test_max_replicas_per_node.py
@@ -5,7 +5,6 @@
 
 import ray
 from ray import serve
-from ray.cluster_utils import AutoscalingCluster
 from ray.serve.drivers import DAGDriver
 from ray.util.state import list_actors
 
@@ -32,24 +31,32 @@ def get_node_to_deployment_to_num_replicas():
     return node_to_deployment_to_num_replicas
 
 
-def test_basic():
-    """Test that max_replicas_per_node is honored."""
-
-    cluster = AutoscalingCluster(
-        head_resources={"CPU": 0},
-        worker_node_types={
-            "cpu_node": {
-                "resources": {
-                    "CPU": 9999,
+@pytest.mark.skipif(
+    sys.platform == "win32",
+    reason="Flaky on Windows due to https://github.com/ray-project/ray/issues/36926.",
+)
+@pytest.mark.parametrize(
+    "ray_autoscaling_cluster",
+    [
+        {
+            "head_resources": {"CPU": 0},
+            "worker_node_types": {
+                "cpu_node": {
+                    "resources": {
+                        "CPU": 9999,
+                    },
+                    "node_config": {},
+                    "min_workers": 0,
+                    "max_workers": 100,
                 },
-                "node_config": {},
-                "min_workers": 0,
-                "max_workers": 100,
             },
-        },
-    )
+        }
+    ],
+    indirect=True,
+)
+def test_basic(ray_autoscaling_cluster):
+    """Test that max_replicas_per_node is honored."""
 
-    cluster.start()
     ray.init()
 
     @serve.deployment
@@ -78,29 +85,33 @@ def __call__(self):
         assert deployment_to_num_replicas["deploy1"] == 3
         assert deployment_to_num_replicas["deploy2"] == 1
 
-    serve.shutdown()
-    ray.shutdown()
-    cluster.shutdown()
-
 
-def test_update_max_replicas_per_node():
-    """Test re-deploying a deployment with different max_replicas_per_node."""
-
-    cluster = AutoscalingCluster(
-        head_resources={"CPU": 0},
-        worker_node_types={
-            "cpu_node": {
-                "resources": {
-                    "CPU": 9999,
+@pytest.mark.skipif(
+    sys.platform == "win32",
+    reason="Flaky on Windows due to https://github.com/ray-project/ray/issues/36926.",
+)
+@pytest.mark.parametrize(
+    "ray_autoscaling_cluster",
+    [
+        {
+            "head_resources": {"CPU": 0},
+            "worker_node_types": {
+                "cpu_node": {
+                    "resources": {
+                        "CPU": 9999,
+                    },
+                    "node_config": {},
+                    "min_workers": 0,
+                    "max_workers": 100,
                 },
-                "node_config": {},
-                "min_workers": 0,
-                "max_workers": 100,
             },
-        },
-    )
+        }
+    ],
+    indirect=True,
+)
+def test_update_max_replicas_per_node(ray_autoscaling_cluster):
+    """Test re-deploying a deployment with different max_replicas_per_node."""
 
-    cluster.start()
     ray.init()
 
     @serve.deployment
@@ -136,10 +147,6 @@ def __call__(self):
         # Every node has 1 replica.
         assert deployment_to_num_replicas["deploy1"] == 1
 
-    serve.shutdown()
-    ray.shutdown()
-    cluster.shutdown()
-
 
 if __name__ == "__main__":
     sys.exit(pytest.main(["-v", "-s", __file__]))