Skip to content

Commit

Permalink
[Serve] Skip test_max_replicas_per_node on Windows (ray-project#40030)
Browse files Browse the repository at this point in the history
ray-project#36926 makes the test flaky by launching more worker nodes than needed. The flakiness seems worse on Windows, so the test is disabled on Windows for now, until the underlying issue is fixed.

Also make sure we clean up everything even when tests fail.

Signed-off-by: Jiajun Yao <jeromeyjj@gmail.com>
Signed-off-by: Victor <vctr.y.m@example.com>
  • Loading branch information
jjyao authored and Victor committed Oct 11, 2023
1 parent 715de75 commit 0292c2f
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 57 deletions.
20 changes: 20 additions & 0 deletions python/ray/serve/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from ray import serve
from ray._private.test_utils import wait_for_condition
from ray._private.usage import usage_lib
from ray.cluster_utils import AutoscalingCluster, Cluster
from ray.serve.context import _get_global_client
from ray.serve.tests.common.utils import TELEMETRY_ROUTE_PREFIX, check_ray_stopped
from ray.tests.conftest import propagate_logs, pytest_runtest_makereport # noqa
Expand All @@ -29,6 +30,25 @@ def ray_shutdown():
ray.shutdown()


@pytest.fixture
def ray_cluster():
    """Provide a fresh Ray ``Cluster`` and tear everything down after the test.

    Yields:
        The ``Cluster`` instance the test should add nodes to / connect to.
    """
    cluster = Cluster()
    # Bug fix: yield the cluster created above, not a second `Cluster()`.
    # The original `yield Cluster()` handed the test a different, never-started
    # instance and left it without a matching shutdown.
    yield cluster
    # pytest resumes the generator even when the test fails, so this cleanup
    # always runs.
    serve.shutdown()
    ray.shutdown()
    cluster.shutdown()


@pytest.fixture
def ray_autoscaling_cluster(request):
    """Start an ``AutoscalingCluster`` configured via indirect parametrization.

    ``request.param`` supplies the ``AutoscalingCluster`` keyword arguments
    (use ``@pytest.mark.parametrize(..., indirect=True)``). Nothing is
    yielded; tests attach with ``ray.init()``. Serve, Ray, and the cluster
    are shut down once the test finishes.
    """
    autoscaling_cluster = AutoscalingCluster(**request.param)
    autoscaling_cluster.start()
    try:
        yield
    finally:
        # Cleanup runs regardless of test outcome.
        serve.shutdown()
        ray.shutdown()
        autoscaling_cluster.shutdown()


@pytest.fixture
def ray_start(scope="module"):
port = random.randint(MIN_DYNAMIC_PORT, MAX_DYNAMIC_PORT)
Expand Down
9 changes: 0 additions & 9 deletions python/ray/serve/tests/test_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,6 @@
from ray.serve.handle import RayServeHandle


@pytest.fixture
def ray_cluster():
    """Yield a fresh Ray ``Cluster``; shut down Serve, Ray, and the cluster afterwards."""
    local_cluster = Cluster()
    try:
        yield local_cluster
    finally:
        # Teardown order mirrors setup in reverse: Serve first, then Ray,
        # then the cluster processes themselves.
        serve.shutdown()
        ray.shutdown()
        local_cluster.shutdown()


def get_pids(expected, deployment_name="D", app_name="default", timeout=30):
handle = serve.get_deployment_handle(deployment_name, app_name)
refs = []
Expand Down
9 changes: 0 additions & 9 deletions python/ray/serve/tests/test_grpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,6 @@
from ray.serve.tests.test_config_files.grpc_deployment import g, g2


@pytest.fixture
def ray_cluster():
    """Provide a fresh Ray ``Cluster`` and tear everything down after the test.

    Yields:
        The ``Cluster`` instance the test should use.
    """
    cluster = Cluster()
    # Bug fix: the original `yield Cluster()` yielded a brand-new, second
    # cluster object while the one created above was the one shut down below.
    yield cluster
    serve.shutdown()
    ray.shutdown()
    cluster.shutdown()


def test_serving_request_through_grpc_proxy(ray_cluster):
"""Test serving request through gRPC proxy.
Expand Down
85 changes: 46 additions & 39 deletions python/ray/serve/tests/test_max_replicas_per_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

import ray
from ray import serve
from ray.cluster_utils import AutoscalingCluster
from ray.serve.drivers import DAGDriver
from ray.util.state import list_actors

Expand All @@ -32,24 +31,32 @@ def get_node_to_deployment_to_num_replicas():
return node_to_deployment_to_num_replicas


def test_basic():
"""Test that max_replicas_per_node is honored."""

cluster = AutoscalingCluster(
head_resources={"CPU": 0},
worker_node_types={
"cpu_node": {
"resources": {
"CPU": 9999,
@pytest.mark.skipif(
sys.platform == "win32",
reason="Flaky on Windows due to https://github.com/ray-project/ray/issues/36926.",
)
@pytest.mark.parametrize(
"ray_autoscaling_cluster",
[
{
"head_resources": {"CPU": 0},
"worker_node_types": {
"cpu_node": {
"resources": {
"CPU": 9999,
},
"node_config": {},
"min_workers": 0,
"max_workers": 100,
},
"node_config": {},
"min_workers": 0,
"max_workers": 100,
},
},
)
}
],
indirect=True,
)
def test_basic(ray_autoscaling_cluster):
"""Test that max_replicas_per_node is honored."""

cluster.start()
ray.init()

@serve.deployment
Expand Down Expand Up @@ -78,29 +85,33 @@ def __call__(self):
assert deployment_to_num_replicas["deploy1"] == 3
assert deployment_to_num_replicas["deploy2"] == 1

serve.shutdown()
ray.shutdown()
cluster.shutdown()


def test_update_max_replicas_per_node():
"""Test re-deploying a deployment with different max_replicas_per_node."""

cluster = AutoscalingCluster(
head_resources={"CPU": 0},
worker_node_types={
"cpu_node": {
"resources": {
"CPU": 9999,
@pytest.mark.skipif(
sys.platform == "win32",
reason="Flaky on Windows due to https://github.com/ray-project/ray/issues/36926.",
)
@pytest.mark.parametrize(
"ray_autoscaling_cluster",
[
{
"head_resources": {"CPU": 0},
"worker_node_types": {
"cpu_node": {
"resources": {
"CPU": 9999,
},
"node_config": {},
"min_workers": 0,
"max_workers": 100,
},
"node_config": {},
"min_workers": 0,
"max_workers": 100,
},
},
)
}
],
indirect=True,
)
def test_update_max_replicas_per_node(ray_autoscaling_cluster):
"""Test re-deploying a deployment with different max_replicas_per_node."""

cluster.start()
ray.init()

@serve.deployment
Expand Down Expand Up @@ -136,10 +147,6 @@ def __call__(self):
# Every node has 1 replica.
assert deployment_to_num_replicas["deploy1"] == 1

serve.shutdown()
ray.shutdown()
cluster.shutdown()


if __name__ == "__main__":
sys.exit(pytest.main(["-v", "-s", __file__]))

0 comments on commit 0292c2f

Please sign in to comment.