Skip to content

Commit

Permalink
Fix test cases and add timeout tests in gloo_group_isolation
Browse files Browse the repository at this point in the history
Signed-off-by: Hollow Man <hollowman@opensuse.org>
  • Loading branch information
HollowMan6 committed Feb 28, 2025
1 parent 616a82d commit 5d70c6c
Showing 1 changed file with 21 additions and 4 deletions.
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from python.ray.util.collective.types import Backend
from python.ray.util.collective.collective_group.gloo_collective_group import GLOOGroup
import ray
import ray.util.collective as col
import time
Expand All @@ -9,18 +10,34 @@ class Worker:
# No-op constructor: nothing is initialized on the instance here.
def __init__(self):
pass

# NOTE(review): this two-line definition has no `self` parameter and is
# followed immediately by another `def init_gloo_group`; it looks like the
# pre-change side of the diff rather than live code -- confirm before use.
def init_gloo_group(rank: int, world_size: int, group_name: str):
col.init_collective_group(world_size, rank, Backend.GLOO, group_name)
def init_gloo_group(
    self, world_size: int, rank: int, group_name: str, gloo_timeout: int = 30000
):
    """Initialize a GLOO collective group from this worker.

    Forwards ``world_size``, ``rank``, ``group_name`` and ``gloo_timeout``
    to ``col.init_collective_group`` with ``Backend.GLOO`` as the backend.

    Args:
        world_size: World size passed through to the collective group.
        rank: Rank passed through to the collective group.
        group_name: Group name passed through to the collective group.
        gloo_timeout: Timeout passed through unchanged; defaults to 30000
            (presumably milliseconds -- TODO confirm).

    Returns:
        ``True`` once the initialization call has returned without raising.
    """
    backend = Backend.GLOO
    col.init_collective_group(world_size, rank, backend, group_name, gloo_timeout)
    return True

def get_gloo_timeout(self, group_name: str) -> int:
    """Return the timeout stored on the GLOO context of ``group_name``.

    Looks up the group handle with ``col.get_group_handle`` and reads the
    timeout from its ``_gloo_context``.

    Args:
        group_name: Name of the collective group to inspect.

    Returns:
        Whatever ``getTimeout()`` reports for the group's context. The test
        in this file compares it against the integer timeouts 40000 and
        60000, so it is annotated as ``int`` rather than ``bool``
        (NOTE(review): confirm the exact type against the gloo binding).

    Raises:
        AssertionError: If the handle is not a ``GLOOGroup``.
    """
    g = col.get_group_handle(group_name)
    # Check if the group is initialized correctly
    assert isinstance(g, GLOOGroup)
    return g._gloo_context.getTimeout()


# Two workers each create their own GLOO group (world_size=1, rank=0) under
# a different name and timeout, then each reports the timeout of its own
# group, which must equal the value it was created with.
def test_two_groups_in_one_cluster(ray_start_regular_shared):
name1 = "name_1"
name2 = "name_2"
time1 = 40000
time2 = 60000
w1 = Worker.remote()
# NOTE(review): ret1 is assigned twice in a row (first with a literal name
# and no timeout, then with name1/time1); this looks like the old and new
# side of the diff shown together -- confirm only one call is meant to run.
ret1 = w1.init_gloo_group.remote(1, 0, "name_1")
ret1 = w1.init_gloo_group.remote(1, 0, name1, time1)
w2 = Worker.remote()
# NOTE(review): same duplicated assignment pattern for ret2 -- confirm.
ret2 = w2.init_gloo_group.remote(1, 0, "name_2")
ret2 = w2.init_gloo_group.remote(1, 0, name2, time2)
# Both initialization calls must have produced a truthy result.
assert ray.get(ret1)
assert ray.get(ret2)
# Each group's reported timeout must match the one passed at creation.
assert ray.get(w1.get_gloo_timeout.remote(name1)) == time1
assert ray.get(w2.get_gloo_timeout.remote(name2)) == time2


def test_failure_when_initializing(shutdown_only):
Expand Down

0 comments on commit 5d70c6c

Please sign in to comment.