Commit
AllToAll
OsKnoth committed Jul 19, 2024
1 parent db831e2 commit d867792
Showing 3 changed files with 47 additions and 1 deletion.
1 change: 0 additions & 1 deletion Project.toml
@@ -39,7 +39,6 @@ Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 ProfileCanvas = "efd6af41-a80b-495e-886c-e51b0c7d77a3"
 Revise = "295af30f-e4ad-537b-8983-00126c2a3abe"
 RootSolvers = "7181ea78-2dcb-4de3-ab41-2b8ab5a31e74"
-ScoreP = "754d78bd-90ed-4ac3-9051-8d885452e256"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 SpecialPolynomials = "a25cea48-d430-424a-8ee7-0d3ad3742e9e"
 StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
23 changes: 23 additions & 0 deletions testAllToAllCUDA.jl
@@ -0,0 +1,23 @@
using MPI
using CUDA
MPI.Init()
comm = MPI.COMM_WORLD
rank = MPI.Comm_rank(comm)
# select device: split the communicator by shared-memory node and bind one GPU per node-local rank
comm_l = MPI.Comm_split_type(comm, MPI.COMM_TYPE_SHARED, rank)
rank_l = MPI.Comm_rank(comm_l)
gpu_id = CUDA.device!(rank_l)
# ring neighbours: send to the next rank, receive from the previous one
size = MPI.Comm_size(comm)
dst = mod(rank+1, size)
src = mod(rank-1, size)
println("rank=$rank rank_loc=$rank_l (gpu_id=$gpu_id), size=$size, dst=$dst, src=$src")
N = 4
send_mesg = CuArray{Float64}(undef, N)
recv_mesg = CuArray{Float64}(undef, N)
fill!(send_mesg, Float64(rank))
CUDA.synchronize()
rank==0 && println("start sending...")
MPI.Sendrecv!(send_mesg, dst, 0, recv_mesg, src, 0, comm)
println("recv_mesg on proc $rank_l: $recv_mesg")
rank==0 && println("done.")
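
For reference (not part of the commit), a minimal launcher sketch: the test assumes one MPI rank per local GPU and a CUDA-aware MPI build, since MPI.Sendrecv! is called directly on CuArray buffers. The rank count of 4 and the --project flag are assumptions; MPI.has_cuda() can also be queried inside the script to confirm CUDA support at runtime.

using MPI
# Run the test through the MPI launcher that MPI.jl is configured against
# (mpiexec, srun, ...); 4 ranks is just an example value.
MPI.mpiexec() do cmd
    run(`$cmd -n 4 $(Base.julia_cmd()) --project testAllToAllCUDA.jl`)
end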
24 changes: 24 additions & 0 deletions testAllToAllROCM.jl
@@ -0,0 +1,24 @@
using MPI
using AMDGPU
MPI.Init()
comm = MPI.COMM_WORLD
rank = MPI.Comm_rank(comm)
# select device: split the communicator by shared-memory node and bind one GPU per node-local rank
comm_l = MPI.Comm_split_type(comm, MPI.COMM_TYPE_SHARED, rank)
rank_l = MPI.Comm_rank(comm_l)
device = AMDGPU.device_id!(rank_l+1)
gpu_id = AMDGPU.device_id(AMDGPU.device())
# ring neighbours: send to the next rank, receive from the previous one
size = MPI.Comm_size(comm)
dst = mod(rank+1, size)
src = mod(rank-1, size)
println("rank=$rank rank_loc=$rank_l (gpu_id=$gpu_id - $device), size=$size, dst=$dst, src=$src")
N = 4
send_mesg = ROCArray{Float64}(undef, N)
recv_mesg = ROCArray{Float64}(undef, N)
fill!(send_mesg, Float64(rank))
AMDGPU.synchronize()
rank==0 && println("start sending...")
MPI.Sendrecv!(send_mesg, dst, 0, recv_mesg, src, 0, comm)
println("recv_mesg on proc $rank: $recv_mesg")
rank==0 && println("done.")
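
As with the CUDA variant, ROCArray buffers are passed straight to MPI.Sendrecv!, so a GPU-aware (ROCm-enabled) MPI build is required. A minimal runtime check one might add after MPI.Init() is sketched below; MPI.has_rocm() is exposed by recent MPI.jl releases and is an assumption for older versions.

using MPI
MPI.Init()
# Warn (on rank 0 only) if the MPI library does not report ROCm support;
# in that case MPI calls on device buffers are likely to fail.
if !MPI.has_rocm()
    MPI.Comm_rank(MPI.COMM_WORLD) == 0 &&
        @warn "MPI build does not report ROCm support; ROCArray buffers may fail"
end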
