diff --git a/src/OrthogonalSphericalShellGrids.jl b/src/OrthogonalSphericalShellGrids.jl index 6679e1b..9c8e786 100644 --- a/src/OrthogonalSphericalShellGrids.jl +++ b/src/OrthogonalSphericalShellGrids.jl @@ -12,7 +12,8 @@ using Oceananigans.Grids: R_Earth, halo_size, spherical_area_quadrilateral, lat_lon_to_cartesian, generate_coordinate, topology using Oceananigans.Operators -using Oceananigans.Utils: get_cartesian_nodes_and_vertices + +using Oceananigans.Utils: get_cartesian_nodes_and_vertices using Adapt using JLD2 @@ -28,6 +29,8 @@ include("generate_tripolar_coordinates.jl") include("tripolar_grid.jl") include("grid_extensions.jl") include("distributed_tripolar_grid.jl") +include("distributed_zipper.jl") +include("distributed_zipper_north_tags.jl") include("with_halo.jl") include("split_explicit_free_surface.jl") diff --git a/src/distributed_tripolar_grid.jl b/src/distributed_tripolar_grid.jl index 3387e57..b7f9186 100644 --- a/src/distributed_tripolar_grid.jl +++ b/src/distributed_tripolar_grid.jl @@ -1,6 +1,9 @@ +using MPI +using Oceananigans.BoundaryConditions: DistributedCommunicationBoundaryCondition using Oceananigans.DistributedComputations using Oceananigans.DistributedComputations: local_size, barrier!, + all_reduce, ranks, inject_halo_communication_boundary_conditions, concatenate_local_sizes @@ -26,9 +29,23 @@ function TripolarGrid(arch::Distributed, FT::DataType=Float64; kwargs...) workers = ranks(arch.partition) + px = ifelse(isnothing(arch.partition.x), 1, arch.partition.x) + py = ifelse(isnothing(arch.partition.y), 1, arch.partition.y) - workers[1] != 1 && - throw(ArgumentError("The tripolar grid is supported only on a Y-partitioning configuration")) + # Check that partitioning in x is correct: + try + if isodd(px) && (px != 1) + throw(ArgumentError("Only even partitioning in x is supported with the TripolarGrid")) + end + catch + throw(ArgumentError("The x partition $(px) is not supported. The partition in x must be an even number. ")) + end + + # a slab decomposition in x is not supported + if px != 1 && py == 1 + throw(ArgumentError("A x-only partitioning is not supported with the TripolarGrid. \n + Please, use a y partitioning configuration or a x-y pencil partitioning.")) + end Hx, Hy, Hz = halo @@ -41,39 +58,49 @@ function TripolarGrid(arch::Distributed, FT::DataType=Float64; lsize = local_size(arch, global_size) # Extracting the local range - nlocal = concatenate_local_sizes(lsize, arch, 2) - rank = arch.local_rank - - jstart = 1 + sum(nlocal[1:rank]) - jend = rank == workers[2] - 1 ? Ny : sum(nlocal[1:rank+1]) + nylocal = concatenate_local_sizes(lsize, arch, 2) + nxlocal = concatenate_local_sizes(lsize, arch, 1) + yrank = ifelse(isnothing(arch.partition.x), 0, arch.local_index[2] - 1) + xrank = ifelse(isnothing(arch.partition.x), 0, arch.local_index[1] - 1) + + # The j-range + jstart = 1 + sum(nylocal[1:yrank]) + jend = yrank == workers[2] - 1 ? Ny : sum(nylocal[1:yrank+1]) jrange = jstart-Hy:jend+Hy + # The i-range + istart = 1 + sum(nxlocal[1:xrank]) + iend = xrank == workers[1] - 1 ? 
Nx : sum(nxlocal[1:xrank+1]) + irange = istart-Hx:iend+Hx + # Partitioning the Coordinates - λᶠᶠᵃ = partition_tripolar_metric(global_grid, :λᶠᶠᵃ, jrange) - φᶠᶠᵃ = partition_tripolar_metric(global_grid, :φᶠᶠᵃ, jrange) - λᶠᶜᵃ = partition_tripolar_metric(global_grid, :λᶠᶜᵃ, jrange) - φᶠᶜᵃ = partition_tripolar_metric(global_grid, :φᶠᶜᵃ, jrange) - λᶜᶠᵃ = partition_tripolar_metric(global_grid, :λᶜᶠᵃ, jrange) - φᶜᶠᵃ = partition_tripolar_metric(global_grid, :φᶜᶠᵃ, jrange) - λᶜᶜᵃ = partition_tripolar_metric(global_grid, :λᶜᶜᵃ, jrange) - φᶜᶜᵃ = partition_tripolar_metric(global_grid, :φᶜᶜᵃ, jrange) + λᶠᶠᵃ = partition_tripolar_metric(global_grid, :λᶠᶠᵃ, irange, jrange) + φᶠᶠᵃ = partition_tripolar_metric(global_grid, :φᶠᶠᵃ, irange, jrange) + λᶠᶜᵃ = partition_tripolar_metric(global_grid, :λᶠᶜᵃ, irange, jrange) + φᶠᶜᵃ = partition_tripolar_metric(global_grid, :φᶠᶜᵃ, irange, jrange) + λᶜᶠᵃ = partition_tripolar_metric(global_grid, :λᶜᶠᵃ, irange, jrange) + φᶜᶠᵃ = partition_tripolar_metric(global_grid, :φᶜᶠᵃ, irange, jrange) + λᶜᶜᵃ = partition_tripolar_metric(global_grid, :λᶜᶜᵃ, irange, jrange) + φᶜᶜᵃ = partition_tripolar_metric(global_grid, :φᶜᶜᵃ, irange, jrange) # Partitioning the Metrics - Δxᶜᶜᵃ = partition_tripolar_metric(global_grid, :Δxᶜᶜᵃ, jrange) - Δxᶠᶜᵃ = partition_tripolar_metric(global_grid, :Δxᶠᶜᵃ, jrange) - Δxᶜᶠᵃ = partition_tripolar_metric(global_grid, :Δxᶜᶠᵃ, jrange) - Δxᶠᶠᵃ = partition_tripolar_metric(global_grid, :Δxᶠᶠᵃ, jrange) - Δyᶜᶜᵃ = partition_tripolar_metric(global_grid, :Δyᶜᶜᵃ, jrange) - Δyᶠᶜᵃ = partition_tripolar_metric(global_grid, :Δyᶠᶜᵃ, jrange) - Δyᶜᶠᵃ = partition_tripolar_metric(global_grid, :Δyᶜᶠᵃ, jrange) - Δyᶠᶠᵃ = partition_tripolar_metric(global_grid, :Δyᶠᶠᵃ, jrange) - Azᶜᶜᵃ = partition_tripolar_metric(global_grid, :Azᶜᶜᵃ, jrange) - Azᶠᶜᵃ = partition_tripolar_metric(global_grid, :Azᶠᶜᵃ, jrange) - Azᶜᶠᵃ = partition_tripolar_metric(global_grid, :Azᶜᶠᵃ, jrange) - Azᶠᶠᵃ = partition_tripolar_metric(global_grid, :Azᶠᶠᵃ, jrange) - - LY = rank == 0 ? RightConnected : FullyConnected - ny = nlocal[rank+1] + Δxᶜᶜᵃ = partition_tripolar_metric(global_grid, :Δxᶜᶜᵃ, irange, jrange) + Δxᶠᶜᵃ = partition_tripolar_metric(global_grid, :Δxᶠᶜᵃ, irange, jrange) + Δxᶜᶠᵃ = partition_tripolar_metric(global_grid, :Δxᶜᶠᵃ, irange, jrange) + Δxᶠᶠᵃ = partition_tripolar_metric(global_grid, :Δxᶠᶠᵃ, irange, jrange) + Δyᶜᶜᵃ = partition_tripolar_metric(global_grid, :Δyᶜᶜᵃ, irange, jrange) + Δyᶠᶜᵃ = partition_tripolar_metric(global_grid, :Δyᶠᶜᵃ, irange, jrange) + Δyᶜᶠᵃ = partition_tripolar_metric(global_grid, :Δyᶜᶠᵃ, irange, jrange) + Δyᶠᶠᵃ = partition_tripolar_metric(global_grid, :Δyᶠᶠᵃ, irange, jrange) + Azᶜᶜᵃ = partition_tripolar_metric(global_grid, :Azᶜᶜᵃ, irange, jrange) + Azᶠᶜᵃ = partition_tripolar_metric(global_grid, :Azᶠᶜᵃ, irange, jrange) + Azᶜᶠᵃ = partition_tripolar_metric(global_grid, :Azᶜᶠᵃ, irange, jrange) + Azᶠᶠᵃ = partition_tripolar_metric(global_grid, :Azᶠᶠᵃ, irange, jrange) + + LY = yrank == 0 ? RightConnected : FullyConnected + LX = workers[1] == 1 ? 
Periodic : FullyConnected + ny = nylocal[yrank+1] + nx = nxlocal[xrank+1] zᵃᵃᶜ = global_grid.zᵃᵃᶜ zᵃᵃᶠ = global_grid.zᵃᵃᶠ @@ -81,75 +108,151 @@ function TripolarGrid(arch::Distributed, FT::DataType=Float64; Δzᵃᵃᶠ = global_grid.Δzᵃᵃᶠ radius = global_grid.radius - grid = OrthogonalSphericalShellGrid{Periodic, LY, Bounded}(arch, - Nx, ny, Nz, - Hx, Hy, Hz, - convert(eltype(radius), global_grid.Lz), - on_architecture(arch, λᶜᶜᵃ), - on_architecture(arch, λᶠᶜᵃ), - on_architecture(arch, λᶜᶠᵃ), - on_architecture(arch, λᶠᶠᵃ), - on_architecture(arch, φᶜᶜᵃ), - on_architecture(arch, φᶠᶜᵃ), - on_architecture(arch, φᶜᶠᵃ), - on_architecture(arch, φᶠᶠᵃ), - on_architecture(arch, zᵃᵃᶜ), - on_architecture(arch, zᵃᵃᶠ), - on_architecture(arch, Δxᶜᶜᵃ), - on_architecture(arch, Δxᶠᶜᵃ), - on_architecture(arch, Δxᶜᶠᵃ), - on_architecture(arch, Δxᶠᶠᵃ), - on_architecture(arch, Δyᶜᶜᵃ), - on_architecture(arch, Δyᶜᶠᵃ), - on_architecture(arch, Δyᶠᶜᵃ), - on_architecture(arch, Δyᶠᶠᵃ), - on_architecture(arch, Δzᵃᵃᶜ), - on_architecture(arch, Δzᵃᵃᶠ), - on_architecture(arch, Azᶜᶜᵃ), - on_architecture(arch, Azᶠᶜᵃ), - on_architecture(arch, Azᶜᶠᵃ), - on_architecture(arch, Azᶠᶠᵃ), - radius, - global_grid.conformal_mapping) + # Fix corners halos passing in case workers[1] != 1 + if workers[1] != 1 + northwest_idx_x = ranks(arch)[1] - arch.local_index[1] + 2 + northeast_idx_x = ranks(arch)[1] - arch.local_index[1] + + if northwest_idx_x > workers[1] + northwest_idx_x = arch.local_index[1] + end + + if northeast_idx_x < 1 + northeast_idx_x = arch.local_index[1] + end + + # Make sure the northwest and northeast connectivities are correct + northwest_recv_rank = receiving_rank(arch; receive_idx_x = northwest_idx_x) + northeast_recv_rank = receiving_rank(arch; receive_idx_x = northeast_idx_x) + north_recv_rank = receiving_rank(arch) + + if yrank == workers[2] - 1 + arch.connectivity.northwest = northwest_recv_rank + arch.connectivity.northeast = northeast_recv_rank + arch.connectivity.north = north_recv_rank + end + end + + grid = OrthogonalSphericalShellGrid{LX, LY, Bounded}(arch, + nx, ny, Nz, + Hx, Hy, Hz, + convert(eltype(radius), global_grid.Lz), + on_architecture(arch, λᶜᶜᵃ), + on_architecture(arch, λᶠᶜᵃ), + on_architecture(arch, λᶜᶠᵃ), + on_architecture(arch, λᶠᶠᵃ), + on_architecture(arch, φᶜᶜᵃ), + on_architecture(arch, φᶠᶜᵃ), + on_architecture(arch, φᶜᶠᵃ), + on_architecture(arch, φᶠᶠᵃ), + on_architecture(arch, zᵃᵃᶜ), + on_architecture(arch, zᵃᵃᶠ), + on_architecture(arch, Δxᶜᶜᵃ), + on_architecture(arch, Δxᶠᶜᵃ), + on_architecture(arch, Δxᶜᶠᵃ), + on_architecture(arch, Δxᶠᶠᵃ), + on_architecture(arch, Δyᶜᶜᵃ), + on_architecture(arch, Δyᶜᶠᵃ), + on_architecture(arch, Δyᶠᶜᵃ), + on_architecture(arch, Δyᶠᶠᵃ), + on_architecture(arch, Δzᵃᵃᶜ), + on_architecture(arch, Δzᵃᵃᶠ), + on_architecture(arch, Azᶜᶜᵃ), + on_architecture(arch, Azᶠᶜᵃ), + on_architecture(arch, Azᶜᶠᵃ), + on_architecture(arch, Azᶠᶠᵃ), + radius, + global_grid.conformal_mapping) return grid end -function partition_tripolar_metric(global_grid, metric_name, jrange) +function partition_tripolar_metric(global_grid, metric_name, irange, jrange) metric = getproperty(global_grid, metric_name) offsets = metric.offsets - partitioned_metric = metric[:, jrange].parent + partitioned_metric = metric[irange, jrange] + + if partitioned_metric isa OffsetArray + partitioned_metric = partitioned_metric.parent + end return OffsetArray(partitioned_metric, offsets...) 
end - ##### ##### Boundary condition extensions ##### +struct ZipperHaloCommunicationRanks{F, T, S} + from :: F + to :: T + sign :: S +end + +ZipperHaloCommunicationRanks(sign; from, to) = ZipperHaloCommunicationRanks(from, to, sign) + +Base.summary(hcr::ZipperHaloCommunicationRanks) = "ZipperHaloCommunicationRanks from rank $(hcr.from) to rank $(hcr.to)" + +# Finding out the paired rank to communicate the north boundary +# in case of a DistributedZipperBoundaryCondition using a "Handshake" procedure +function receiving_rank(arch; receive_idx_x = ranks(arch)[1] - arch.local_index[1] + 1) + + Ry = ranks(arch)[2] + receive_rank = 0 + + for rank in 0:prod(ranks(arch)) - 1 + my_x_idx = 0 + my_y_idx = 0 + + if arch.local_rank == rank + my_x_idx = arch.local_index[1] + my_y_idx = arch.local_index[2] + end + + x_idx = all_reduce(+, my_x_idx, arch) + y_idx = all_reduce(+, my_y_idx, arch) + + if x_idx == receive_idx_x && y_idx == Ry + receive_rank = rank + end + end + + return receive_rank +end + # a distributed `TripolarGrid` needs a `ZipperBoundaryCondition` for the north boundary # only on the last rank function regularize_field_boundary_conditions(bcs::FieldBoundaryConditions, - grid::DTRG, - field_name::Symbol, - prognostic_names=nothing) + grid::DTRG, + field_name::Symbol, + prognostic_names=nothing) arch = architecture(grid) loc = assumed_field_location(field_name) - rank = arch.local_rank - processor_size = ranks(arch.partition) + yrank = arch.local_index[2] - 1 + + processor_size = ranks(arch) sign = (field_name == :u) || (field_name == :v) ? -1 : 1 - west = regularize_boundary_condition(bcs.west, grid, loc, 1, LeftBoundary, prognostic_names) - east = regularize_boundary_condition(bcs.east, grid, loc, 1, RightBoundary, prognostic_names) + west = regularize_boundary_condition(bcs.west, grid, loc, 1, LeftBoundary, prognostic_names) + east = regularize_boundary_condition(bcs.east, grid, loc, 1, RightBoundary, prognostic_names) south = regularize_boundary_condition(bcs.south, grid, loc, 2, LeftBoundary, prognostic_names) - north = if rank == processor_size[2] - 1 + + north = if yrank == processor_size[2] - 1 && processor_size[1] == 1 ZipperBoundaryCondition(sign) + + elseif yrank == processor_size[2] - 1 && processor_size[1] != 1 + from = arch.local_rank + # Search the rank to send to + to = arch.connectivity.north + halo_communication = ZipperHaloCommunicationRanks(sign; from, to) + DistributedCommunicationBoundaryCondition(halo_communication) + else - regularize_boundary_condition(bcs.south, grid, loc, 2, RightBoundary, prognostic_names) + regularize_boundary_condition(bcs.north, grid, loc, 2, RightBoundary, prognostic_names) + end bottom = regularize_boundary_condition(bcs.bottom, grid, loc, 3, LeftBoundary, prognostic_names) @@ -164,8 +267,10 @@ end # with a sign that depends on the location of the field (revert the value of the halos if on edges, keep it if on nodes or centers) function Field((LX, LY, LZ)::Tuple, grid::DTRG, data, old_bcs, indices::Tuple, op, status) arch = architecture(grid) - rank = arch.local_rank - processor_size = ranks(arch.partition) + yrank = arch.local_index[2] - 1 + + processor_size = ranks(arch) + indices = validate_indices(indices, (LX, LY, LZ), grid) validate_field_data((LX, LY, LZ), data, grid, indices) validate_boundary_conditions((LX, LY, LZ), grid, old_bcs) @@ -180,27 +285,35 @@ function Field((LX, LY, LZ)::Tuple, grid::DTRG, data, old_bcs, indices::Tuple, o # the last rank, then we need to substitute the BC only if the old one is not already # a zipper 
boundary condition. Otherwise we always substitute because we need to # inject the halo boundary conditions. - if rank == processor_size[2] - 1 + if yrank == processor_size[2] - 1 && processor_size[1] == 1 north_bc = if !(old_bcs.north isa ZBC) default_zipper else old_bcs.north end + + elseif yrank == processor_size[2] - 1 && processor_size[1] != 1 + sgn = old_bcs.north isa ZBC ? old_bcs.north.condition : sign(LX, LY) + from = arch.local_rank + to = arch.connectivity.north + halo_communication = ZipperHaloCommunicationRanks(sgn; from, to) + north_bc = DistributedCommunicationBoundaryCondition(halo_communication) + else north_bc = new_bcs.north end new_bcs = FieldBoundaryConditions(; west=new_bcs.west, - east=new_bcs.east, - south=new_bcs.south, - north=north_bc, - top=new_bcs.top, - bottom=new_bcs.bottom) + east=new_bcs.east, + south=new_bcs.south, + north=north_bc, + top=new_bcs.top, + bottom=new_bcs.bottom) end buffers = FieldBoundaryBuffers(grid, data, new_bcs) - return Field{LX,LY,LZ}(grid, data, new_bcs, indices, op, status, buffers) + return Field{LX, LY, LZ}(grid, data, new_bcs, indices, op, status, buffers) end # Reconstruction the global tripolar grid for visualization purposes @@ -208,7 +321,7 @@ function reconstruct_global_grid(grid::DistributedTripolarGrid) arch = grid.architecture - n = size(grid) + n = Base.size(grid) halo = halo_size(grid) size = map(sum, concatenate_local_sizes(n, arch)) diff --git a/src/distributed_zipper.jl b/src/distributed_zipper.jl new file mode 100644 index 0000000..49cf566 --- /dev/null +++ b/src/distributed_zipper.jl @@ -0,0 +1,111 @@ +using Oceananigans.BoundaryConditions: fill_open_boundary_regions!, + permute_boundary_conditions, + fill_halo_event!, + DistributedCommunication + +using Oceananigans.DistributedComputations: cooperative_waitall!, + recv_from_buffers!, + fill_corners!, + loc_id, + DCBCT + +import Oceananigans.BoundaryConditions: fill_halo_regions! +import Oceananigans.DistributedComputations: synchronize_communication! + +@inline instantiate(T::DataType) = T() +@inline instantiate(T) = T + +const DistributedZipper = BoundaryCondition{<:DistributedCommunication, <:ZipperHaloCommunicationRanks} + +switch_north_halos!(c, north_bc, grid, loc) = nothing + +function switch_north_halos!(c, north_bc::DistributedZipper, grid, loc) + sign = north_bc.condition.sign + hz = halo_size(grid) + sz = size(grid) + + _switch_north_halos!(parent(c), loc, sign, sz, hz) + + return nothing +end + +@inline reversed_halos(::Tuple{<:Any, <:Center, <:Any}, Ny, Hy) = Ny+2Hy:-1:Ny+Hy+2 +@inline reversed_halos(::Tuple{<:Any, <:Face, <:Any}, Ny, Hy) = Ny+2Hy-1:-1:Ny+Hy+1 + +@inline west_corner_halos(::Tuple{<:Face, <:Any, <:Any}, Hx) = 2:Hx +@inline west_corner_halos(::Tuple{<:Center, <:Any, <:Any}, Hx) = 1:Hx + +# We throw away the first point! 
+@inline function _switch_north_halos!(c, loc, sign, (Nx, Ny, Nz), (Hx, Hy, Hz)) + + # Domain indices common for all locations + north_halos = Ny+Hy+1:Ny+2Hy-1 + east_corner = Nx+Hx+1:Nx+2Hx + interior = Hx+1:Nx+Hx + + # Location - dependent halo indices + reversed_north_halos = reversed_halos(loc, Ny, Hy) + west_corner = west_corner_halos(loc, Hx) + + view(c, west_corner, north_halos, :) .= sign .* reverse(view(c, west_corner, reversed_north_halos, :), dims = 1) + view(c, east_corner, north_halos, :) .= sign .* reverse(view(c, east_corner, reversed_north_halos, :), dims = 1) + view(c, interior, north_halos, :) .= sign .* reverse(view(c, interior, reversed_north_halos, :), dims = 1) + + return nothing +end + +function fill_halo_regions!(c::OffsetArray, bcs, indices, loc, grid::DTRG, buffers, args...; only_local_halos = false, fill_boundary_normal_velocities = true, kwargs...) + if fill_boundary_normal_velocities + fill_open_boundary_regions!(c, bcs, indices, loc, grid, args...; kwargs...) + end + + north_bc = bcs.north + + arch = architecture(grid) + fill_halos!, bcs = permute_boundary_conditions(bcs) + + number_of_tasks = length(fill_halos!) + + for task = 1:number_of_tasks + fill_halo_event!(c, fill_halos![task], bcs[task], indices, loc, arch, grid, buffers, args...; only_local_halos, kwargs...) + end + + fill_corners!(c, arch.connectivity, indices, loc, arch, grid, buffers, args...; only_local_halos, kwargs...) + + # We increment the tag counter only if we have actually initiated the MPI communication. + # This is the case only if at least one of the boundary conditions is a distributed communication + # boundary condition (DCBCT) _and_ the `only_local_halos` keyword argument is false. + increment_tag = any(isa.(bcs, DCBCT)) && !only_local_halos + + if increment_tag + arch.mpi_tag[] += 1 + end + + switch_north_halos!(c, north_bc, grid, loc) + + return nothing +end + +function synchronize_communication!(field::Field{<:Any, <:Any, <:Any, <:Any, <:DTRG}) + arch = architecture(field.grid) + + # Wait for outstanding requests + if !isempty(arch.mpi_requests) + cooperative_waitall!(arch.mpi_requests) + + # Reset MPI tag + arch.mpi_tag[] = 0 + + # Reset MPI requests + empty!(arch.mpi_requests) + end + + recv_from_buffers!(field.data, field.boundary_buffers, field.grid) + + north_bc = field.boundary_conditions.north + instantiated_location = map(instantiate, location(field)) + + switch_north_halos!(field, north_bc, field.grid, instantiated_location) + + return nothing +end \ No newline at end of file diff --git a/src/distributed_zipper_north_tags.jl b/src/distributed_zipper_north_tags.jl new file mode 100644 index 0000000..f1752cd --- /dev/null +++ b/src/distributed_zipper_north_tags.jl @@ -0,0 +1,60 @@ +import Oceananigans.DistributedComputations: north_recv_tag, + north_send_tag, + northwest_recv_tag, + northwest_send_tag, + northeast_recv_tag, + northeast_send_tag + +ID_DIGITS = 2 + +sides = (:west, :east, :south, :north, :southwest, :southeast, :northwest, :northeast) +side_id = Dict(side => n-1 for (n, side) in enumerate(sides)) + +# Change these and we are golden! +function north_recv_tag(arch, ::DTRG, location) + field_id = string(arch.mpi_tag[], pad=ID_DIGITS) + loc_digit = string(loc_id(location...), pad=ID_DIGITS) + last_rank = arch.local_index[2] == ranks(arch)[2] + side_digit = last_rank ? 
"8" : string(side_id[:south]) + return parse(Int, field_id * loc_digit * side_digit) +end + +function north_send_tag(arch, ::DTRG, location) + field_id = string(arch.mpi_tag[], pad=ID_DIGITS) + loc_digit = string(loc_id(location...), pad=ID_DIGITS) + last_rank = arch.local_index[2] == ranks(arch)[2] + side_digit = last_rank ? "8" : string(side_id[:north]) + return parse(Int, field_id * loc_digit * side_digit) +end + +function northwest_recv_tag(arch, ::DTRG, location) + field_id = string(arch.mpi_tag[], pad=ID_DIGITS) + loc_digit = string(loc_id(location...), pad=ID_DIGITS) + last_rank = arch.local_index[2] == ranks(arch)[2] + side_digit = last_rank ? "9" : string(side_id[:southeast]) + return parse(Int, field_id * loc_digit * side_digit) +end + +function northwest_send_tag(arch, ::DTRG, location) + field_id = string(arch.mpi_tag[], pad=ID_DIGITS) + loc_digit = string(loc_id(location...), pad=ID_DIGITS) + last_rank = arch.local_index[2] == ranks(arch)[2] + side_digit = last_rank ? "9" : string(side_id[:northwest]) + return parse(Int, field_id * loc_digit * side_digit) +end + +function northeast_recv_tag(arch, ::DTRG, location) + field_id = string(arch.mpi_tag[], pad=ID_DIGITS) + loc_digit = string(loc_id(location...), pad=ID_DIGITS) + last_rank = arch.local_index[2] == ranks(arch)[2] + side_digit = last_rank ? "10" : string(side_id[:southwest]) + return parse(Int, field_id * loc_digit * side_digit) +end + +function northeast_send_tag(arch, ::DTRG, location) + field_id = string(arch.mpi_tag[], pad=ID_DIGITS) + loc_digit = string(loc_id(location...), pad=ID_DIGITS) + last_rank = arch.local_index[2] == ranks(arch)[2] + side_digit = last_rank ? "10" : string(side_id[:northeast]) + return parse(Int, field_id * loc_digit * side_digit) +end diff --git a/src/grid_utils.jl b/src/grid_utils.jl index bed6ff3..50e52ab 100644 --- a/src/grid_utils.jl +++ b/src/grid_utils.jl @@ -38,26 +38,8 @@ end d = lat_lon_to_cartesian(φᶠᶠᵃ[ i , j+1], λᶠᶠᵃ[ i , j+1], 1) Azᶜᶜᵃ[i, j] = spherical_area_quadrilateral(a, b, c, d) * radius^2 - - a = lat_lon_to_cartesian(φᶜᶠᵃ[i-1, j ], λᶜᶠᵃ[i-1, j ], 1) - b = lat_lon_to_cartesian(φᶜᶠᵃ[ i , j ], λᶜᶠᵃ[ i , j ], 1) - c = lat_lon_to_cartesian(φᶜᶠᵃ[ i , j+1], λᶜᶠᵃ[ i , j+1], 1) - d = lat_lon_to_cartesian(φᶜᶠᵃ[i-1, j+1], λᶜᶠᵃ[i-1, j+1], 1) - - Azᶠᶜᵃ[i, j] = spherical_area_quadrilateral(a, b, c, d) * radius^2 - - a = lat_lon_to_cartesian(φᶠᶜᵃ[ i , j-1], λᶠᶜᵃ[ i , j-1], 1) - b = lat_lon_to_cartesian(φᶠᶜᵃ[i+1, j-1], λᶠᶜᵃ[i+1, j-1], 1) - c = lat_lon_to_cartesian(φᶠᶜᵃ[i+1, j ], λᶠᶜᵃ[i+1, j ], 1) - d = lat_lon_to_cartesian(φᶠᶜᵃ[ i , j ], λᶠᶜᵃ[ i , j ], 1) - - Azᶜᶠᵃ[i, j] = spherical_area_quadrilateral(a, b, c, d) * radius^2 - - a = lat_lon_to_cartesian(φᶜᶜᵃ[i-1, j-1], λᶜᶜᵃ[i-1, j-1], 1) - b = lat_lon_to_cartesian(φᶜᶜᵃ[ i , j-1], λᶜᶜᵃ[ i , j-1], 1) - c = lat_lon_to_cartesian(φᶜᶜᵃ[ i , j ], λᶜᶜᵃ[ i , j ], 1) - d = lat_lon_to_cartesian(φᶜᶜᵃ[i-1, j ], λᶜᶜᵃ[i-1, j ], 1) - - Azᶠᶠᵃ[i, j] = spherical_area_quadrilateral(a, b, c, d) * radius^2 + Azᶠᶜᵃ[i, j] = Δyᶠᶜᵃ[i, j] * Δxᶠᶜᵃ[i, j] + Azᶜᶠᵃ[i, j] = Δyᶜᶠᵃ[i, j] * Δxᶜᶠᵃ[i, j] + Azᶠᶠᵃ[i, j] = Δyᶠᶠᵃ[i, j] * Δxᶠᶠᵃ[i, j] end end diff --git a/src/split_explicit_free_surface.jl b/src/split_explicit_free_surface.jl index 9f7ce50..30d2fc6 100644 --- a/src/split_explicit_free_surface.jl +++ b/src/split_explicit_free_surface.jl @@ -53,7 +53,7 @@ function positive_zipper_boundary(default_field, grid::DTRG) arch = architecture(grid) workers = ranks(arch.partition) - if arch.local_rank == workers[2] - 1 + if arch.local_index[2] == workers[2] return 
FieldBoundaryConditions( top = nothing, bottom = nothing, @@ -76,7 +76,16 @@ end # We play the same trick as in the Distributed implementation and we extend the halos for # a split explicit barotropic solver on a tripolar grid. Only on the North boundary though! -@inline tripolar_split_explicit_halos(old_halos, step_halo) = old_halos[1], max(step_halo, old_halos[2]), old_halos[3] +@inline tripolar_split_explicit_halos(old_halos, step_halo, grid) = old_halos[1], max(step_halo, old_halos[2]), old_halos[3] + +@inline function tripolar_split_explicit_halos(old_halos, step_halo, grid::DTRG) + Rx, Ry, _ = architecture(grid).ranks + + Hx = Rx == 1 ? old_halos[1] : max(step_halo, old_halos[1]) + Hy = max(step_halo, old_halos[2]) # Always! + + return Hx, Hy, old_halos[3] +end # Internal function for HydrostaticFreeSurfaceModel function materialize_free_surface(free_surface::SplitExplicitFreeSurface, velocities, grid::TRG) @@ -86,7 +95,9 @@ function materialize_free_surface(free_surface::SplitExplicitFreeSurface, veloci old_halos = halo_size(grid) Nsubsteps = length(settings.substepping.averaging_weights) - extended_halos = tripolar_split_explicit_halos(old_halos, Nsubsteps+1) + # We need one additional halo in both directions because of the shifting + # caused by the halo fill of the horizontal velocities. + extended_halos = tripolar_split_explicit_halos(old_halos, Nsubsteps+3, grid) extended_grid = with_halo(extended_halos, grid) Nze = size(extended_grid, 3) diff --git a/src/zipper_boundary_condition.jl b/src/zipper_boundary_condition.jl index 06811b1..d6c1539 100644 --- a/src/zipper_boundary_condition.jl +++ b/src/zipper_boundary_condition.jl @@ -74,9 +74,7 @@ validate_boundary_condition_location(bc::Zipper, loc::Face, side) = Hy = grid.Hy for j = 1 : Hy - @inbounds begin - c[i, Ny + j, k] = s * c[i′, Ny - j + 1, k] - end + @inbounds c[i, Ny + j, k] = s * c[i′, Ny - j + 1, k] end return nothing @@ -91,9 +89,7 @@ end Hy = grid.Hy for j = 1 : Hy - @inbounds begin - c[i, Ny + j, k] = s * c[i′, Ny - j, k] # The Ny line is duplicated so we substitute starting Ny-1 - end + @inbounds c[i, Ny + j, k] = s * c[i′, Ny - j, k] # The Ny line is duplicated so we substitute starting Ny-1 end return nothing @@ -106,9 +102,7 @@ end Hy = grid.Hy for j = 1 : Hy - @inbounds begin - c[i, Ny + j, k] = sign * c[i′, Ny - j + 1, k] - end + @inbounds c[i, Ny + j, k] = sign * c[i′, Ny - j + 1, k] end return nothing @@ -121,9 +115,7 @@ end Hy = grid.Hy for j = 1 : Hy - @inbounds begin - c[i, Ny + j, k] = sign * c[i′, Ny - j, k] # The Ny line is duplicated so we substitute starting Ny-1 - end + @inbounds c[i, Ny + j, k] = sign * c[i′, Ny - j, k] # The Ny line is duplicated so we substitute starting Ny-1 end return nothing diff --git a/test/dependencies_for_runtests.jl b/test/dependencies_for_runtests.jl new file mode 100644 index 0000000..eb903b1 --- /dev/null +++ b/test/dependencies_for_runtests.jl @@ -0,0 +1,28 @@ +using OrthogonalSphericalShellGrids +using Oceananigans +using Oceananigans.Grids: halo_size +using Oceananigans.Utils +using Oceananigans.Units +using Oceananigans.BoundaryConditions +using OrthogonalSphericalShellGrids: get_cartesian_nodes_and_vertices +using Oceananigans.CUDA +using Test + +using KernelAbstractions: @kernel, @index + +arch = CUDA.has_cuda_gpu() ?
GPU() : CPU() + +# Mask the singularity of the grid in a region of +# 5 degrees radius around the singularities +function mask_singularities(underlying_grid::TripolarGrid) + λp = underlying_grid.conformal_mapping.first_pole_longitude + φp = underlying_grid.conformal_mapping.north_poles_latitude + + # We need a bottom height field that ``masks'' the singularities + bottom_height(λ, φ) = ((abs(λ - λp) < 5) & (abs(φp - φ) < 5)) | + ((abs(λ - λp - 180) < 5) & (abs(φp - φ) < 5)) | (φ < -80) ? 0 : - 1000 + + grid = ImmersedBoundaryGrid(underlying_grid, GridFittedBottom(bottom_height)) + + return grid +end diff --git a/test/distributed_tests_utils.jl b/test/distributed_tests_utils.jl new file mode 100644 index 0000000..c4b1c16 --- /dev/null +++ b/test/distributed_tests_utils.jl @@ -0,0 +1,58 @@ +using JLD2 +using MPI +using Oceananigans.DistributedComputations: reconstruct_global_field + +include("dependencies_for_runtests.jl") + +# Run the distributed grid simulation and save down reconstructed results +function run_distributed_tripolar_grid(arch, filename) + distributed_grid = TripolarGrid(arch; size = (100, 100, 1), z = (-1000, 0), halo = (5, 5, 5)) + distributed_grid = mask_singularities(distributed_grid) + simulation = run_tripolar_simulation(distributed_grid) + + η = reconstruct_global_field(simulation.model.free_surface.η) + u = reconstruct_global_field(simulation.model.velocities.u) + v = reconstruct_global_field(simulation.model.velocities.v) + c = reconstruct_global_field(simulation.model.tracers.c) + + fill_halo_regions!(η) + fill_halo_regions!(u) + fill_halo_regions!(v) + fill_halo_regions!(c) + + if arch.local_rank == 0 + jldsave(filename; η = interior(η, :, :, 1), + u = interior(u, :, :, 1), + v = interior(v, :, :, 1), + c = interior(c, :, :, 1)) + end + + MPI.Barrier(MPI.COMM_WORLD) + MPI.Finalize() + + return nothing +end + +# Just a random simulation on a tripolar grid +function run_tripolar_simulation(grid) + + model = HydrostaticFreeSurfaceModel(; grid = grid, + free_surface = SplitExplicitFreeSurface(grid; substeps = 20), + tracers = :c, + buoyancy = nothing, + tracer_advection = WENO(), + momentum_advection = VectorInvariant(), + coriolis = HydrostaticSphericalCoriolis()) + + # Setup the model with a gaussian sea surface height + # near the physical north poles and one near the equator + ηᵢ(λ, φ, z) = exp(- (φ - 90)^2 / 10^2) + exp(- φ^2 / 10^2) + + set!(model, η = ηᵢ, c = ηᵢ) + + simulation = Simulation(model, Δt = 5minutes, stop_iteration = 100) + + run!(simulation) + + return simulation +end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 3a8c9b0..86dca76 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,16 +1,3 @@ -using OrthogonalSphericalShellGrids -using OrthogonalSphericalShellGrids.Oceananigans -using Oceananigans: GPU, CPU -using Oceananigans.CUDA -using Test - -arch = CUDA.has_cuda_gpu() ? GPU() : CPU() - -@testset "OrthogonalSphericalShellGrids.jl" begin - # We probably do not need any unit tests. - - # Test the grid? - grid = TripolarGrid(arch; size = (10, 10, 1)) - - # Test boundary conditions? 
-end +include("dependencies_for_runtests.jl") +include("distributed_tests_utils.jl") +include("test_distributed_tripolar.jl") diff --git a/test/test_distributed_tripolar.jl b/test/test_distributed_tripolar.jl new file mode 100644 index 0000000..3b14891 --- /dev/null +++ b/test/test_distributed_tripolar.jl @@ -0,0 +1,127 @@ +include("dependencies_for_runtests.jl") +include("distributed_tests_utils.jl") +using MPI + +@testset "Test distributed TripolarGrid boundary conditions..." begin + tripolar_boundary_conditions = """ + using MPI + MPI.Init() + + include("distributed_tests_utils.jl") + + arch = Distributed(CPU(), partition = Partition(2, 2)) + grid = TripolarGrid(arch; size = (20, 20, 1), z = (-1000, 0)) + + u = XFaceField(grid) + v = YFaceField(grid) + c = CenterField(grid) + + set!(u, (x, y, z) -> y) + set!(v, (x, y, z) -> y) + set!(c, (x, y, z) -> y) + + fill_halo_regions!((u, v, c)) + + jldopen("distributed_tripolar_boundary_conditions_" * string(arch.local_rank) * ".jld2", "w") do file + file["u"] = u.data + file["v"] = v.data + file["c"] = c.data + end + """ + + write("distributed_tests.jl", tripolar_boundary_conditions) + mpiexec(cmd -> run(`$cmd -n 4 julia --project distributed_tests.jl`)) + rm("distributed_tests.jl") + + # Run the serial computation + grid = TripolarGrid(size = (20, 20, 1), z = (-1000, 0)) + + u = XFaceField(grid) + v = YFaceField(grid) + c = CenterField(grid) + + set!(u, (x, y, z) -> y) + set!(v, (x, y, z) -> y) + set!(c, (x, y, z) -> y) + + fill_halo_regions!((u, v, c)) + + # Retrieve Parallel quantities from rank 1 (the north-west rank) + up1 = jldopen("distributed_tripolar_boundary_conditions_1.jld2")["u"]; + vp1 = jldopen("distributed_tripolar_boundary_conditions_1.jld2")["v"]; + cp1 = jldopen("distributed_tripolar_boundary_conditions_1.jld2")["c"]; + + # Retrieve Parallel quantities from rank 3 (the north-east rank) + up3 = jldopen("distributed_tripolar_boundary_conditions_3.jld2")["u"]; + vp3 = jldopen("distributed_tripolar_boundary_conditions_3.jld2")["v"]; + cp3 = jldopen("distributed_tripolar_boundary_conditions_3.jld2")["c"]; + + @test u.data[-2:14, 7:end-1, 1] ≈ up1.parent[2:end, 1:end-1, 5] + @test v.data[-3:14, 7:end-1, 1] ≈ vp1.parent[:, 1:end-1, 5] + @test c.data[-3:14, 7:end-1, 1] ≈ cp1.parent[:, 1:end-1, 5] + + @test u.data[8:end, 7:end-1, 1] ≈ up3.parent[2:end, 1:end-1, 5] + @test v.data[7:end, 7:end-1, 1] ≈ vp3.parent[:, 1:end-1, 5] + @test c.data[7:end, 7:end-1, 1] ≈ cp3.parent[:, 1:end-1, 5] +end + +run_slab_distributed_grid = """ + using MPI + MPI.Init() + + include("distributed_tests_utils.jl") + arch = Distributed(CPU(), partition = Partition(1, 4)) + run_distributed_tripolar_grid(arch, "distributed_slab_tripolar.jld2") +""" + +run_pencil_distributed_grid = """ + using MPI + MPI.Init() + + include("distributed_tests_utils.jl") + arch = Distributed(CPU(), partition = Partition(2, 2)) + run_distributed_tripolar_grid(arch, "distributed_pencil_tripolar.jld2") +""" + +@testset "Test distributed TripolarGrid simulations..." 
begin + # Run the distributed grid simulation + write("distributed_tests.jl", run_slab_distributed_grid) + mpiexec(cmd -> run(`$cmd -n 4 julia --project distributed_tests.jl`)) + rm("distributed_tests.jl") + + write("distributed_tests.jl", run_pencil_distributed_grid) + mpiexec(cmd -> run(`$cmd -n 4 julia --project distributed_tests.jl`)) + rm("distributed_tests.jl") + + # Run the serial computation + grid = TripolarGrid(size = (100, 100, 1), z = (-1000, 0)) + grid = mask_singularities(grid) + + simulation = run_tripolar_simulation(grid) + + # Retrieve Serial quantities + us, vs, ws = simulation.model.velocities + cs = simulation.model.tracers.c + ηs = simulation.model.free_surface.η + + # Retrieve Parallel quantities + up_slab = jldopen("distributed_slab_tripolar.jld2")["u"] + vp_slab = jldopen("distributed_slab_tripolar.jld2")["v"] + ηp_slab = jldopen("distributed_slab_tripolar.jld2")["η"] + cp_slab = jldopen("distributed_slab_tripolar.jld2")["c"] + + up_pencil = jldopen("distributed_pencil_tripolar.jld2")["u"] + vp_pencil = jldopen("distributed_pencil_tripolar.jld2")["v"] + ηp_pencil = jldopen("distributed_pencil_tripolar.jld2")["η"] + cp_pencil = jldopen("distributed_pencil_tripolar.jld2")["c"] + + @test interior(us, :, :, 1) ≈ up_slab + @test interior(vs, :, :, 1) ≈ vp_slab + @test interior(cs, :, :, 1) ≈ cp_slab + @test interior(ηs, :, :, 1) ≈ ηp_slab + + @test interior(us, :, :, 1) ≈ up_pencil + @test interior(vs, :, :, 1) ≈ vp_pencil + @test interior(cs, :, :, 1) ≈ cp_pencil + @test interior(ηs, :, :, 1) ≈ ηp_pencil +end \ No newline at end of file
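For reference, the new distributed code paths above can also be driven outside the test suite by a short standalone script. The sketch below is illustrative only: it reuses the helpers introduced in test/distributed_tests_utils.jl (mask_singularities, run_tripolar_simulation) and the same Partition(2, 2) pencil decomposition exercised by the tests, and it assumes it is launched under MPI with something like mpiexec -n 4 julia --project driver.jl (the script name is arbitrary).

# Illustrative driver script; assumes 4 MPI ranks and that it sits next to the test utilities.
using MPI
MPI.Init()

include("distributed_tests_utils.jl") # provides mask_singularities, run_tripolar_simulation, reconstruct_global_field

# A 2x2 pencil decomposition partitions both x and y, so the northernmost ranks take the
# DistributedCommunicationBoundaryCondition + ZipperHaloCommunicationRanks path added in this patch.
arch = Distributed(CPU(), partition = Partition(2, 2))
grid = TripolarGrid(arch; size = (100, 100, 1), z = (-1000, 0), halo = (5, 5, 5))
grid = mask_singularities(grid)

simulation = run_tripolar_simulation(grid)

# Each rank holds only its local slice of the fields; gather the free surface and report from rank 0.
η = reconstruct_global_field(simulation.model.free_surface.η)
arch.local_rank == 0 && @info "Global free-surface extrema" extrema(interior(η))

MPI.Finalize()

A Partition(1, 4) slab decomposition instead goes through the serial ZipperBoundaryCondition branch of regularize_field_boundary_conditions, as exercised by run_slab_distributed_grid above.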