From 5dadcf15fcebfd4a6b5f810019cb07f6213db1bd Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Tue, 20 Aug 2024 02:01:21 -0400 Subject: [PATCH 01/42] continuing tomorrow --- src/OrthogonalSphericalShellGrids.jl | 1 + src/distributed_tripolar_grid.jl | 167 +++++++++++++++------- src/distributed_zipper.jl | 199 +++++++++++++++++++++++++++ 3 files changed, 319 insertions(+), 48 deletions(-) create mode 100644 src/distributed_zipper.jl diff --git a/src/OrthogonalSphericalShellGrids.jl b/src/OrthogonalSphericalShellGrids.jl index 6679e1b..43f01f4 100644 --- a/src/OrthogonalSphericalShellGrids.jl +++ b/src/OrthogonalSphericalShellGrids.jl @@ -28,6 +28,7 @@ include("generate_tripolar_coordinates.jl") include("tripolar_grid.jl") include("grid_extensions.jl") include("distributed_tripolar_grid.jl") +include("boundary_buffers.jl") include("with_halo.jl") include("split_explicit_free_surface.jl") diff --git a/src/distributed_tripolar_grid.jl b/src/distributed_tripolar_grid.jl index 3387e57..ed9120a 100644 --- a/src/distributed_tripolar_grid.jl +++ b/src/distributed_tripolar_grid.jl @@ -41,39 +41,46 @@ function TripolarGrid(arch::Distributed, FT::DataType=Float64; lsize = local_size(arch, global_size) # Extracting the local range - nlocal = concatenate_local_sizes(lsize, arch, 2) - rank = arch.local_rank - - jstart = 1 + sum(nlocal[1:rank]) - jend = rank == workers[2] - 1 ? Ny : sum(nlocal[1:rank+1]) + nylocal = concatenate_local_sizes(lsize, arch, 2) + nxlocal = concatenate_local_sizes(lsize, arch, 1) + yrank = arch.local_index[2] - 1 + xrank = arch.local_index[1] - 1 + + jstart = 1 + sum(nylocal[1:yrank]) + jend = yrank == workers[2] - 1 ? Ny : sum(nlocal[1:yrank+1]) jrange = jstart-Hy:jend+Hy + istart = 1 + sum(nxlocal[1:xrank]) + iend = xrank == workers[1] - 1 ? Nx : sum(nlocal[1:xrank+1]) + irange = istart-Hx:iend+Hx + # Partitioning the Coordinates - λᶠᶠᵃ = partition_tripolar_metric(global_grid, :λᶠᶠᵃ, jrange) - φᶠᶠᵃ = partition_tripolar_metric(global_grid, :φᶠᶠᵃ, jrange) - λᶠᶜᵃ = partition_tripolar_metric(global_grid, :λᶠᶜᵃ, jrange) - φᶠᶜᵃ = partition_tripolar_metric(global_grid, :φᶠᶜᵃ, jrange) - λᶜᶠᵃ = partition_tripolar_metric(global_grid, :λᶜᶠᵃ, jrange) - φᶜᶠᵃ = partition_tripolar_metric(global_grid, :φᶜᶠᵃ, jrange) - λᶜᶜᵃ = partition_tripolar_metric(global_grid, :λᶜᶜᵃ, jrange) - φᶜᶜᵃ = partition_tripolar_metric(global_grid, :φᶜᶜᵃ, jrange) + λᶠᶠᵃ = partition_tripolar_metric(global_grid, :λᶠᶠᵃ, irange, jrange) + φᶠᶠᵃ = partition_tripolar_metric(global_grid, :φᶠᶠᵃ, irange, jrange) + λᶠᶜᵃ = partition_tripolar_metric(global_grid, :λᶠᶜᵃ, irange, jrange) + φᶠᶜᵃ = partition_tripolar_metric(global_grid, :φᶠᶜᵃ, irange, jrange) + λᶜᶠᵃ = partition_tripolar_metric(global_grid, :λᶜᶠᵃ, irange, jrange) + φᶜᶠᵃ = partition_tripolar_metric(global_grid, :φᶜᶠᵃ, irange, jrange) + λᶜᶜᵃ = partition_tripolar_metric(global_grid, :λᶜᶜᵃ, irange, jrange) + φᶜᶜᵃ = partition_tripolar_metric(global_grid, :φᶜᶜᵃ, irange, jrange) # Partitioning the Metrics - Δxᶜᶜᵃ = partition_tripolar_metric(global_grid, :Δxᶜᶜᵃ, jrange) - Δxᶠᶜᵃ = partition_tripolar_metric(global_grid, :Δxᶠᶜᵃ, jrange) - Δxᶜᶠᵃ = partition_tripolar_metric(global_grid, :Δxᶜᶠᵃ, jrange) - Δxᶠᶠᵃ = partition_tripolar_metric(global_grid, :Δxᶠᶠᵃ, jrange) - Δyᶜᶜᵃ = partition_tripolar_metric(global_grid, :Δyᶜᶜᵃ, jrange) - Δyᶠᶜᵃ = partition_tripolar_metric(global_grid, :Δyᶠᶜᵃ, jrange) - Δyᶜᶠᵃ = partition_tripolar_metric(global_grid, :Δyᶜᶠᵃ, jrange) - Δyᶠᶠᵃ = partition_tripolar_metric(global_grid, :Δyᶠᶠᵃ, jrange) - Azᶜᶜᵃ = partition_tripolar_metric(global_grid, :Azᶜᶜᵃ, jrange) - Azᶠᶜᵃ = partition_tripolar_metric(global_grid, :Azᶠᶜᵃ, jrange) - Azᶜᶠᵃ = partition_tripolar_metric(global_grid, :Azᶜᶠᵃ, jrange) - Azᶠᶠᵃ = partition_tripolar_metric(global_grid, :Azᶠᶠᵃ, jrange) - - LY = rank == 0 ? RightConnected : FullyConnected - ny = nlocal[rank+1] + Δxᶜᶜᵃ = partition_tripolar_metric(global_grid, :Δxᶜᶜᵃ, irange, jrange) + Δxᶠᶜᵃ = partition_tripolar_metric(global_grid, :Δxᶠᶜᵃ, irange, jrange) + Δxᶜᶠᵃ = partition_tripolar_metric(global_grid, :Δxᶜᶠᵃ, irange, jrange) + Δxᶠᶠᵃ = partition_tripolar_metric(global_grid, :Δxᶠᶠᵃ, irange, jrange) + Δyᶜᶜᵃ = partition_tripolar_metric(global_grid, :Δyᶜᶜᵃ, irange, jrange) + Δyᶠᶜᵃ = partition_tripolar_metric(global_grid, :Δyᶠᶜᵃ, irange, jrange) + Δyᶜᶠᵃ = partition_tripolar_metric(global_grid, :Δyᶜᶠᵃ, irange, jrange) + Δyᶠᶠᵃ = partition_tripolar_metric(global_grid, :Δyᶠᶠᵃ, irange, jrange) + Azᶜᶜᵃ = partition_tripolar_metric(global_grid, :Azᶜᶜᵃ, irange, jrange) + Azᶠᶜᵃ = partition_tripolar_metric(global_grid, :Azᶠᶜᵃ, irange, jrange) + Azᶜᶠᵃ = partition_tripolar_metric(global_grid, :Azᶜᶠᵃ, irange, jrange) + Azᶠᶠᵃ = partition_tripolar_metric(global_grid, :Azᶠᶠᵃ, irange, jrange) + + LY = yrank == 0 ? RightConnected : FullyConnected + ny = nylocal[yrank+1] + nx = nxlocal[xrank+1] zᵃᵃᶜ = global_grid.zᵃᵃᶜ zᵃᵃᶠ = global_grid.zᵃᵃᶠ @@ -82,7 +89,7 @@ function TripolarGrid(arch::Distributed, FT::DataType=Float64; radius = global_grid.radius grid = OrthogonalSphericalShellGrid{Periodic, LY, Bounded}(arch, - Nx, ny, Nz, + nx, ny, Nz, Hx, Hy, Hz, convert(eltype(radius), global_grid.Lz), on_architecture(arch, λᶜᶜᵃ), @@ -115,41 +122,95 @@ function TripolarGrid(arch::Distributed, FT::DataType=Float64; return grid end -function partition_tripolar_metric(global_grid, metric_name, jrange) +function partition_tripolar_metric(global_grid, metric_name, irange, jrange) metric = getproperty(global_grid, metric_name) offsets = metric.offsets - partitioned_metric = metric[:, jrange].parent + partitioned_metric = metric[irange, jrange] + + if partitioned_metric isa OffsetArray + partitioned_metric = partitioned_metric.parent + end return OffsetArray(partitioned_metric, offsets...) end - ##### ##### Boundary condition extensions ##### +struct ZipperHaloCommunicationRanks{F, T} + from :: F + to :: T + sign :: S +end + +ZipperHaloCommunicationRanks(sign; from, to) = HaloCommunicationRanks(from, to, sign) + +function receiving_rank(arch) + + receive_idx_x = ranks(arch)[1] - arch.local_index[1] + receive_idx_y = ranks(arch)[2] + + receive_rank = 0 + + for rank in 0:prod(ranks(arch)) - 1 + my_x_idx = 0 + my_y_idx = 0 + + if arch.local_rank == rank + my_x_idx = arch.local_index[1] + my_y_idx = arch.local_index[2] + end + + barrier!(arch) + + x_idx = all_reduce(+, my_x_idx, arch) + y_idx = all_reduce(+, my_x_idx, arch) + + if x_idx == receive_idx_x && y_idx == receive_idx_y + receive_rank = rank + end + + barrier!(arch) + end + + return receive_rank +end + # a distributed `TripolarGrid` needs a `ZipperBoundaryCondition` for the north boundary # only on the last rank function regularize_field_boundary_conditions(bcs::FieldBoundaryConditions, - grid::DTRG, - field_name::Symbol, - prognostic_names=nothing) + grid::DTRG, + field_name::Symbol, + prognostic_names=nothing) arch = architecture(grid) loc = assumed_field_location(field_name) - rank = arch.local_rank - processor_size = ranks(arch.partition) + xrank = arch.local_index[1] - 1 + yrank = arch.local_index[2] - 1 + + processor_size = ranks(arch) sign = (field_name == :u) || (field_name == :v) ? -1 : 1 - west = regularize_boundary_condition(bcs.west, grid, loc, 1, LeftBoundary, prognostic_names) - east = regularize_boundary_condition(bcs.east, grid, loc, 1, RightBoundary, prognostic_names) + west = regularize_boundary_condition(bcs.west, grid, loc, 1, LeftBoundary, prognostic_names) + east = regularize_boundary_condition(bcs.east, grid, loc, 1, RightBoundary, prognostic_names) south = regularize_boundary_condition(bcs.south, grid, loc, 2, LeftBoundary, prognostic_names) - north = if rank == processor_size[2] - 1 + + north = if yrank == processor_size[2] - 1 && processor_size[1] == 1 ZipperBoundaryCondition(sign) + + elseif yrank == processor_size[2] - 1 && processor_size[1] != 1 + from = arch.local_rank + # Search the rank to send to + to = receiving_rank(arch) + halo_communication = ZipperHaloCommunicationRanks(sign; from, to) + DistributedBoundaryCondition(halo_communication) + else - regularize_boundary_condition(bcs.south, grid, loc, 2, RightBoundary, prognostic_names) + regularize_boundary_condition(bcs.north, grid, loc, 2, RightBoundary, prognostic_names) + end bottom = regularize_boundary_condition(bcs.bottom, grid, loc, 3, LeftBoundary, prognostic_names) @@ -164,7 +225,9 @@ end # with a sign that depends on the location of the field (revert the value of the halos if on edges, keep it if on nodes or centers) function Field((LX, LY, LZ)::Tuple, grid::DTRG, data, old_bcs, indices::Tuple, op, status) arch = architecture(grid) - rank = arch.local_rank + xrank = arch.local_index[1] - 1 + yrank = arch.local_index[2] - 1 + processor_size = ranks(arch.partition) indices = validate_indices(indices, (LX, LY, LZ), grid) validate_field_data((LX, LY, LZ), data, grid, indices) @@ -180,22 +243,30 @@ function Field((LX, LY, LZ)::Tuple, grid::DTRG, data, old_bcs, indices::Tuple, o # the last rank, then we need to substitute the BC only if the old one is not already # a zipper boundary condition. Otherwise we always substitute because we need to # inject the halo boundary conditions. - if rank == processor_size[2] - 1 + if rank == processor_size[2] - 1 && processor_size[1] == 1 north_bc = if !(old_bcs.north isa ZBC) default_zipper else old_bcs.north end + + elseif rank == processor_size[2] - 1 && processor_size[1] != 1 + sign = old_bcs.north isa ZBC ? old_bcs.north.condition : sign(LX, LY) + from = arch.local_rank + to = receiving_rank(arch) + halo_communication = ZipperHaloCommunicationRanks(sign; from, to) + north_bc = DistributedBoundaryCondition(halo_communication) + else north_bc = new_bcs.north end new_bcs = FieldBoundaryConditions(; west=new_bcs.west, - east=new_bcs.east, - south=new_bcs.south, - north=north_bc, - top=new_bcs.top, - bottom=new_bcs.bottom) + east=new_bcs.east, + south=new_bcs.south, + north=north_bc, + top=new_bcs.top, + bottom=new_bcs.bottom) end buffers = FieldBoundaryBuffers(grid, data, new_bcs) diff --git a/src/distributed_zipper.jl b/src/distributed_zipper.jl new file mode 100644 index 0000000..92b61a2 --- /dev/null +++ b/src/distributed_zipper.jl @@ -0,0 +1,199 @@ +using Oceananigans.BoundaryConditions: fill_open_boundary_regions!, + permute_boundary_conditions, + fill_halo_event!, + fill_corners!, + DistributedCommunication + +using Oceananigans.DistributedComputations: cooperative_waitall!, + recv_from_buffers!, + loc_id + +import Oceananigans.BoundaryConditions: fill_halo_regions! +import Oceananigans.DistributedComputations: synchronize_communication +import Oceananigans.DistributedComputations: north_recv_tag, north_send_tag + +const DistributedZipper = BoundaryCondition{<:DistributedCommunication, <:ZipperHaloCommunicationRanks} + +ID_DIGITS = 2 + +sides = (:west, :east, :south, :north, :southwest, :southeast, :northwest, :northeast) +side_id = Dict(side => n-1 for (n, side) in enumerate(sides)) + +# Change these and we are golden! +function north_recv_tag(arch, grid::DTRG, location) + field_id = string(arch.mpi_tag[], pad=ID_DIGITS) + loc_digit = string(loc_id(location...), pad=ID_DIGITS) + last_rank = arch.local_index[2] == ranks(arch)[2] + side_digit = last_rank ? 9 : string(side_id[:south]) + return parse(Int, field_id * loc_digit * side_digit) +end + +function north_send_tag(arch, grid::DTRG, location) + field_id = string(arch.mpi_tag[], pad=ID_DIGITS) + loc_digit = string(loc_id(location...), pad=ID_DIGITS) + last_rank = arch.local_index[2] == ranks(arch)[2] + side_digit = last_rank ? 9 : string(side_id[:north]) + return parse(Int, field_id * loc_digit * side_digit) +end + +function northwest_recv_tag(arch, grid::DTRG, location) + field_id = string(arch.mpi_tag[], pad=ID_DIGITS) + loc_digit = string(loc_id(location...), pad=ID_DIGITS) + last_rank = arch.local_index[2] == ranks(arch)[2] + side_digit = last_rank ? 9 : string(side_id[:southeast]) + return parse(Int, field_id * loc_digit * side_digit) +end + +function northwest_send_tag(arch, grid::DTRG, location) + field_id = string(arch.mpi_tag[], pad=ID_DIGITS) + loc_digit = string(loc_id(location...), pad=ID_DIGITS) + last_rank = arch.local_index[2] == ranks(arch)[2] + side_digit = last_rank ? 9 : string(side_id[:northwest]) + return parse(Int, field_id * loc_digit * side_digit) +end + +function northeast_recv_tag(arch, grid::DTRG, location) + field_id = string(arch.mpi_tag[], pad=ID_DIGITS) + loc_digit = string(loc_id(location...), pad=ID_DIGITS) + last_rank = arch.local_index[2] == ranks(arch)[2] + side_digit = last_rank ? 9 : string(side_id[:southwest]) + return parse(Int, field_id * loc_digit * side_digit) +end + +function northeast_send_tag(arch, grid::DTRG, location) + field_id = string(arch.mpi_tag[], pad=ID_DIGITS) + loc_digit = string(loc_id(location...), pad=ID_DIGITS) + last_rank = arch.local_index[2] == ranks(arch)[2] + side_digit = last_rank ? 9 : string(side_id[:northeast]) + return parse(Int, field_id * loc_digit * side_digit) +end + +switch_north_halos!(c, north_bc, grid, loc) = nothing + +function switch_north_halos!(c, north_bc::DistributedZipper, grid, loc) + sign = north_bc.condition.sign + + params = ... + + launch!(architecture(grid), grid, params, grid, loc, sign, c) + + return nothing +end + +@kernel function _switch_north_halos!(i, k, grid, ::Tuple{<:Face, <:Face, <:Any}, sign, c) + Nx, Ny, _ = size(grid) + + i′ = Nx - i + 2 # Remember! element Nx + 1 does not exist! + s = ifelse(i′ > Nx , abs(sign), sign) # for periodic elements we change the sign + i′ = ifelse(i′ > Nx, i′ - Nx, i′) # Periodicity is hardcoded in the x-direction!! + Hy = grid.Hy + + for j = 1 : Hy + @inbounds begin + c[i, Ny + j, k] = s * c[i′, Ny - j + 1, k] + end + end + + return nothing +end + +@kernel function _switch_north_halos!(i, k, grid, ::Tuple{<:Face, <:Center, <:Any}, sign, c) + Nx, Ny, _ = size(grid) + + i′ = Nx - i + 2 # Remember! element Nx + 1 does not exist! + s = ifelse(i′ > Nx , abs(sign), sign) # for periodic elements we change the sign + i′ = ifelse(i′ > Nx, i′ - Nx, i′) # Periodicity is hardcoded in the x-direction!! + Hy = grid.Hy + + for j = 1 : Hy + @inbounds begin + c[i, Ny + j, k] = s * c[i′, Ny - j, k] # The Ny line is duplicated so we substitute starting Ny-1 + end + end + + return nothing +end + +@kernel function _switch_north_halos!(i, k, grid, ::Tuple{<:Center, <:Face, <:Any}, sign, c) + Nx, Ny, _ = size(grid) + + i′ = Nx - i + 1 + Hy = grid.Hy + + for j = 1 : Hy + @inbounds begin + c[i, Ny + j, k] = sign * c[i′, Ny - j + 1, k] + end + end + + return nothing +end + +@kernel function _switch_north_halos!(i, k, grid, ::Tuple{<:Center, <:Center, <:Any}, sign, c) + Nx, Ny, _ = size(grid) + + i′ = Nx - i + 1 + Hy = grid.Hy + + for j = 1 : Hy + @inbounds begin + c[i, Ny + j, k] = sign * c[i′, Ny - j, k] # The Ny line is duplicated so we substitute starting Ny-1 + end + end + + return nothing +end + +function fill_halo_regions!(c::OffsetArray, bcs, indices, loc, grid::DTRG, buffers, args...; fill_boundary_normal_velocities = true, kwargs...) + if fill_boundary_normal_velocities + fill_open_boundary_regions!(c, bcs, indices, loc, grid, args...; kwargs...) + end + + north_bc = bcs.north + + arch = architecture(grid) + fill_halos!, bcs = permute_boundary_conditions(bcs) + + number_of_tasks = length(fill_halos!) + + for task = 1:number_of_tasks + fill_halo_event!(c, fill_halos![task], bcs[task], indices, loc, arch, grid, buffers, args...; kwargs...) + end + + fill_corners!(c, arch.connectivity, indices, loc, arch, grid, buffers, args...; kwargs...) + + # We increment the tag counter only if we have actually initiated the MPI communication. + # This is the case only if at least one of the boundary conditions is a distributed communication + # boundary condition (DCBCT) _and_ the `only_local_halos` keyword argument is false. + increment_tag = any(isa.(bcs, DCBCT)) && !only_local_halos + + if increment_tag + arch.mpi_tag[] += 1 + end + + switch_north_halos!(parent(c), north_bc, grid, loc) + + return nothing +end + +function synchronize_communication!(field) + arch = architecture(field.grid) + + # Wait for outstanding requests + if !isempty(arch.mpi_requests) + cooperative_waitall!(arch.mpi_requests) + + # Reset MPI tag + arch.mpi_tag[] = 0 + + # Reset MPI requests + empty!(arch.mpi_requests) + end + + recv_from_buffers!(field.data, field.boundary_buffers, field.grid) + + north_bc = field.boundary_conditions.north + switch_north_halos!(parent(field.data), north_bc, field.grid, location(field)) + + return nothing +end \ No newline at end of file From b1215c296c9dc3e03266656240f2631abfbfae8b Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Tue, 20 Aug 2024 02:24:30 -0400 Subject: [PATCH 02/42] try it out tomorrow --- src/OrthogonalSphericalShellGrids.jl | 2 +- src/distributed_tripolar_grid.jl | 12 ++--- src/distributed_zipper.jl | 78 +++++++++++----------------- 3 files changed, 38 insertions(+), 54 deletions(-) diff --git a/src/OrthogonalSphericalShellGrids.jl b/src/OrthogonalSphericalShellGrids.jl index 43f01f4..45b1e44 100644 --- a/src/OrthogonalSphericalShellGrids.jl +++ b/src/OrthogonalSphericalShellGrids.jl @@ -28,7 +28,7 @@ include("generate_tripolar_coordinates.jl") include("tripolar_grid.jl") include("grid_extensions.jl") include("distributed_tripolar_grid.jl") -include("boundary_buffers.jl") +include("distributed_zipper.jl") include("with_halo.jl") include("split_explicit_free_surface.jl") diff --git a/src/distributed_tripolar_grid.jl b/src/distributed_tripolar_grid.jl index ed9120a..4b82d96 100644 --- a/src/distributed_tripolar_grid.jl +++ b/src/distributed_tripolar_grid.jl @@ -140,7 +140,7 @@ end ##### Boundary condition extensions ##### -struct ZipperHaloCommunicationRanks{F, T} +struct ZipperHaloCommunicationRanks{F, T, S} from :: F to :: T sign :: S @@ -227,7 +227,7 @@ function Field((LX, LY, LZ)::Tuple, grid::DTRG, data, old_bcs, indices::Tuple, o arch = architecture(grid) xrank = arch.local_index[1] - 1 yrank = arch.local_index[2] - 1 - + processor_size = ranks(arch.partition) indices = validate_indices(indices, (LX, LY, LZ), grid) validate_field_data((LX, LY, LZ), data, grid, indices) @@ -243,18 +243,18 @@ function Field((LX, LY, LZ)::Tuple, grid::DTRG, data, old_bcs, indices::Tuple, o # the last rank, then we need to substitute the BC only if the old one is not already # a zipper boundary condition. Otherwise we always substitute because we need to # inject the halo boundary conditions. - if rank == processor_size[2] - 1 && processor_size[1] == 1 + if yrank == processor_size[2] - 1 && processor_size[1] == 1 north_bc = if !(old_bcs.north isa ZBC) default_zipper else old_bcs.north end - elseif rank == processor_size[2] - 1 && processor_size[1] != 1 - sign = old_bcs.north isa ZBC ? old_bcs.north.condition : sign(LX, LY) + elseif yrank == processor_size[2] - 1 && processor_size[1] != 1 + sgn = old_bcs.north isa ZBC ? old_bcs.north.condition : sign(LX, LY) from = arch.local_rank to = receiving_rank(arch) - halo_communication = ZipperHaloCommunicationRanks(sign; from, to) + halo_communication = ZipperHaloCommunicationRanks(sgn; from, to) north_bc = DistributedBoundaryCondition(halo_communication) else diff --git a/src/distributed_zipper.jl b/src/distributed_zipper.jl index 92b61a2..e9f4e12 100644 --- a/src/distributed_zipper.jl +++ b/src/distributed_zipper.jl @@ -1,16 +1,25 @@ using Oceananigans.BoundaryConditions: fill_open_boundary_regions!, permute_boundary_conditions, fill_halo_event!, - fill_corners!, DistributedCommunication using Oceananigans.DistributedComputations: cooperative_waitall!, recv_from_buffers!, - loc_id + fill_corners!, + loc_id, + DCBCT + +using Oceananigans.Utils: KernelParameters import Oceananigans.BoundaryConditions: fill_halo_regions! -import Oceananigans.DistributedComputations: synchronize_communication -import Oceananigans.DistributedComputations: north_recv_tag, north_send_tag +import Oceananigans.DistributedComputations: synchronize_communication! + +import Oceananigans.DistributedComputations: north_recv_tag, + north_send_tag, + northwest_recv_tag, + northwest_send_tag, + northeast_recv_tag, + northeast_send_tag const DistributedZipper = BoundaryCondition{<:DistributedCommunication, <:ZipperHaloCommunicationRanks} @@ -24,7 +33,7 @@ function north_recv_tag(arch, grid::DTRG, location) field_id = string(arch.mpi_tag[], pad=ID_DIGITS) loc_digit = string(loc_id(location...), pad=ID_DIGITS) last_rank = arch.local_index[2] == ranks(arch)[2] - side_digit = last_rank ? 9 : string(side_id[:south]) + side_digit = last_rank ? 8 : string(side_id[:south]) return parse(Int, field_id * loc_digit * side_digit) end @@ -32,7 +41,7 @@ function north_send_tag(arch, grid::DTRG, location) field_id = string(arch.mpi_tag[], pad=ID_DIGITS) loc_digit = string(loc_id(location...), pad=ID_DIGITS) last_rank = arch.local_index[2] == ranks(arch)[2] - side_digit = last_rank ? 9 : string(side_id[:north]) + side_digit = last_rank ? 8 : string(side_id[:north]) return parse(Int, field_id * loc_digit * side_digit) end @@ -56,7 +65,7 @@ function northeast_recv_tag(arch, grid::DTRG, location) field_id = string(arch.mpi_tag[], pad=ID_DIGITS) loc_digit = string(loc_id(location...), pad=ID_DIGITS) last_rank = arch.local_index[2] == ranks(arch)[2] - side_digit = last_rank ? 9 : string(side_id[:southwest]) + side_digit = last_rank ? 10 : string(side_id[:southwest]) return parse(Int, field_id * loc_digit * side_digit) end @@ -64,84 +73,59 @@ function northeast_send_tag(arch, grid::DTRG, location) field_id = string(arch.mpi_tag[], pad=ID_DIGITS) loc_digit = string(loc_id(location...), pad=ID_DIGITS) last_rank = arch.local_index[2] == ranks(arch)[2] - side_digit = last_rank ? 9 : string(side_id[:northeast]) + side_digit = last_rank ? 10 : string(side_id[:northeast]) return parse(Int, field_id * loc_digit * side_digit) end switch_north_halos!(c, north_bc, grid, loc) = nothing function switch_north_halos!(c, north_bc::DistributedZipper, grid, loc) - sign = north_bc.condition.sign + sign = north_bc.condition.sign + Hx, Hy, _ = halo_size(grid) + Nx, Ny, Nz = size(grid) - params = ... + params = KernelParameters((Nx+2Hx, Nz), (-Hx, 0)) launch!(architecture(grid), grid, params, grid, loc, sign, c) return nothing end -@kernel function _switch_north_halos!(i, k, grid, ::Tuple{<:Face, <:Face, <:Any}, sign, c) - Nx, Ny, _ = size(grid) - - i′ = Nx - i + 2 # Remember! element Nx + 1 does not exist! - s = ifelse(i′ > Nx , abs(sign), sign) # for periodic elements we change the sign - i′ = ifelse(i′ > Nx, i′ - Nx, i′) # Periodicity is hardcoded in the x-direction!! - Hy = grid.Hy - - for j = 1 : Hy - @inbounds begin - c[i, Ny + j, k] = s * c[i′, Ny - j + 1, k] - end - end - return nothing -end - -@kernel function _switch_north_halos!(i, k, grid, ::Tuple{<:Face, <:Center, <:Any}, sign, c) +@kernel function _switch_north_halos!(grid, ::Tuple{<:Face, <:Center, <:Any}, sign, c) + i, k = @index(Global, NTuple) Nx, Ny, _ = size(grid) - i′ = Nx - i + 2 # Remember! element Nx + 1 does not exist! - s = ifelse(i′ > Nx , abs(sign), sign) # for periodic elements we change the sign - i′ = ifelse(i′ > Nx, i′ - Nx, i′) # Periodicity is hardcoded in the x-direction!! + i′ = Nx - i + 2 Hy = grid.Hy for j = 1 : Hy - @inbounds begin - c[i, Ny + j, k] = s * c[i′, Ny - j, k] # The Ny line is duplicated so we substitute starting Ny-1 - end + @inbounds c[i, Ny + j, k] = sign * c[i′, Ny - j, k] # The Ny line is duplicated so we substitute starting Ny-1 end - - return nothing end -@kernel function _switch_north_halos!(i, k, grid, ::Tuple{<:Center, <:Face, <:Any}, sign, c) +@kernel function _switch_north_halos!(grid, ::Tuple{<:Center, <:Face, <:Any}, sign, c) + i, k = @index(Global, NTuple) Nx, Ny, _ = size(grid) i′ = Nx - i + 1 Hy = grid.Hy for j = 1 : Hy - @inbounds begin - c[i, Ny + j, k] = sign * c[i′, Ny - j + 1, k] - end + @inbounds c[i, Ny + j, k] = sign * c[i′, Ny - j + 1, k] end - - return nothing end -@kernel function _switch_north_halos!(i, k, grid, ::Tuple{<:Center, <:Center, <:Any}, sign, c) +@kernel function _switch_north_halos!(grid, ::Tuple{<:Center, <:Center, <:Any}, sign, c) + i, k = @index(Global, NTuple) Nx, Ny, _ = size(grid) i′ = Nx - i + 1 Hy = grid.Hy for j = 1 : Hy - @inbounds begin - c[i, Ny + j, k] = sign * c[i′, Ny - j, k] # The Ny line is duplicated so we substitute starting Ny-1 - end + @inbounds c[i, Ny + j, k] = sign * c[i′, Ny - j, k] # The Ny line is duplicated so we substitute starting Ny-1 end - - return nothing end function fill_halo_regions!(c::OffsetArray, bcs, indices, loc, grid::DTRG, buffers, args...; fill_boundary_normal_velocities = true, kwargs...) From 75e4487200f5a5e2358d10842074b5ebc19582de Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Tue, 20 Aug 2024 02:25:33 -0400 Subject: [PATCH 03/42] should compile --- src/distributed_zipper.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/distributed_zipper.jl b/src/distributed_zipper.jl index e9f4e12..cafc38a 100644 --- a/src/distributed_zipper.jl +++ b/src/distributed_zipper.jl @@ -160,7 +160,7 @@ function fill_halo_regions!(c::OffsetArray, bcs, indices, loc, grid::DTRG, buffe return nothing end -function synchronize_communication!(field) +function synchronize_communication!(field::Field{<:Any, <:Any, <:Any, <:Any, <:DTRG}) arch = architecture(field.grid) # Wait for outstanding requests From 0f47da1de44ace1830c649de25e54bd5b42ab5de Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Tue, 20 Aug 2024 13:24:06 -0400 Subject: [PATCH 04/42] this should run! --- src/OrthogonalSphericalShellGrids.jl | 1 + src/distributed_tripolar_grid.jl | 120 +++++++++++++++++---------- src/distributed_zipper.jl | 88 ++++---------------- src/distributed_zipper_north_tags.jl | 60 ++++++++++++++ 4 files changed, 149 insertions(+), 120 deletions(-) create mode 100644 src/distributed_zipper_north_tags.jl diff --git a/src/OrthogonalSphericalShellGrids.jl b/src/OrthogonalSphericalShellGrids.jl index 45b1e44..7467049 100644 --- a/src/OrthogonalSphericalShellGrids.jl +++ b/src/OrthogonalSphericalShellGrids.jl @@ -29,6 +29,7 @@ include("tripolar_grid.jl") include("grid_extensions.jl") include("distributed_tripolar_grid.jl") include("distributed_zipper.jl") +include("distributed_zipper_north_tags.jl") include("with_halo.jl") include("split_explicit_free_surface.jl") diff --git a/src/distributed_tripolar_grid.jl b/src/distributed_tripolar_grid.jl index 4b82d96..fdc3298 100644 --- a/src/distributed_tripolar_grid.jl +++ b/src/distributed_tripolar_grid.jl @@ -1,6 +1,9 @@ +using MPI +using Oceananigans.BoundaryConditions: DistributedCommunicationBoundaryCondition using Oceananigans.DistributedComputations using Oceananigans.DistributedComputations: local_size, barrier!, + all_reduce, ranks, inject_halo_communication_boundary_conditions, concatenate_local_sizes @@ -27,8 +30,14 @@ function TripolarGrid(arch::Distributed, FT::DataType=Float64; workers = ranks(arch.partition) - workers[1] != 1 && - throw(ArgumentError("The tripolar grid is supported only on a Y-partitioning configuration")) + # Check that partitioning in x is correct: + try + if isodd(arch.partition.x) + throw(ArgumentError("The number of partitionsOnly even partitioning in x is supported with the TripolarGrid")) + end + catch + throw(ArgumentError("The x partition $(arch.partition.x) is not supported. The partition in x must be an even number. ")) + end Hx, Hy, Hz = halo @@ -46,12 +55,14 @@ function TripolarGrid(arch::Distributed, FT::DataType=Float64; yrank = arch.local_index[2] - 1 xrank = arch.local_index[1] - 1 + # The j-range jstart = 1 + sum(nylocal[1:yrank]) - jend = yrank == workers[2] - 1 ? Ny : sum(nlocal[1:yrank+1]) + jend = yrank == workers[2] - 1 ? Ny : sum(nylocal[1:yrank+1]) jrange = jstart-Hy:jend+Hy + # The i-range istart = 1 + sum(nxlocal[1:xrank]) - iend = xrank == workers[1] - 1 ? Nx : sum(nlocal[1:xrank+1]) + iend = xrank == workers[1] - 1 ? Nx : sum(nxlocal[1:xrank+1]) irange = istart-Hx:iend+Hx # Partitioning the Coordinates @@ -79,6 +90,7 @@ function TripolarGrid(arch::Distributed, FT::DataType=Float64; Azᶠᶠᵃ = partition_tripolar_metric(global_grid, :Azᶠᶠᵃ, irange, jrange) LY = yrank == 0 ? RightConnected : FullyConnected + LX = workers[1] == 1 ? Periodic : FullyConnected ny = nylocal[yrank+1] nx = nxlocal[xrank+1] @@ -88,36 +100,44 @@ function TripolarGrid(arch::Distributed, FT::DataType=Float64; Δzᵃᵃᶠ = global_grid.Δzᵃᵃᶠ radius = global_grid.radius - grid = OrthogonalSphericalShellGrid{Periodic, LY, Bounded}(arch, - nx, ny, Nz, - Hx, Hy, Hz, - convert(eltype(radius), global_grid.Lz), - on_architecture(arch, λᶜᶜᵃ), - on_architecture(arch, λᶠᶜᵃ), - on_architecture(arch, λᶜᶠᵃ), - on_architecture(arch, λᶠᶠᵃ), - on_architecture(arch, φᶜᶜᵃ), - on_architecture(arch, φᶠᶜᵃ), - on_architecture(arch, φᶜᶠᵃ), - on_architecture(arch, φᶠᶠᵃ), - on_architecture(arch, zᵃᵃᶜ), - on_architecture(arch, zᵃᵃᶠ), - on_architecture(arch, Δxᶜᶜᵃ), - on_architecture(arch, Δxᶠᶜᵃ), - on_architecture(arch, Δxᶜᶠᵃ), - on_architecture(arch, Δxᶠᶠᵃ), - on_architecture(arch, Δyᶜᶜᵃ), - on_architecture(arch, Δyᶜᶠᵃ), - on_architecture(arch, Δyᶠᶜᵃ), - on_architecture(arch, Δyᶠᶠᵃ), - on_architecture(arch, Δzᵃᵃᶜ), - on_architecture(arch, Δzᵃᵃᶠ), - on_architecture(arch, Azᶜᶜᵃ), - on_architecture(arch, Azᶠᶜᵃ), - on_architecture(arch, Azᶜᶠᵃ), - on_architecture(arch, Azᶠᶠᵃ), - radius, - global_grid.conformal_mapping) + # Make sure the northwest and northeast connectivities are correct + north_recv_rank = north_receiving_rank(arch) + + if yrank == workers[2] - 1 && workers[1] != 1 + arch.connectivity.northeast = north_recv_rank + arch.connectivity.northwest = north_recv_rank + end + + grid = OrthogonalSphericalShellGrid{LX, LY, Bounded}(arch, + nx, ny, Nz, + Hx, Hy, Hz, + convert(eltype(radius), global_grid.Lz), + on_architecture(arch, λᶜᶜᵃ), + on_architecture(arch, λᶠᶜᵃ), + on_architecture(arch, λᶜᶠᵃ), + on_architecture(arch, λᶠᶠᵃ), + on_architecture(arch, φᶜᶜᵃ), + on_architecture(arch, φᶠᶜᵃ), + on_architecture(arch, φᶜᶠᵃ), + on_architecture(arch, φᶠᶠᵃ), + on_architecture(arch, zᵃᵃᶜ), + on_architecture(arch, zᵃᵃᶠ), + on_architecture(arch, Δxᶜᶜᵃ), + on_architecture(arch, Δxᶠᶜᵃ), + on_architecture(arch, Δxᶜᶠᵃ), + on_architecture(arch, Δxᶠᶠᵃ), + on_architecture(arch, Δyᶜᶜᵃ), + on_architecture(arch, Δyᶜᶠᵃ), + on_architecture(arch, Δyᶠᶜᵃ), + on_architecture(arch, Δyᶠᶠᵃ), + on_architecture(arch, Δzᵃᵃᶜ), + on_architecture(arch, Δzᵃᵃᶠ), + on_architecture(arch, Azᶜᶜᵃ), + on_architecture(arch, Azᶠᶜᵃ), + on_architecture(arch, Azᶜᶠᵃ), + on_architecture(arch, Azᶠᶠᵃ), + radius, + global_grid.conformal_mapping) return grid end @@ -146,14 +166,17 @@ struct ZipperHaloCommunicationRanks{F, T, S} sign :: S end -ZipperHaloCommunicationRanks(sign; from, to) = HaloCommunicationRanks(from, to, sign) +ZipperHaloCommunicationRanks(sign; from, to) = ZipperHaloCommunicationRanks(from, to, sign) -function receiving_rank(arch) +Base.summary(hcr::ZipperHaloCommunicationRanks) = "ZipperHaloCommunicationRanks from rank $(hcr.from) to rank $(hcr.to)" - receive_idx_x = ranks(arch)[1] - arch.local_index[1] - receive_idx_y = ranks(arch)[2] +# Finding out the paired rank to communicate the north boundary +# in case of a DistributedZipperBoundaryCondition +function north_receiving_rank(arch) - receive_rank = 0 + receive_idx_x = ranks(arch)[1] - arch.local_index[1] + 1 + receive_idx_y = ranks(arch)[2] + receive_rank = 0 for rank in 0:prod(ranks(arch)) - 1 my_x_idx = 0 @@ -167,7 +190,7 @@ function receiving_rank(arch) barrier!(arch) x_idx = all_reduce(+, my_x_idx, arch) - y_idx = all_reduce(+, my_x_idx, arch) + y_idx = all_reduce(+, my_y_idx, arch) if x_idx == receive_idx_x && y_idx == receive_idx_y receive_rank = rank @@ -198,15 +221,17 @@ function regularize_field_boundary_conditions(bcs::FieldBoundaryConditions, east = regularize_boundary_condition(bcs.east, grid, loc, 1, RightBoundary, prognostic_names) south = regularize_boundary_condition(bcs.south, grid, loc, 2, LeftBoundary, prognostic_names) + north_recv_rank = north_receiving_rank(arch) + north = if yrank == processor_size[2] - 1 && processor_size[1] == 1 ZipperBoundaryCondition(sign) elseif yrank == processor_size[2] - 1 && processor_size[1] != 1 from = arch.local_rank # Search the rank to send to - to = receiving_rank(arch) + to = north_recv_rank halo_communication = ZipperHaloCommunicationRanks(sign; from, to) - DistributedBoundaryCondition(halo_communication) + DistributedCommunicationBoundaryCondition(halo_communication) else regularize_boundary_condition(bcs.north, grid, loc, 2, RightBoundary, prognostic_names) @@ -228,12 +253,15 @@ function Field((LX, LY, LZ)::Tuple, grid::DTRG, data, old_bcs, indices::Tuple, o xrank = arch.local_index[1] - 1 yrank = arch.local_index[2] - 1 - processor_size = ranks(arch.partition) + processor_size = ranks(arch) + indices = validate_indices(indices, (LX, LY, LZ), grid) validate_field_data((LX, LY, LZ), data, grid, indices) validate_boundary_conditions((LX, LY, LZ), grid, old_bcs) default_zipper = ZipperBoundaryCondition(sign(LX, LY)) + north_recv_rank = north_receiving_rank(arch) + if isnothing(old_bcs) || ismissing(old_bcs) new_bcs = old_bcs else @@ -253,9 +281,9 @@ function Field((LX, LY, LZ)::Tuple, grid::DTRG, data, old_bcs, indices::Tuple, o elseif yrank == processor_size[2] - 1 && processor_size[1] != 1 sgn = old_bcs.north isa ZBC ? old_bcs.north.condition : sign(LX, LY) from = arch.local_rank - to = receiving_rank(arch) + to = north_recv_rank halo_communication = ZipperHaloCommunicationRanks(sgn; from, to) - north_bc = DistributedBoundaryCondition(halo_communication) + north_bc = DistributedCommunicationBoundaryCondition(halo_communication) else north_bc = new_bcs.north @@ -271,7 +299,7 @@ function Field((LX, LY, LZ)::Tuple, grid::DTRG, data, old_bcs, indices::Tuple, o buffers = FieldBoundaryBuffers(grid, data, new_bcs) - return Field{LX,LY,LZ}(grid, data, new_bcs, indices, op, status, buffers) + return Field{LX, LY, LZ}(grid, data, new_bcs, indices, op, status, buffers) end # Reconstruction the global tripolar grid for visualization purposes diff --git a/src/distributed_zipper.jl b/src/distributed_zipper.jl index cafc38a..dd60b2a 100644 --- a/src/distributed_zipper.jl +++ b/src/distributed_zipper.jl @@ -14,69 +14,10 @@ using Oceananigans.Utils: KernelParameters import Oceananigans.BoundaryConditions: fill_halo_regions! import Oceananigans.DistributedComputations: synchronize_communication! -import Oceananigans.DistributedComputations: north_recv_tag, - north_send_tag, - northwest_recv_tag, - northwest_send_tag, - northeast_recv_tag, - northeast_send_tag +import Oceananigans.Fields: create_buffer_y, create_buffer_corner const DistributedZipper = BoundaryCondition{<:DistributedCommunication, <:ZipperHaloCommunicationRanks} -ID_DIGITS = 2 - -sides = (:west, :east, :south, :north, :southwest, :southeast, :northwest, :northeast) -side_id = Dict(side => n-1 for (n, side) in enumerate(sides)) - -# Change these and we are golden! -function north_recv_tag(arch, grid::DTRG, location) - field_id = string(arch.mpi_tag[], pad=ID_DIGITS) - loc_digit = string(loc_id(location...), pad=ID_DIGITS) - last_rank = arch.local_index[2] == ranks(arch)[2] - side_digit = last_rank ? 8 : string(side_id[:south]) - return parse(Int, field_id * loc_digit * side_digit) -end - -function north_send_tag(arch, grid::DTRG, location) - field_id = string(arch.mpi_tag[], pad=ID_DIGITS) - loc_digit = string(loc_id(location...), pad=ID_DIGITS) - last_rank = arch.local_index[2] == ranks(arch)[2] - side_digit = last_rank ? 8 : string(side_id[:north]) - return parse(Int, field_id * loc_digit * side_digit) -end - -function northwest_recv_tag(arch, grid::DTRG, location) - field_id = string(arch.mpi_tag[], pad=ID_DIGITS) - loc_digit = string(loc_id(location...), pad=ID_DIGITS) - last_rank = arch.local_index[2] == ranks(arch)[2] - side_digit = last_rank ? 9 : string(side_id[:southeast]) - return parse(Int, field_id * loc_digit * side_digit) -end - -function northwest_send_tag(arch, grid::DTRG, location) - field_id = string(arch.mpi_tag[], pad=ID_DIGITS) - loc_digit = string(loc_id(location...), pad=ID_DIGITS) - last_rank = arch.local_index[2] == ranks(arch)[2] - side_digit = last_rank ? 9 : string(side_id[:northwest]) - return parse(Int, field_id * loc_digit * side_digit) -end - -function northeast_recv_tag(arch, grid::DTRG, location) - field_id = string(arch.mpi_tag[], pad=ID_DIGITS) - loc_digit = string(loc_id(location...), pad=ID_DIGITS) - last_rank = arch.local_index[2] == ranks(arch)[2] - side_digit = last_rank ? 10 : string(side_id[:southwest]) - return parse(Int, field_id * loc_digit * side_digit) -end - -function northeast_send_tag(arch, grid::DTRG, location) - field_id = string(arch.mpi_tag[], pad=ID_DIGITS) - loc_digit = string(loc_id(location...), pad=ID_DIGITS) - last_rank = arch.local_index[2] == ranks(arch)[2] - side_digit = last_rank ? 10 : string(side_id[:northeast]) - return parse(Int, field_id * loc_digit * side_digit) -end - switch_north_halos!(c, north_bc, grid, loc) = nothing function switch_north_halos!(c, north_bc::DistributedZipper, grid, loc) @@ -84,14 +25,13 @@ function switch_north_halos!(c, north_bc::DistributedZipper, grid, loc) Hx, Hy, _ = halo_size(grid) Nx, Ny, Nz = size(grid) - params = KernelParameters((Nx+2Hx, Nz), (-Hx, 0)) + params = KernelParameters((Nx+2Hx-2, Nz), (-Hx+1, 0)) - launch!(architecture(grid), grid, params, grid, loc, sign, c) + launch!(architecture(grid), grid, params, _switch_north_halos!, grid, loc, sign, c) return nothing end - @kernel function _switch_north_halos!(grid, ::Tuple{<:Face, <:Center, <:Any}, sign, c) i, k = @index(Global, NTuple) Nx, Ny, _ = size(grid) @@ -99,8 +39,8 @@ end i′ = Nx - i + 2 Hy = grid.Hy - for j = 1 : Hy - @inbounds c[i, Ny + j, k] = sign * c[i′, Ny - j, k] # The Ny line is duplicated so we substitute starting Ny-1 + for j = 1 : Hy - 1 # TO CORRECTED!!! + @inbounds c[i, Ny + j, k] = sign * c[i′, Ny + Hy - j, k] end end @@ -111,8 +51,8 @@ end i′ = Nx - i + 1 Hy = grid.Hy - for j = 1 : Hy - @inbounds c[i, Ny + j, k] = sign * c[i′, Ny - j + 1, k] + for j = 1 : Hy - 1 + @inbounds c[i, Ny + j, k] = sign * c[i′, Ny + Hy - j + 1, k] end end @@ -123,12 +63,12 @@ end i′ = Nx - i + 1 Hy = grid.Hy - for j = 1 : Hy - @inbounds c[i, Ny + j, k] = sign * c[i′, Ny - j, k] # The Ny line is duplicated so we substitute starting Ny-1 + for j = 1 : Hy - 1 + @inbounds c[i, Ny + j, k] = sign * c[i′, Ny + Hy - j, k] end end -function fill_halo_regions!(c::OffsetArray, bcs, indices, loc, grid::DTRG, buffers, args...; fill_boundary_normal_velocities = true, kwargs...) +function fill_halo_regions!(c::OffsetArray, bcs, indices, loc, grid::DTRG, buffers, args...; only_local_halos = false, fill_boundary_normal_velocities = true, kwargs...) if fill_boundary_normal_velocities fill_open_boundary_regions!(c, bcs, indices, loc, grid, args...; kwargs...) end @@ -141,10 +81,10 @@ function fill_halo_regions!(c::OffsetArray, bcs, indices, loc, grid::DTRG, buffe number_of_tasks = length(fill_halos!) for task = 1:number_of_tasks - fill_halo_event!(c, fill_halos![task], bcs[task], indices, loc, arch, grid, buffers, args...; kwargs...) + fill_halo_event!(c, fill_halos![task], bcs[task], indices, loc, arch, grid, buffers, args...; only_local_halos, kwargs...) end - fill_corners!(c, arch.connectivity, indices, loc, arch, grid, buffers, args...; kwargs...) + fill_corners!(c, arch.connectivity, indices, loc, arch, grid, buffers, args...; only_local_halos, kwargs...) # We increment the tag counter only if we have actually initiated the MPI communication. # This is the case only if at least one of the boundary conditions is a distributed communication @@ -155,7 +95,7 @@ function fill_halo_regions!(c::OffsetArray, bcs, indices, loc, grid::DTRG, buffe arch.mpi_tag[] += 1 end - switch_north_halos!(parent(c), north_bc, grid, loc) + switch_north_halos!(c, north_bc, grid, loc) return nothing end @@ -177,7 +117,7 @@ function synchronize_communication!(field::Field{<:Any, <:Any, <:Any, <:Any, <:D recv_from_buffers!(field.data, field.boundary_buffers, field.grid) north_bc = field.boundary_conditions.north - switch_north_halos!(parent(field.data), north_bc, field.grid, location(field)) + switch_north_halos!(field, north_bc, field.grid, location(field)) return nothing end \ No newline at end of file diff --git a/src/distributed_zipper_north_tags.jl b/src/distributed_zipper_north_tags.jl new file mode 100644 index 0000000..8f401ae --- /dev/null +++ b/src/distributed_zipper_north_tags.jl @@ -0,0 +1,60 @@ +import Oceananigans.DistributedComputations: north_recv_tag, + north_send_tag, + northwest_recv_tag, + northwest_send_tag, + northeast_recv_tag, + northeast_send_tag + +ID_DIGITS = 2 + +sides = (:west, :east, :south, :north, :southwest, :southeast, :northwest, :northeast) +side_id = Dict(side => n-1 for (n, side) in enumerate(sides)) + +# Change these and we are golden! +function north_recv_tag(arch, grid::DTRG, location) + field_id = string(arch.mpi_tag[], pad=ID_DIGITS) + loc_digit = string(loc_id(location...), pad=ID_DIGITS) + last_rank = arch.local_index[2] == ranks(arch)[2] + side_digit = last_rank ? "8" : string(side_id[:south]) + return parse(Int, field_id * loc_digit * side_digit) +end + +function north_send_tag(arch, grid::DTRG, location) + field_id = string(arch.mpi_tag[], pad=ID_DIGITS) + loc_digit = string(loc_id(location...), pad=ID_DIGITS) + last_rank = arch.local_index[2] == ranks(arch)[2] + side_digit = last_rank ? "8" : string(side_id[:north]) + return parse(Int, field_id * loc_digit * side_digit) +end + +function northwest_recv_tag(arch, grid::DTRG, location) + field_id = string(arch.mpi_tag[], pad=ID_DIGITS) + loc_digit = string(loc_id(location...), pad=ID_DIGITS) + last_rank = arch.local_index[2] == ranks(arch)[2] + side_digit = last_rank ? "9" : string(side_id[:southeast]) + return parse(Int, field_id * loc_digit * side_digit) +end + +function northwest_send_tag(arch, grid::DTRG, location) + field_id = string(arch.mpi_tag[], pad=ID_DIGITS) + loc_digit = string(loc_id(location...), pad=ID_DIGITS) + last_rank = arch.local_index[2] == ranks(arch)[2] + side_digit = last_rank ? "9" : string(side_id[:northwest]) + return parse(Int, field_id * loc_digit * side_digit) +end + +function northeast_recv_tag(arch, grid::DTRG, location) + field_id = string(arch.mpi_tag[], pad=ID_DIGITS) + loc_digit = string(loc_id(location...), pad=ID_DIGITS) + last_rank = arch.local_index[2] == ranks(arch)[2] + side_digit = last_rank ? "10" : string(side_id[:southwest]) + return parse(Int, field_id * loc_digit * side_digit) +end + +function northeast_send_tag(arch, grid::DTRG, location) + field_id = string(arch.mpi_tag[], pad=ID_DIGITS) + loc_digit = string(loc_id(location...), pad=ID_DIGITS) + last_rank = arch.local_index[2] == ranks(arch)[2] + side_digit = last_rank ? "10" : string(side_id[:northeast]) + return parse(Int, field_id * loc_digit * side_digit) +end From 692477c4230a78effa7fa30517878e2ccc88ca26 Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Tue, 20 Aug 2024 13:35:09 -0400 Subject: [PATCH 05/42] some more bugfixes --- src/distributed_zipper.jl | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/src/distributed_zipper.jl b/src/distributed_zipper.jl index dd60b2a..5babb11 100644 --- a/src/distributed_zipper.jl +++ b/src/distributed_zipper.jl @@ -20,14 +20,17 @@ const DistributedZipper = BoundaryCondition{<:DistributedCommunication, <:Zipper switch_north_halos!(c, north_bc, grid, loc) = nothing +@inline instantiate(T::DataType) = T() +@inline instantiate(T) = T + function switch_north_halos!(c, north_bc::DistributedZipper, grid, loc) sign = north_bc.condition.sign Hx, Hy, _ = halo_size(grid) Nx, Ny, Nz = size(grid) - params = KernelParameters((Nx+2Hx-2, Nz), (-Hx+1, 0)) + params = KernelParameters((Nx+2Hx-2, Nz), (0, 0)) - launch!(architecture(grid), grid, params, _switch_north_halos!, grid, loc, sign, c) + launch!(architecture(grid), grid, params, _switch_north_halos!, grid, loc, sign, parent(c)) return nothing end @@ -35,36 +38,37 @@ end @kernel function _switch_north_halos!(grid, ::Tuple{<:Face, <:Center, <:Any}, sign, c) i, k = @index(Global, NTuple) Nx, Ny, _ = size(grid) + Hx, Hy, _ = halo_size(grid) - i′ = Nx - i + 2 - Hy = grid.Hy + i′ = Nx + 2Hx - i + 2 - 2 - for j = 1 : Hy - 1 # TO CORRECTED!!! - @inbounds c[i, Ny + j, k] = sign * c[i′, Ny + Hy - j, k] + for j = 1 : Hy + @inbounds c[i, Ny + Hy + j, k] = sign * c[i′, Ny + 2Hy - j, k] end end @kernel function _switch_north_halos!(grid, ::Tuple{<:Center, <:Face, <:Any}, sign, c) i, k = @index(Global, NTuple) Nx, Ny, _ = size(grid) + Hx, Hy, _ = halo_size(grid) - i′ = Nx - i + 1 - Hy = grid.Hy + i′ = Nx + 2Hx - i + 1 - 2 for j = 1 : Hy - 1 - @inbounds c[i, Ny + j, k] = sign * c[i′, Ny + Hy - j + 1, k] + @inbounds c[i, Ny + Hy + j, k] = sign * c[i′, Ny + 2Hy - j + 1, k] end end @kernel function _switch_north_halos!(grid, ::Tuple{<:Center, <:Center, <:Any}, sign, c) i, k = @index(Global, NTuple) Nx, Ny, _ = size(grid) + Hx, Hy, _ = halo_size(grid) - i′ = Nx - i + 1 + i′ = Nx + 2Hx - i + 1 - 2 Hy = grid.Hy - for j = 1 : Hy - 1 - @inbounds c[i, Ny + j, k] = sign * c[i′, Ny + Hy - j, k] + for j = 1 : Hy + @inbounds c[i, Ny + Hy + j, k] = sign * c[i′, Ny + 2Hy - j, k] end end @@ -117,7 +121,8 @@ function synchronize_communication!(field::Field{<:Any, <:Any, <:Any, <:Any, <:D recv_from_buffers!(field.data, field.boundary_buffers, field.grid) north_bc = field.boundary_conditions.north - switch_north_halos!(field, north_bc, field.grid, location(field)) + instantiated_location = map(instantiate, location(field)) + switch_north_halos!(field, north_bc, field.grid, instantiated_location) return nothing end \ No newline at end of file From 8f877b13c2074b09e2fba93b6f18615dd764a44c Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Tue, 20 Aug 2024 15:08:37 -0400 Subject: [PATCH 06/42] try this on tartarus --- src/distributed_tripolar_grid.jl | 42 +++++++++++++++++++++++------- src/distributed_zipper.jl | 13 +++++++++ src/split_explicit_free_surface.jl | 13 +++++++-- 3 files changed, 56 insertions(+), 12 deletions(-) diff --git a/src/distributed_tripolar_grid.jl b/src/distributed_tripolar_grid.jl index fdc3298..8414338 100644 --- a/src/distributed_tripolar_grid.jl +++ b/src/distributed_tripolar_grid.jl @@ -100,14 +100,36 @@ function TripolarGrid(arch::Distributed, FT::DataType=Float64; Δzᵃᵃᶠ = global_grid.Δzᵃᵃᶠ radius = global_grid.radius - # Make sure the northwest and northeast connectivities are correct - north_recv_rank = north_receiving_rank(arch) + # Fix corners halos passing in case workers[1] != 1 + if workers[1] != 1 + northwest_idx_x = ranks(arch)[1] - arch.local_index[1] + 2 + northeast_idx_x = ranks(arch)[1] - arch.local_index[1] - if yrank == workers[2] - 1 && workers[1] != 1 - arch.connectivity.northeast = north_recv_rank - arch.connectivity.northwest = north_recv_rank + if northwest_idx_x > workers[1] + northwest_idx_x = northwest_idx_x - 1 + end + + if northeast_idx_x < 1 + northwest_idx_x = 1 + end + + # Make sure the northwest and northeast connectivities are correct + northwest_recv_rank = receiving_rank(arch) #; receive_idx_x = northwest_idx_x) + northeast_recv_rank = receiving_rank(arch) #; receive_idx_x = northeast_idx_x) + + if yrank == workers[2] - 1 + arch.connectivity.northeast = northwest_recv_rank + arch.connectivity.northwest = northeast_recv_rank + end end + # for r in 0:3 + # if arch.local_rank == r + # @show arch.local_rank, arch.connectivity, northwest_recv_rank, northeast_recv_rank + # end + # barrier!(arch) + # end + grid = OrthogonalSphericalShellGrid{LX, LY, Bounded}(arch, nx, ny, Nz, Hx, Hy, Hz, @@ -172,10 +194,10 @@ Base.summary(hcr::ZipperHaloCommunicationRanks) = "ZipperHaloCommunicationRanks # Finding out the paired rank to communicate the north boundary # in case of a DistributedZipperBoundaryCondition -function north_receiving_rank(arch) +function receiving_rank(arch; + receive_idx_x = ranks(arch)[1] - arch.local_index[1] + 1, + receive_idx_y = ranks(arch)[2]) - receive_idx_x = ranks(arch)[1] - arch.local_index[1] + 1 - receive_idx_y = ranks(arch)[2] receive_rank = 0 for rank in 0:prod(ranks(arch)) - 1 @@ -221,7 +243,7 @@ function regularize_field_boundary_conditions(bcs::FieldBoundaryConditions, east = regularize_boundary_condition(bcs.east, grid, loc, 1, RightBoundary, prognostic_names) south = regularize_boundary_condition(bcs.south, grid, loc, 2, LeftBoundary, prognostic_names) - north_recv_rank = north_receiving_rank(arch) + north_recv_rank = receiving_rank(arch) north = if yrank == processor_size[2] - 1 && processor_size[1] == 1 ZipperBoundaryCondition(sign) @@ -260,7 +282,7 @@ function Field((LX, LY, LZ)::Tuple, grid::DTRG, data, old_bcs, indices::Tuple, o validate_boundary_conditions((LX, LY, LZ), grid, old_bcs) default_zipper = ZipperBoundaryCondition(sign(LX, LY)) - north_recv_rank = north_receiving_rank(arch) + north_recv_rank = receiving_rank(arch) if isnothing(old_bcs) || ismissing(old_bcs) new_bcs = old_bcs diff --git a/src/distributed_zipper.jl b/src/distributed_zipper.jl index 5babb11..7b69164 100644 --- a/src/distributed_zipper.jl +++ b/src/distributed_zipper.jl @@ -35,6 +35,18 @@ function switch_north_halos!(c, north_bc::DistributedZipper, grid, loc) return nothing end +@kernel function _switch_north_halos!(grid, ::Tuple{<:Face, <:Face, <:Any}, sign, c) + i, k = @index(Global, NTuple) + Nx, Ny, _ = size(grid) + Hx, Hy, _ = halo_size(grid) + + i′ = Nx + 2Hx - i + 2 - 2 + + for j = 1 : Hy + @inbounds c[i, Ny + Hy + j, k] = sign * c[i′, Ny + 2Hy - j + 1, k] + end +end + @kernel function _switch_north_halos!(grid, ::Tuple{<:Face, <:Center, <:Any}, sign, c) i, k = @index(Global, NTuple) Nx, Ny, _ = size(grid) @@ -101,6 +113,7 @@ function fill_halo_regions!(c::OffsetArray, bcs, indices, loc, grid::DTRG, buffe switch_north_halos!(c, north_bc, grid, loc) + # @show arch.local_rank "finished communication" return nothing end diff --git a/src/split_explicit_free_surface.jl b/src/split_explicit_free_surface.jl index 9f7ce50..543a416 100644 --- a/src/split_explicit_free_surface.jl +++ b/src/split_explicit_free_surface.jl @@ -76,7 +76,16 @@ end # We play the same trick as in the Distributed implementation and we extend the halos for # a split explicit barotropic solver on a tripolar grid. Only on the North boundary though! -@inline tripolar_split_explicit_halos(old_halos, step_halo) = old_halos[1], max(step_halo, old_halos[2]), old_halos[3] +@inline tripolar_split_explicit_halos(old_halos, step_halo, grid) = old_halos[1], max(step_halo, old_halos[2]), old_halos[3] + +@inline function tripolar_split_explicit_halos(old_halos, step_halo, grid::DTRG) + Rx, Ry, _ = architecture(grid).ranks + + Hx = Rx == 1 ? old_halos[1] : max(step_halo, old_halos[1]) + Hy = max(step_halo, old_halos[2]) # Always! + + return Hx, Hy, old_halos[3] +end # Internal function for HydrostaticFreeSurfaceModel function materialize_free_surface(free_surface::SplitExplicitFreeSurface, velocities, grid::TRG) @@ -86,7 +95,7 @@ function materialize_free_surface(free_surface::SplitExplicitFreeSurface, veloci old_halos = halo_size(grid) Nsubsteps = length(settings.substepping.averaging_weights) - extended_halos = tripolar_split_explicit_halos(old_halos, Nsubsteps+1) + extended_halos = tripolar_split_explicit_halos(old_halos, Nsubsteps+1, grid) extended_grid = with_halo(extended_halos, grid) Nze = size(extended_grid, 3) From 841d8d9f2e78b6ec5d31956c73cda8971fbfa141 Mon Sep 17 00:00:00 2001 From: simone-silvestri Date: Tue, 20 Aug 2024 18:51:03 -0400 Subject: [PATCH 07/42] this might work! --- src/distributed_tripolar_grid.jl | 23 ++++----- src/distributed_zipper.jl | 71 +++++++--------------------- src/distributed_zipper_north_tags.jl | 12 ++--- 3 files changed, 31 insertions(+), 75 deletions(-) diff --git a/src/distributed_tripolar_grid.jl b/src/distributed_tripolar_grid.jl index 8414338..dd86372 100644 --- a/src/distributed_tripolar_grid.jl +++ b/src/distributed_tripolar_grid.jl @@ -104,18 +104,18 @@ function TripolarGrid(arch::Distributed, FT::DataType=Float64; if workers[1] != 1 northwest_idx_x = ranks(arch)[1] - arch.local_index[1] + 2 northeast_idx_x = ranks(arch)[1] - arch.local_index[1] - + if northwest_idx_x > workers[1] - northwest_idx_x = northwest_idx_x - 1 + northwest_idx_x = arch.local_index[1] end if northeast_idx_x < 1 - northwest_idx_x = 1 + northeast_idx_x = arch.local_index[1] end # Make sure the northwest and northeast connectivities are correct - northwest_recv_rank = receiving_rank(arch) #; receive_idx_x = northwest_idx_x) - northeast_recv_rank = receiving_rank(arch) #; receive_idx_x = northeast_idx_x) + northwest_recv_rank = receiving_rank(arch; receive_idx_x = northwest_idx_x) + northeast_recv_rank = receiving_rank(arch; receive_idx_x = northeast_idx_x) if yrank == workers[2] - 1 arch.connectivity.northeast = northwest_recv_rank @@ -123,13 +123,6 @@ function TripolarGrid(arch::Distributed, FT::DataType=Float64; end end - # for r in 0:3 - # if arch.local_rank == r - # @show arch.local_rank, arch.connectivity, northwest_recv_rank, northeast_recv_rank - # end - # barrier!(arch) - # end - grid = OrthogonalSphericalShellGrid{LX, LY, Bounded}(arch, nx, ny, Nz, Hx, Hy, Hz, @@ -195,9 +188,9 @@ Base.summary(hcr::ZipperHaloCommunicationRanks) = "ZipperHaloCommunicationRanks # Finding out the paired rank to communicate the north boundary # in case of a DistributedZipperBoundaryCondition function receiving_rank(arch; - receive_idx_x = ranks(arch)[1] - arch.local_index[1] + 1, - receive_idx_y = ranks(arch)[2]) + receive_idx_x = ranks(arch)[1] - arch.local_index[1] + 1) + Ry = ranks(arch)[2] receive_rank = 0 for rank in 0:prod(ranks(arch)) - 1 @@ -214,7 +207,7 @@ function receiving_rank(arch; x_idx = all_reduce(+, my_x_idx, arch) y_idx = all_reduce(+, my_y_idx, arch) - if x_idx == receive_idx_x && y_idx == receive_idx_y + if x_idx == receive_idx_x && y_idx == Ry receive_rank = rank end diff --git a/src/distributed_zipper.jl b/src/distributed_zipper.jl index 7b69164..5f0dd51 100644 --- a/src/distributed_zipper.jl +++ b/src/distributed_zipper.jl @@ -9,8 +9,6 @@ using Oceananigans.DistributedComputations: cooperative_waitall!, loc_id, DCBCT -using Oceananigans.Utils: KernelParameters - import Oceananigans.BoundaryConditions: fill_halo_regions! import Oceananigans.DistributedComputations: synchronize_communication! @@ -24,65 +22,31 @@ switch_north_halos!(c, north_bc, grid, loc) = nothing @inline instantiate(T) = T function switch_north_halos!(c, north_bc::DistributedZipper, grid, loc) - sign = north_bc.condition.sign - Hx, Hy, _ = halo_size(grid) - Nx, Ny, Nz = size(grid) - - params = KernelParameters((Nx+2Hx-2, Nz), (0, 0)) + sign = north_bc.condition.sign + Hy = halo_size(grid)[2] + Ny = size(grid)[2] + sz = size(parent(c)) - launch!(architecture(grid), grid, params, _switch_north_halos!, grid, loc, sign, parent(c)) + _switch_north_halos!(parent(c), loc, sign, sz, Ny, Hy) return nothing end -@kernel function _switch_north_halos!(grid, ::Tuple{<:Face, <:Face, <:Any}, sign, c) - i, k = @index(Global, NTuple) - Nx, Ny, _ = size(grid) - Hx, Hy, _ = halo_size(grid) - - i′ = Nx + 2Hx - i + 2 - 2 - - for j = 1 : Hy - @inbounds c[i, Ny + Hy + j, k] = sign * c[i′, Ny + 2Hy - j + 1, k] - end -end +# We throw away the first point! +_switch_north_halos!(c, ::Tuple{<:Center, <:Center, <:Any}, sign, sz, Ny, Hy) = + view(c, :, Ny+Hy+1:Ny+2Hy-1, :) .= sign .* reverse(view(c, :, Ny+2Hy:-1:Ny+Hy+2, :), dims = 1) -@kernel function _switch_north_halos!(grid, ::Tuple{<:Face, <:Center, <:Any}, sign, c) - i, k = @index(Global, NTuple) - Nx, Ny, _ = size(grid) - Hx, Hy, _ = halo_size(grid) - - i′ = Nx + 2Hx - i + 2 - 2 - - for j = 1 : Hy - @inbounds c[i, Ny + Hy + j, k] = sign * c[i′, Ny + 2Hy - j, k] - end -end +# We do not throw away the first point! +_switch_north_halos!(c, ::Tuple{<:Center, <:Face, <:Any}, sign, sz, Ny, Hy) = + view(c, :, Ny+Hy+1:Ny+2Hy, :) .= sign .* reverse(view(c, :, Ny+2Hy:-1:Ny+Hy+1, :), dims = 1) -@kernel function _switch_north_halos!(grid, ::Tuple{<:Center, <:Face, <:Any}, sign, c) - i, k = @index(Global, NTuple) - Nx, Ny, _ = size(grid) - Hx, Hy, _ = halo_size(grid) - - i′ = Nx + 2Hx - i + 1 - 2 - - for j = 1 : Hy - 1 - @inbounds c[i, Ny + Hy + j, k] = sign * c[i′, Ny + 2Hy - j + 1, k] - end -end +# We throw away the first line and the first point! +_switch_north_halos!(c, ::Tuple{<:Face, <:Center, <:Any}, sign, (Px, Py, Pz), Ny, Hy) = + view(c, :, Ny+Hy+1:Ny+2Hy-1, :) .= sign .* reverse(view(c, :, Ny+2Hy:-1:Ny+Hy+2, :), dims = 1) -@kernel function _switch_north_halos!(grid, ::Tuple{<:Center, <:Center, <:Any}, sign, c) - i, k = @index(Global, NTuple) - Nx, Ny, _ = size(grid) - Hx, Hy, _ = halo_size(grid) - - i′ = Nx + 2Hx - i + 1 - 2 - Hy = grid.Hy - - for j = 1 : Hy - @inbounds c[i, Ny + Hy + j, k] = sign * c[i′, Ny + 2Hy - j, k] - end -end +# We throw away the first line but not the first point! +_switch_north_halos!(c, ::Tuple{<:Face, <:Face, <:Any}, sign, (Px, Py, Pz), Ny, Hy) = + view(c, :, Ny+Hy+1:Ny+2Hy, :) .= sign .* reverse(view(c, :, Ny+2Hy:-1:Ny+Hy+1, :), dims = 1) function fill_halo_regions!(c::OffsetArray, bcs, indices, loc, grid::DTRG, buffers, args...; only_local_halos = false, fill_boundary_normal_velocities = true, kwargs...) if fill_boundary_normal_velocities @@ -113,7 +77,6 @@ function fill_halo_regions!(c::OffsetArray, bcs, indices, loc, grid::DTRG, buffe switch_north_halos!(c, north_bc, grid, loc) - # @show arch.local_rank "finished communication" return nothing end diff --git a/src/distributed_zipper_north_tags.jl b/src/distributed_zipper_north_tags.jl index 8f401ae..f1752cd 100644 --- a/src/distributed_zipper_north_tags.jl +++ b/src/distributed_zipper_north_tags.jl @@ -11,7 +11,7 @@ sides = (:west, :east, :south, :north, :southwest, :southeast, :northwest, :nor side_id = Dict(side => n-1 for (n, side) in enumerate(sides)) # Change these and we are golden! -function north_recv_tag(arch, grid::DTRG, location) +function north_recv_tag(arch, ::DTRG, location) field_id = string(arch.mpi_tag[], pad=ID_DIGITS) loc_digit = string(loc_id(location...), pad=ID_DIGITS) last_rank = arch.local_index[2] == ranks(arch)[2] @@ -19,7 +19,7 @@ function north_recv_tag(arch, grid::DTRG, location) return parse(Int, field_id * loc_digit * side_digit) end -function north_send_tag(arch, grid::DTRG, location) +function north_send_tag(arch, ::DTRG, location) field_id = string(arch.mpi_tag[], pad=ID_DIGITS) loc_digit = string(loc_id(location...), pad=ID_DIGITS) last_rank = arch.local_index[2] == ranks(arch)[2] @@ -27,7 +27,7 @@ function north_send_tag(arch, grid::DTRG, location) return parse(Int, field_id * loc_digit * side_digit) end -function northwest_recv_tag(arch, grid::DTRG, location) +function northwest_recv_tag(arch, ::DTRG, location) field_id = string(arch.mpi_tag[], pad=ID_DIGITS) loc_digit = string(loc_id(location...), pad=ID_DIGITS) last_rank = arch.local_index[2] == ranks(arch)[2] @@ -35,7 +35,7 @@ function northwest_recv_tag(arch, grid::DTRG, location) return parse(Int, field_id * loc_digit * side_digit) end -function northwest_send_tag(arch, grid::DTRG, location) +function northwest_send_tag(arch, ::DTRG, location) field_id = string(arch.mpi_tag[], pad=ID_DIGITS) loc_digit = string(loc_id(location...), pad=ID_DIGITS) last_rank = arch.local_index[2] == ranks(arch)[2] @@ -43,7 +43,7 @@ function northwest_send_tag(arch, grid::DTRG, location) return parse(Int, field_id * loc_digit * side_digit) end -function northeast_recv_tag(arch, grid::DTRG, location) +function northeast_recv_tag(arch, ::DTRG, location) field_id = string(arch.mpi_tag[], pad=ID_DIGITS) loc_digit = string(loc_id(location...), pad=ID_DIGITS) last_rank = arch.local_index[2] == ranks(arch)[2] @@ -51,7 +51,7 @@ function northeast_recv_tag(arch, grid::DTRG, location) return parse(Int, field_id * loc_digit * side_digit) end -function northeast_send_tag(arch, grid::DTRG, location) +function northeast_send_tag(arch, ::DTRG, location) field_id = string(arch.mpi_tag[], pad=ID_DIGITS) loc_digit = string(loc_id(location...), pad=ID_DIGITS) last_rank = arch.local_index[2] == ranks(arch)[2] From 6f6c6bd33ed93819ec58bbf4f449fabdb8edfe68 Mon Sep 17 00:00:00 2001 From: simone-silvestri Date: Tue, 20 Aug 2024 20:07:51 -0400 Subject: [PATCH 08/42] ok let's go now --- src/distributed_zipper.jl | 11 +++++------ src/split_explicit_free_surface.jl | 4 +++- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/distributed_zipper.jl b/src/distributed_zipper.jl index 5f0dd51..41347b9 100644 --- a/src/distributed_zipper.jl +++ b/src/distributed_zipper.jl @@ -12,15 +12,13 @@ using Oceananigans.DistributedComputations: cooperative_waitall!, import Oceananigans.BoundaryConditions: fill_halo_regions! import Oceananigans.DistributedComputations: synchronize_communication! -import Oceananigans.Fields: create_buffer_y, create_buffer_corner +@inline instantiate(T::DataType) = T() +@inline instantiate(T) = T const DistributedZipper = BoundaryCondition{<:DistributedCommunication, <:ZipperHaloCommunicationRanks} switch_north_halos!(c, north_bc, grid, loc) = nothing -@inline instantiate(T::DataType) = T() -@inline instantiate(T) = T - function switch_north_halos!(c, north_bc::DistributedZipper, grid, loc) sign = north_bc.condition.sign Hy = halo_size(grid)[2] @@ -42,11 +40,11 @@ _switch_north_halos!(c, ::Tuple{<:Center, <:Face, <:Any}, sign, sz, Ny, Hy) = # We throw away the first line and the first point! _switch_north_halos!(c, ::Tuple{<:Face, <:Center, <:Any}, sign, (Px, Py, Pz), Ny, Hy) = - view(c, :, Ny+Hy+1:Ny+2Hy-1, :) .= sign .* reverse(view(c, :, Ny+2Hy:-1:Ny+Hy+2, :), dims = 1) + view(c, 2:Px, Ny+Hy+1:Ny+2Hy-1, :) .= sign .* reverse(view(c, 2:Px, Ny+2Hy:-1:Ny+Hy+2, :), dims = 1) # We throw away the first line but not the first point! _switch_north_halos!(c, ::Tuple{<:Face, <:Face, <:Any}, sign, (Px, Py, Pz), Ny, Hy) = - view(c, :, Ny+Hy+1:Ny+2Hy, :) .= sign .* reverse(view(c, :, Ny+2Hy:-1:Ny+Hy+1, :), dims = 1) + view(c, 2:Px, Ny+Hy+1:Ny+2Hy, :) .= sign .* reverse(view(c, 2:Px, Ny+2Hy:-1:Ny+Hy+1, :), dims = 1) function fill_halo_regions!(c::OffsetArray, bcs, indices, loc, grid::DTRG, buffers, args...; only_local_halos = false, fill_boundary_normal_velocities = true, kwargs...) if fill_boundary_normal_velocities @@ -98,6 +96,7 @@ function synchronize_communication!(field::Field{<:Any, <:Any, <:Any, <:Any, <:D north_bc = field.boundary_conditions.north instantiated_location = map(instantiate, location(field)) + switch_north_halos!(field, north_bc, field.grid, instantiated_location) return nothing diff --git a/src/split_explicit_free_surface.jl b/src/split_explicit_free_surface.jl index 543a416..96883f5 100644 --- a/src/split_explicit_free_surface.jl +++ b/src/split_explicit_free_surface.jl @@ -95,7 +95,9 @@ function materialize_free_surface(free_surface::SplitExplicitFreeSurface, veloci old_halos = halo_size(grid) Nsubsteps = length(settings.substepping.averaging_weights) - extended_halos = tripolar_split_explicit_halos(old_halos, Nsubsteps+1, grid) + # We need 1 additional halos in both directions because of the shifting + # caused by by the fill halo of the horizontal velocity. + extended_halos = tripolar_split_explicit_halos(old_halos, Nsubsteps+3, grid) extended_grid = with_halo(extended_halos, grid) Nze = size(extended_grid, 3) From 3dfda4a21cd1eb5cf179aaae76e418f7deb0e661 Mon Sep 17 00:00:00 2001 From: simone-silvestri Date: Tue, 20 Aug 2024 20:39:07 -0400 Subject: [PATCH 09/42] fixed another issue? --- src/split_explicit_free_surface.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/split_explicit_free_surface.jl b/src/split_explicit_free_surface.jl index 96883f5..30d2fc6 100644 --- a/src/split_explicit_free_surface.jl +++ b/src/split_explicit_free_surface.jl @@ -53,7 +53,7 @@ function positive_zipper_boundary(default_field, grid::DTRG) arch = architecture(grid) workers = ranks(arch.partition) - if arch.local_rank == workers[2] - 1 + if arch.local_index[2] == workers[2] return FieldBoundaryConditions( top = nothing, bottom = nothing, From cf017537a1e5964b0f54ef8c95da4128e0813c81 Mon Sep 17 00:00:00 2001 From: simone-silvestri Date: Tue, 20 Aug 2024 20:40:46 -0400 Subject: [PATCH 10/42] too restrictive! --- src/distributed_tripolar_grid.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/distributed_tripolar_grid.jl b/src/distributed_tripolar_grid.jl index dd86372..7fb0cc1 100644 --- a/src/distributed_tripolar_grid.jl +++ b/src/distributed_tripolar_grid.jl @@ -32,7 +32,7 @@ function TripolarGrid(arch::Distributed, FT::DataType=Float64; # Check that partitioning in x is correct: try - if isodd(arch.partition.x) + if isodd(arch.partition.x) && (arch.partition.x != 1) throw(ArgumentError("The number of partitionsOnly even partitioning in x is supported with the TripolarGrid")) end catch From 12cc2adc7efeb4595e46533c474d915b81a650a4 Mon Sep 17 00:00:00 2001 From: simone-silvestri Date: Tue, 20 Aug 2024 21:46:34 -0400 Subject: [PATCH 11/42] perf --- src/zipper_boundary_condition.jl | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/src/zipper_boundary_condition.jl b/src/zipper_boundary_condition.jl index 06811b1..d6c1539 100644 --- a/src/zipper_boundary_condition.jl +++ b/src/zipper_boundary_condition.jl @@ -74,9 +74,7 @@ validate_boundary_condition_location(bc::Zipper, loc::Face, side) = Hy = grid.Hy for j = 1 : Hy - @inbounds begin - c[i, Ny + j, k] = s * c[i′, Ny - j + 1, k] - end + @inbounds c[i, Ny + j, k] = s * c[i′, Ny - j + 1, k] end return nothing @@ -91,9 +89,7 @@ end Hy = grid.Hy for j = 1 : Hy - @inbounds begin - c[i, Ny + j, k] = s * c[i′, Ny - j, k] # The Ny line is duplicated so we substitute starting Ny-1 - end + @inbounds c[i, Ny + j, k] = s * c[i′, Ny - j, k] # The Ny line is duplicated so we substitute starting Ny-1 end return nothing @@ -106,9 +102,7 @@ end Hy = grid.Hy for j = 1 : Hy - @inbounds begin - c[i, Ny + j, k] = sign * c[i′, Ny - j + 1, k] - end + @inbounds c[i, Ny + j, k] = sign * c[i′, Ny - j + 1, k] end return nothing @@ -121,9 +115,7 @@ end Hy = grid.Hy for j = 1 : Hy - @inbounds begin - c[i, Ny + j, k] = sign * c[i′, Ny - j, k] # The Ny line is duplicated so we substitute starting Ny-1 - end + @inbounds c[i, Ny + j, k] = sign * c[i′, Ny - j, k] # The Ny line is duplicated so we substitute starting Ny-1 end return nothing From 5306f3886f1b16adf7dd98be6242257b2672a976 Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Fri, 23 Aug 2024 11:41:21 -0400 Subject: [PATCH 12/42] should work now! --- src/distributed_tripolar_grid.jl | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/src/distributed_tripolar_grid.jl b/src/distributed_tripolar_grid.jl index dd86372..ba17a3f 100644 --- a/src/distributed_tripolar_grid.jl +++ b/src/distributed_tripolar_grid.jl @@ -116,10 +116,12 @@ function TripolarGrid(arch::Distributed, FT::DataType=Float64; # Make sure the northwest and northeast connectivities are correct northwest_recv_rank = receiving_rank(arch; receive_idx_x = northwest_idx_x) northeast_recv_rank = receiving_rank(arch; receive_idx_x = northeast_idx_x) + north_recv_rank = receiving_rank(arch) if yrank == workers[2] - 1 arch.connectivity.northeast = northwest_recv_rank arch.connectivity.northwest = northeast_recv_rank + arch.connectivity.north = north_recv_rank end end @@ -186,9 +188,8 @@ ZipperHaloCommunicationRanks(sign; from, to) = ZipperHaloCommunicationRanks(from Base.summary(hcr::ZipperHaloCommunicationRanks) = "ZipperHaloCommunicationRanks from rank $(hcr.from) to rank $(hcr.to)" # Finding out the paired rank to communicate the north boundary -# in case of a DistributedZipperBoundaryCondition -function receiving_rank(arch; - receive_idx_x = ranks(arch)[1] - arch.local_index[1] + 1) +# in case of a DistributedZipperBoundaryCondition using a "Handshake" procedure +function receiving_rank(arch; receive_idx_x = ranks(arch)[1] - arch.local_index[1] + 1) Ry = ranks(arch)[2] receive_rank = 0 @@ -202,16 +203,12 @@ function receiving_rank(arch; my_y_idx = arch.local_index[2] end - barrier!(arch) - x_idx = all_reduce(+, my_x_idx, arch) y_idx = all_reduce(+, my_y_idx, arch) if x_idx == receive_idx_x && y_idx == Ry receive_rank = rank end - - barrier!(arch) end return receive_rank @@ -226,7 +223,6 @@ function regularize_field_boundary_conditions(bcs::FieldBoundaryConditions, arch = architecture(grid) loc = assumed_field_location(field_name) - xrank = arch.local_index[1] - 1 yrank = arch.local_index[2] - 1 processor_size = ranks(arch) @@ -236,15 +232,13 @@ function regularize_field_boundary_conditions(bcs::FieldBoundaryConditions, east = regularize_boundary_condition(bcs.east, grid, loc, 1, RightBoundary, prognostic_names) south = regularize_boundary_condition(bcs.south, grid, loc, 2, LeftBoundary, prognostic_names) - north_recv_rank = receiving_rank(arch) - north = if yrank == processor_size[2] - 1 && processor_size[1] == 1 ZipperBoundaryCondition(sign) elseif yrank == processor_size[2] - 1 && processor_size[1] != 1 from = arch.local_rank # Search the rank to send to - to = north_recv_rank + to = arch.connectivity.north halo_communication = ZipperHaloCommunicationRanks(sign; from, to) DistributedCommunicationBoundaryCondition(halo_communication) @@ -265,7 +259,6 @@ end # with a sign that depends on the location of the field (revert the value of the halos if on edges, keep it if on nodes or centers) function Field((LX, LY, LZ)::Tuple, grid::DTRG, data, old_bcs, indices::Tuple, op, status) arch = architecture(grid) - xrank = arch.local_index[1] - 1 yrank = arch.local_index[2] - 1 processor_size = ranks(arch) @@ -275,8 +268,6 @@ function Field((LX, LY, LZ)::Tuple, grid::DTRG, data, old_bcs, indices::Tuple, o validate_boundary_conditions((LX, LY, LZ), grid, old_bcs) default_zipper = ZipperBoundaryCondition(sign(LX, LY)) - north_recv_rank = receiving_rank(arch) - if isnothing(old_bcs) || ismissing(old_bcs) new_bcs = old_bcs else @@ -296,7 +287,7 @@ function Field((LX, LY, LZ)::Tuple, grid::DTRG, data, old_bcs, indices::Tuple, o elseif yrank == processor_size[2] - 1 && processor_size[1] != 1 sgn = old_bcs.north isa ZBC ? old_bcs.north.condition : sign(LX, LY) from = arch.local_rank - to = north_recv_rank + to = arch.connectivity.north halo_communication = ZipperHaloCommunicationRanks(sgn; from, to) north_bc = DistributedCommunicationBoundaryCondition(halo_communication) From cd12fb2c62cafe04aab14b5b50d953306a36387f Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Sat, 24 Aug 2024 17:42:52 -0400 Subject: [PATCH 13/42] done? --- src/interpolation.jl | 243 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 243 insertions(+) create mode 100644 src/interpolation.jl diff --git a/src/interpolation.jl b/src/interpolation.jl new file mode 100644 index 0000000..2a05b30 --- /dev/null +++ b/src/interpolation.jl @@ -0,0 +1,243 @@ +using OrthogonalSphericalShellGrids +using OrthogonalSphericalShellGrids: TRG +using Oceananigans +using Oceananigans.Operators: Δx, Δy +using Oceananigans.Grids: OSSG, λnodes, φnodes +using Oceananigans.Fields: fractional_index, fractional_z_index, AbstractField + +import Oceananigans.Fields: interpolate, interpolate!, fractional_indices + +TRGField = Field{<:Any, <:Any, <:Any, <:Any, <:TRG} + +struct InterpolationWeights{LXT, LYT, LXF, LYF, I, J, W} + i_indices :: I + j_indices :: J + weights :: W + + InterpolationWeights{LXT, LYT, LXF, LYF}(i::I, j::J, w::W) where {LXT, LYT, LXF, LYF, I, J, W}= new{LXT, LYT, LXF, LYF, I, J, W}(i, j, w) +end + +Adapt.adapt_structure(to, iw::InterpolationWeights{LXT, LYT, LXF, LYF}) = + InterpolationWeights{LXT, LYT, LXF, LYF}(Adapt.adapt(to, iw.i_indices), + Adapt.adapt(to, iw.j_indices), + Adapt.adapt(to, iw.weights)) + +function InterpolationWeights(to_field, from_field::TRG) + to_grid = to_field.grid + from_grid = from_field.grid + + Nx, Ny, _ = size(to_grid) + arch = architecture(to_grid) + + i_indices = on_architecture(arch, zeros(Int, Nx, Ny)) + j_indices = on_architecture(arch, zeros(Int, Nx, Ny)) + weights = on_architecture(arch, zeros(eltype(to_grid), Nx, Ny, 9)) + + from_loc = location(from_field) + to_loc = location(to_field) + + launch!(arch, to_grid, :xy, compute_weights!, + i_indices, j_indices, weights, + to_grid, from_grid, map(instantiate, to_loc), map(instantiate, from_loc)) + + return InterpolationWeights{to_loc[1], to_loc[2], from_loc[1], from_loc[2]}(i_indices, j_indices, weights) +end + +@kernel function compute_weights(i_indices, j_indices, weights, to_grid, from_grid, to_loc, from_loc) + i, j = @index(Global, NTuple) + + λ₀ = λnode(i, j, 1, to_grid, to_loc...) + φ₀ = φnode(i, j, 1, to_grid, to_loc...) + i₀, j₀, d₀₀, d₀₁, d₁₀, d₀₂, d₂₀, d₁₁, d₂₂, d₁₂, d₂₁ = horizontal_distances(λ₀, φ₀, from_loc, from_grid) + + @inbounds begin + i_indices[i, j] = i₀ + j_indices[i, j] = j₀ + + weights[i, j, 1] = 1 / d₀₀ + weights[i, j, 2] = 1 / d₀₁ + weights[i, j, 3] = 1 / d₁₀ + weights[i, j, 4] = 1 / d₀₂ + weights[i, j, 5] = 1 / d₂₀ + weights[i, j, 6] = 1 / d₁₁ + weights[i, j, 7] = 1 / d₂₂ + weights[i, j, 8] = 1 / d₁₂ + weights[i, j, 9] = 1 / d₂₁ + end +end + +function interpolate!(to_field, from_field::TRG, interpolation_weigths = nothing) + + # Make sure weigths are coorect + if !(interpolation_weigths isa InterpolationWeights) + interpolation_weigths = InterpolationWeights(to_field, from_field) + end + + to_grid = to_field.grid + from_grid = from_field.grid + + to_arch = architecture(to_field) + from_arch = architecture(from_field) + + # In case architectures are `Distributed` we + # verify that the fields are on the same child architecture + to_arch = child_architecture(to_arch) + from_arch = child_architecture(from_arch) + + if !isnothing(from_arch) && to_arch != from_arch + msg = "Cannot interpolate! because from_field is on $from_arch while to_field is on $to_arch." + throw(ArgumentError(msg)) + end + + # Make locations + to_ℓz = location(to_field)[3]() + from_loc = map(instantiate, location(to_field)) + + launch!(to_arch, to_grid, size(to_field), + _nearest_neigbor_interpolate!, to_field, to_ℓz, to_grid, from_field, from_loc, from_grid, interpolation_weigths) + + fill_halo_regions!(to_field) +end + +@kernel function _nearest_neigbor_interpolate!(to_field, to_ℓz, to_grid, from_field, from_loc, from_grid, iw) + i, j, k = @index(Global, NTuple) + + z = znode(k, to_grid, to_ℓz) + kk = fractional_z_index(z, from_loc, grid) + + k⁻, k⁺, ζ = interpolator(kk) + + iₒ = @inbounds iw.i_indices[i, j] + jₒ = @inbounds iw.j_indices[i, j] + + ϕ⁻ = horizontal_interpolate(i, j, from_grid, from_field, i₀, j₀, k⁻, iw.weights) + ϕ⁺ = horizontal_interpolate(i, j, from_grid, from_field, i₀, j₀, k⁺, iw.weights) + + @inbounds to_field[i, j, k] = ϕ⁻ * (1 - ζ) + ϕ⁺ * ζ +end + +@inline function horizontal_interpolate(i, j, from_grid, from_field, i₀, j₀, k₀, weights) + + i₁ = ifelse(i₀ == 0, from_grid.Nx, i₀ - 1) + j₁ = ifelse(j₀ == 0, j₀, j₀ - 1) + i₂ = ifelse(i₀ == size(from_field, 1), 1, i₀ + 1) + j₂ = ifelse(j₀ == size(from_field, 2), j₀, j₀ + 1) + + @inbounds begin + f₀₀ = from_field[i₀, j₀, k₀] + f₀₁ = from_field[i₀, j₁, k₀] + f₁₀ = from_field[i₁, j₀, k₀] + f₀₂ = from_field[i₀, j₂, k₀] + f₂₀ = from_field[i₂, j₀, k₀] + f₁₁ = from_field[i₁, j₁, k₀] + f₂₂ = from_field[i₂, j₂, k₀] + f₁₂ = from_field[i₁, j₂, k₀] + f₂₁ = from_field[i₂, j₁, k₀] + + w₀₀ = weights[i, j, 1] + w₀₁ = weights[i, j, 2] + w₁₀ = weights[i, j, 3] + w₀₂ = weights[i, j, 4] + w₂₀ = weights[i, j, 5] + w₁₁ = weights[i, j, 6] + w₂₂ = weights[i, j, 7] + w₁₂ = weights[i, j, 8] + w₂₁ = weights[i, j, 9] + end + + f = f₀₀ * w₀₀ + f₀₁ * w₀₁ + f₁₀ * w₁₀ + f₀₂ * w₀₂ + f₂₀ * w₂₀ + f₁₁ * w₁₁ + f₂₂ * w₂₂ + f₁₂ * w₁₂ + f₂₁ * w₂₁ + + return f / (w₀₀ + w₀₁ + w₁₀ + w₀₂ + w₂₀ + w₁₁ + w₂₂ + w₁₂ + w₂₁) +end + +@inline function distance(x₁, y₁, x₂, y₂) + dx = x₁ - x₂ + dy = y₁ - y₂ + return dx * dx + dy * dy +end + +@inline function check_and_update(dist, i₀, j₀, i, j, λ₀, φ₀, λ, φ) + d = distance(λ₀, φ₀, λ , φ) + i₀ = ifelse(d < dist, i, i₀) + j₀ = ifelse(d < dist, j, j₀) + dist = min(d, dist) + + return dist, i₀, j₀ +end + +# # We assume that in an TRG, the latitude lines for a given i - index are sorted +# # i.e. φ is monotone in j. This is not the case for λ that might jump between 0 and 360. +@inline function horizontal_distances(λ₀, φ₀, loc, grid) + # This is a "naive" algorithm, so it is going to be quite slow! + # Optimizations are welcome! + λ = λnodes(grid, loc...; with_halos = true) + φ = φnodes(grid, loc...; with_halos = true) + + Nx, Ny, _ = size(grid) + + # We search for an initial valid option + dist = Inf + i₀ = 1 + j₀ = 1 + + @inbounds begin + for i = 1:Nx + jⁿ = fractional_index(φ₀, φ[i, :], Ny) - 1 + j⁻ = floor(Int, jⁿ) + j⁺ = j⁻ + 1 + + if j⁻ <= grid.Ny + dist, i₀, j₀ = check_and_update(dist, i₀, j₀, i, j⁻, λ₀, φ₀, λ[i, j⁻], φ[i, j⁻]) + end + + if j⁺ <= grid.Ny + dist, i₀, j₀ = check_and_update(dist, i₀, j₀, i, j⁺, λ₀, φ₀, λ[i, j⁺], φ[i, j⁺]) + end + end + end + + # Now find the closest neighbors given i₀ and j₀ + i₁ = ifelse(i₀ == 0, grid.Nx, i₀ - 1) + j₁ = ifelse(j₀ == 0, j₀, j₀ - 1) + i₂ = ifelse(i₀ == size(λ, 1), 1, i₀ + 1) + j₂ = ifelse(j₀ == size(λ, 2), j₀, j₀ + 1) + + @inbounds begin + λ₀₀ = massage_longitude(λ₀, λ[i₀, j₀]) + λ₀₁ = massage_longitude(λ₀, λ[i₀, j₁]) + λ₁₀ = massage_longitude(λ₀, λ[i₁, j₀]) + λ₀₂ = massage_longitude(λ₀, λ[i₀, j₂]) + λ₂₀ = massage_longitude(λ₀, λ[i₂, j₀]) + λ₁₁ = massage_longitude(λ₀, λ[i₁, j₁]) + λ₂₂ = massage_longitude(λ₀, λ[i₂, j₂]) + λ₁₂ = massage_longitude(λ₀, λ[i₁, j₂]) + λ₂₁ = massage_longitude(λ₀, λ[i₂, j₁]) + + φ₀₀ = φ[i₀, j₀] + φ₀₁ = φ[i₀, j₁] + φ₁₀ = φ[i₁, j₀] + φ₀₂ = φ[i₀, j₂] + φ₂₀ = φ[i₂, j₀] + φ₁₁ = φ[i₁, j₁] + φ₂₂ = φ[i₂, j₂] + φ₁₂ = φ[i₁, j₂] + φ₂₁ = φ[i₂, j₁] + end + + d₀₀ = distance(λ₀, φ₀, λ₀₀, φ₀₀) + d₀₁ = distance(λ₀, φ₀, λ₀₁, φ₀₁) + d₁₀ = distance(λ₀, φ₀, λ₁₀, φ₁₀) + d₀₂ = distance(λ₀, φ₀, λ₀₂, φ₀₂) + d₂₀ = distance(λ₀, φ₀, λ₂₀, φ₂₀) + + d₁₁ = distance(λ₀, φ₀, λ₁₁, φ₁₁) + d₂₂ = distance(λ₀, φ₀, λ₂₂, φ₂₂) + d₁₂ = distance(λ₀, φ₀, λ₁₂, φ₁₂) + d₂₁ = distance(λ₀, φ₀, λ₂₁, φ₂₁) + + return i₀, j₀, d₀₀, d₀₁, d₁₀, d₀₂, d₂₀, d₁₁, d₂₂, d₁₂, d₂₁ +end + +# We assume that all points are very close to each other +@inline massage_longitudes(λ₀, λ) = ifelse(abs(λ₀ - λ) > 180, + ifelse(λ₀ > 180, λ + 360, λ - 360), λ) \ No newline at end of file From 247c667fb5f95a8028a49f81b44aa468235396e6 Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Sat, 24 Aug 2024 17:43:40 -0400 Subject: [PATCH 14/42] adding nearest neighbor interpolation --- src/OrthogonalSphericalShellGrids.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/OrthogonalSphericalShellGrids.jl b/src/OrthogonalSphericalShellGrids.jl index 7467049..dbe4d93 100644 --- a/src/OrthogonalSphericalShellGrids.jl +++ b/src/OrthogonalSphericalShellGrids.jl @@ -12,7 +12,8 @@ using Oceananigans.Grids: R_Earth, halo_size, spherical_area_quadrilateral, lat_lon_to_cartesian, generate_coordinate, topology using Oceananigans.Operators -using Oceananigans.Utils: get_cartesian_nodes_and_vertices + +using Oceananigans.Utils: get_cartesian_nodes_and_vertices, using Adapt using JLD2 @@ -32,5 +33,6 @@ include("distributed_zipper.jl") include("distributed_zipper_north_tags.jl") include("with_halo.jl") include("split_explicit_free_surface.jl") +include("interpolation.jl") end From b5baa5c16f45dad4c1c4c66d26d506827f8f0637 Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Sat, 24 Aug 2024 17:47:27 -0400 Subject: [PATCH 15/42] good --- src/interpolation.jl | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/interpolation.jl b/src/interpolation.jl index 2a05b30..192dfdd 100644 --- a/src/interpolation.jl +++ b/src/interpolation.jl @@ -17,6 +17,9 @@ struct InterpolationWeights{LXT, LYT, LXF, LYF, I, J, W} InterpolationWeights{LXT, LYT, LXF, LYF}(i::I, j::J, w::W) where {LXT, LYT, LXF, LYF, I, J, W}= new{LXT, LYT, LXF, LYF, I, J, W}(i, j, w) end +@inline from_location(::InterpolationWeights{LXT, LYT, LXF, LYF}) = (LXF, LYF) +@inline to_location(::InterpolationWeights{LXT, LYT, LXF, LYF}) = (LXT, LYT) + Adapt.adapt_structure(to, iw::InterpolationWeights{LXT, LYT, LXF, LYF}) = InterpolationWeights{LXT, LYT, LXF, LYF}(Adapt.adapt(to, iw.i_indices), Adapt.adapt(to, iw.j_indices), @@ -67,10 +70,23 @@ end end function interpolate!(to_field, from_field::TRG, interpolation_weigths = nothing) - + + to_loc = location(to_field) + from_loc = location(to_field) + # Make sure weigths are coorect if !(interpolation_weigths isa InterpolationWeights) interpolation_weigths = InterpolationWeights(to_field, from_field) + else + # Check that the locations are correct + LXF, LYF = from_location(interpolation_weigths) + LXT, LYT = to_location(interpolation_weigths) + + correct_locations = (LXF, LYF) == from_loc && (LXT, LYT) == to_loc + + if !correct_locations + throw("The location of the interpolation weigths do not coincide with the locations of the in and out fields") + end end to_grid = to_field.grid @@ -97,6 +113,8 @@ function interpolate!(to_field, from_field::TRG, interpolation_weigths = nothing _nearest_neigbor_interpolate!, to_field, to_ℓz, to_grid, from_field, from_loc, from_grid, interpolation_weigths) fill_halo_regions!(to_field) + + return to_field end @kernel function _nearest_neigbor_interpolate!(to_field, to_ℓz, to_grid, from_field, from_loc, from_grid, iw) From 3d148d540ba0b47bffd56e8805e05f6ebb39a35c Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Sat, 24 Aug 2024 17:48:03 -0400 Subject: [PATCH 16/42] another change --- src/interpolation.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/interpolation.jl b/src/interpolation.jl index 192dfdd..07a6a76 100644 --- a/src/interpolation.jl +++ b/src/interpolation.jl @@ -125,8 +125,8 @@ end k⁻, k⁺, ζ = interpolator(kk) - iₒ = @inbounds iw.i_indices[i, j] - jₒ = @inbounds iw.j_indices[i, j] + i₀ = @inbounds iw.i_indices[i, j] + j₀ = @inbounds iw.j_indices[i, j] ϕ⁻ = horizontal_interpolate(i, j, from_grid, from_field, i₀, j₀, k⁻, iw.weights) ϕ⁺ = horizontal_interpolate(i, j, from_grid, from_field, i₀, j₀, k⁺, iw.weights) From 04887c948be9a0205e97df5cf15d4890dd9dc82e Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Sat, 24 Aug 2024 17:49:38 -0400 Subject: [PATCH 17/42] bugfix --- src/OrthogonalSphericalShellGrids.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/OrthogonalSphericalShellGrids.jl b/src/OrthogonalSphericalShellGrids.jl index dbe4d93..229feee 100644 --- a/src/OrthogonalSphericalShellGrids.jl +++ b/src/OrthogonalSphericalShellGrids.jl @@ -13,7 +13,7 @@ using Oceananigans.Grids: R_Earth, lat_lon_to_cartesian, generate_coordinate, topology using Oceananigans.Operators -using Oceananigans.Utils: get_cartesian_nodes_and_vertices, +using Oceananigans.Utils: get_cartesian_nodes_and_vertices using Adapt using JLD2 From 0feb8babcb6f125a97fe73837e99bc525080c080 Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Sat, 24 Aug 2024 17:50:50 -0400 Subject: [PATCH 18/42] bugfix --- src/interpolation.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/interpolation.jl b/src/interpolation.jl index 07a6a76..0462630 100644 --- a/src/interpolation.jl +++ b/src/interpolation.jl @@ -14,13 +14,13 @@ struct InterpolationWeights{LXT, LYT, LXF, LYF, I, J, W} j_indices :: J weights :: W - InterpolationWeights{LXT, LYT, LXF, LYF}(i::I, j::J, w::W) where {LXT, LYT, LXF, LYF, I, J, W}= new{LXT, LYT, LXF, LYF, I, J, W}(i, j, w) + InterpolationWeights{LXT, LYT, LXF, LYF}(i::I, j::J, w::W) where {LXT, LYT, LXF, LYF, I, J, W} = new{LXT, LYT, LXF, LYF, I, J, W}(i, j, w) end -@inline from_location(::InterpolationWeights{LXT, LYT, LXF, LYF}) = (LXF, LYF) -@inline to_location(::InterpolationWeights{LXT, LYT, LXF, LYF}) = (LXT, LYT) +@inline from_location(::InterpolationWeights{LXT, LYT, LXF, LYF}) where {LXT, LYT, LXF, LYF} = (LXF, LYF) +@inline to_location(::InterpolationWeights{LXT, LYT, LXF, LYF}) where {LXT, LYT, LXF, LYF} = (LXT, LYT) -Adapt.adapt_structure(to, iw::InterpolationWeights{LXT, LYT, LXF, LYF}) = +Adapt.adapt_structure(to, iw::InterpolationWeights{LXT, LYT, LXF, LYF}) where {LXT, LYT, LXF, LYF} = InterpolationWeights{LXT, LYT, LXF, LYF}(Adapt.adapt(to, iw.i_indices), Adapt.adapt(to, iw.j_indices), Adapt.adapt(to, iw.weights)) From d6d820fd8afbf92c7459c1a7283cfc6821879934 Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Sat, 24 Aug 2024 17:53:15 -0400 Subject: [PATCH 19/42] better --- src/interpolation.jl | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/interpolation.jl b/src/interpolation.jl index 0462630..a6a61dc 100644 --- a/src/interpolation.jl +++ b/src/interpolation.jl @@ -2,7 +2,7 @@ using OrthogonalSphericalShellGrids using OrthogonalSphericalShellGrids: TRG using Oceananigans using Oceananigans.Operators: Δx, Δy -using Oceananigans.Grids: OSSG, λnodes, φnodes +using Oceananigans.Grids: λnodes, φnodes, λnode, φnode using Oceananigans.Fields: fractional_index, fractional_z_index, AbstractField import Oceananigans.Fields: interpolate, interpolate!, fractional_indices @@ -14,7 +14,9 @@ struct InterpolationWeights{LXT, LYT, LXF, LYF, I, J, W} j_indices :: J weights :: W - InterpolationWeights{LXT, LYT, LXF, LYF}(i::I, j::J, w::W) where {LXT, LYT, LXF, LYF, I, J, W} = new{LXT, LYT, LXF, LYF, I, J, W}(i, j, w) + function InterpolationWeights{LXT, LYT, LXF, LYF}(i::I, j::J, w::W) where {LXT, LYT, LXF, LYF, I, J, W} + return new{LXT, LYT, LXF, LYF, I, J, W}(i, j, w) + end end @inline from_location(::InterpolationWeights{LXT, LYT, LXF, LYF}) where {LXT, LYT, LXF, LYF} = (LXF, LYF) @@ -25,7 +27,7 @@ Adapt.adapt_structure(to, iw::InterpolationWeights{LXT, LYT, LXF, LYF}) where {L Adapt.adapt(to, iw.j_indices), Adapt.adapt(to, iw.weights)) -function InterpolationWeights(to_field, from_field::TRG) +function InterpolationWeights(to_field, from_field::TRGField) to_grid = to_field.grid from_grid = from_field.grid @@ -39,14 +41,14 @@ function InterpolationWeights(to_field, from_field::TRG) from_loc = location(from_field) to_loc = location(to_field) - launch!(arch, to_grid, :xy, compute_weights!, + launch!(arch, to_grid, :xy, _compute_weights!, i_indices, j_indices, weights, to_grid, from_grid, map(instantiate, to_loc), map(instantiate, from_loc)) return InterpolationWeights{to_loc[1], to_loc[2], from_loc[1], from_loc[2]}(i_indices, j_indices, weights) end -@kernel function compute_weights(i_indices, j_indices, weights, to_grid, from_grid, to_loc, from_loc) +@kernel function _compute_weights!(i_indices, j_indices, weights, to_grid, from_grid, to_loc, from_loc) i, j = @index(Global, NTuple) λ₀ = λnode(i, j, 1, to_grid, to_loc...) @@ -257,5 +259,5 @@ end end # We assume that all points are very close to each other -@inline massage_longitudes(λ₀, λ) = ifelse(abs(λ₀ - λ) > 180, - ifelse(λ₀ > 180, λ + 360, λ - 360), λ) \ No newline at end of file +@inline massage_longitude(λ₀, λ) = ifelse(abs(λ₀ - λ) > 180, + ifelse(λ₀ > 180, λ + 360, λ - 360), λ) \ No newline at end of file From 7536406b062e2991b03fd706ed0cffb64fe12ecc Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Sat, 24 Aug 2024 18:01:20 -0400 Subject: [PATCH 20/42] done --- src/interpolation.jl | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/src/interpolation.jl b/src/interpolation.jl index a6a61dc..7932a9a 100644 --- a/src/interpolation.jl +++ b/src/interpolation.jl @@ -2,13 +2,17 @@ using OrthogonalSphericalShellGrids using OrthogonalSphericalShellGrids: TRG using Oceananigans using Oceananigans.Operators: Δx, Δy -using Oceananigans.Grids: λnodes, φnodes, λnode, φnode -using Oceananigans.Fields: fractional_index, fractional_z_index, AbstractField +using Oceananigans.Grids: λnodes, φnodes, λnode, φnode, znode +using Oceananigans.Fields: fractional_index, fractional_z_index, AbstractField, interpolator -import Oceananigans.Fields: interpolate, interpolate!, fractional_indices +import Oceananigans.Fields: interpolate! TRGField = Field{<:Any, <:Any, <:Any, <:Any, <:TRG} +##### +##### Nearest Neighbor Interpolation from a Tripolar Field to a Latitude Longitude Field +##### + struct InterpolationWeights{LXT, LYT, LXF, LYF, I, J, W} i_indices :: I j_indices :: J @@ -28,6 +32,7 @@ Adapt.adapt_structure(to, iw::InterpolationWeights{LXT, LYT, LXF, LYF}) where {L Adapt.adapt(to, iw.weights)) function InterpolationWeights(to_field, from_field::TRGField) + to_grid = to_field.grid from_grid = from_field.grid @@ -38,8 +43,8 @@ function InterpolationWeights(to_field, from_field::TRGField) j_indices = on_architecture(arch, zeros(Int, Nx, Ny)) weights = on_architecture(arch, zeros(eltype(to_grid), Nx, Ny, 9)) - from_loc = location(from_field) to_loc = location(to_field) + from_loc = location(from_field) launch!(arch, to_grid, :xy, _compute_weights!, i_indices, j_indices, weights, @@ -71,12 +76,13 @@ end end end -function interpolate!(to_field, from_field::TRG, interpolation_weigths = nothing) +function interpolate!(to_field, from_field::TRGField, interpolation_weigths = nothing) to_loc = location(to_field) - from_loc = location(to_field) + from_loc = location(from_field) - # Make sure weigths are coorect + # Make sure weigths are correct + # TODO: a check also on the grid (to and from) if !(interpolation_weigths isa InterpolationWeights) interpolation_weigths = InterpolationWeights(to_field, from_field) else @@ -84,13 +90,16 @@ function interpolate!(to_field, from_field::TRG, interpolation_weigths = nothing LXF, LYF = from_location(interpolation_weigths) LXT, LYT = to_location(interpolation_weigths) - correct_locations = (LXF, LYF) == from_loc && (LXT, LYT) == to_loc + correct_locations = (LXF, LYF) == from_loc[1:2] && (LXT, LYT) == to_loc[1:2] if !correct_locations throw("The location of the interpolation weigths do not coincide with the locations of the in and out fields") end end + to_loc = map(instantiate, to_loc) + from_loc = map(instantiate, from_loc) + to_grid = to_field.grid from_grid = from_field.grid @@ -112,18 +121,18 @@ function interpolate!(to_field, from_field::TRG, interpolation_weigths = nothing from_loc = map(instantiate, location(to_field)) launch!(to_arch, to_grid, size(to_field), - _nearest_neigbor_interpolate!, to_field, to_ℓz, to_grid, from_field, from_loc, from_grid, interpolation_weigths) + _nearest_neigbor_interpolate!, to_field, to_loc, to_grid, from_field, from_loc, from_grid, interpolation_weigths) fill_halo_regions!(to_field) return to_field end -@kernel function _nearest_neigbor_interpolate!(to_field, to_ℓz, to_grid, from_field, from_loc, from_grid, iw) +@kernel function _nearest_neigbor_interpolate!(to_field, to_loc, to_grid, from_field, from_loc, from_grid, iw) i, j, k = @index(Global, NTuple) - z = znode(k, to_grid, to_ℓz) - kk = fractional_z_index(z, from_loc, grid) + z = znode(k, to_grid, to_loc[3]) + kk = fractional_z_index(z, from_loc, from_grid) k⁻, k⁺, ζ = interpolator(kk) From 431b703a55277f82b8f2cfd4aab7625095f69651 Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Sat, 24 Aug 2024 18:02:17 -0400 Subject: [PATCH 21/42] add interpolation --- src/interpolation.jl | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/interpolation.jl b/src/interpolation.jl index 7932a9a..4792adc 100644 --- a/src/interpolation.jl +++ b/src/interpolation.jl @@ -116,10 +116,6 @@ function interpolate!(to_field, from_field::TRGField, interpolation_weigths = no throw(ArgumentError(msg)) end - # Make locations - to_ℓz = location(to_field)[3]() - from_loc = map(instantiate, location(to_field)) - launch!(to_arch, to_grid, size(to_field), _nearest_neigbor_interpolate!, to_field, to_loc, to_grid, from_field, from_loc, from_grid, interpolation_weigths) From 8235e11417ea28f8d43de00b6a8324507d4df4ce Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Sat, 24 Aug 2024 18:03:53 -0400 Subject: [PATCH 22/42] formatting --- src/interpolation.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/interpolation.jl b/src/interpolation.jl index 4792adc..85f404f 100644 --- a/src/interpolation.jl +++ b/src/interpolation.jl @@ -97,7 +97,7 @@ function interpolate!(to_field, from_field::TRGField, interpolation_weigths = no end end - to_loc = map(instantiate, to_loc) + to_loc = map(instantiate, to_loc) from_loc = map(instantiate, from_loc) to_grid = to_field.grid From 93e5f5d9dd408d0f1fe8bb33840e30a076e2a155 Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Sat, 24 Aug 2024 18:04:34 -0400 Subject: [PATCH 23/42] better formatting --- src/interpolation.jl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/interpolation.jl b/src/interpolation.jl index 85f404f..54cbc4d 100644 --- a/src/interpolation.jl +++ b/src/interpolation.jl @@ -170,9 +170,10 @@ end w₂₁ = weights[i, j, 9] end - f = f₀₀ * w₀₀ + f₀₁ * w₀₁ + f₁₀ * w₁₀ + f₀₂ * w₀₂ + f₂₀ * w₂₀ + f₁₁ * w₁₁ + f₂₂ * w₂₂ + f₁₂ * w₁₂ + f₂₁ * w₂₁ - - return f / (w₀₀ + w₀₁ + w₁₀ + w₀₂ + w₂₀ + w₁₁ + w₂₂ + w₁₂ + w₂₁) + F = f₀₀ * w₀₀ + f₀₁ * w₀₁ + f₁₀ * w₁₀ + f₀₂ * w₀₂ + f₂₀ * w₂₀ + f₁₁ * w₁₁ + f₂₂ * w₂₂ + f₁₂ * w₁₂ + f₂₁ * w₂₁ + W = w₀₀ + w₀₁ + w₁₀ + w₀₂ + w₂₀ + w₁₁ + w₂₂ + w₁₂ + w₂₁ + + return F / W end @inline function distance(x₁, y₁, x₂, y₂) From 5af743bf5cb51097c596e301d8caca5244502e7b Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Sat, 24 Aug 2024 18:07:19 -0400 Subject: [PATCH 24/42] better formatting --- src/interpolation.jl | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/src/interpolation.jl b/src/interpolation.jl index 54cbc4d..5460e54 100644 --- a/src/interpolation.jl +++ b/src/interpolation.jl @@ -124,7 +124,7 @@ function interpolate!(to_field, from_field::TRGField, interpolation_weigths = no return to_field end -@kernel function _nearest_neigbor_interpolate!(to_field, to_loc, to_grid, from_field, from_loc, from_grid, iw) +@kernel function _nearest_neigbor_interpolate!(to_field, to_loc, to_grid, from_field, from_loc, from_grid, interpolation_weights) i, j, k = @index(Global, NTuple) z = znode(k, to_grid, to_loc[3]) @@ -132,32 +132,32 @@ end k⁻, k⁺, ζ = interpolator(kk) - i₀ = @inbounds iw.i_indices[i, j] - j₀ = @inbounds iw.j_indices[i, j] - - ϕ⁻ = horizontal_interpolate(i, j, from_grid, from_field, i₀, j₀, k⁻, iw.weights) - ϕ⁺ = horizontal_interpolate(i, j, from_grid, from_field, i₀, j₀, k⁺, iw.weights) + ϕ⁻ = horizontal_interpolate(i, j, k⁻, from_grid, from_field, interpolation_weights) + ϕ⁺ = horizontal_interpolate(i, j, k⁺, from_grid, from_field, interpolation_weights) @inbounds to_field[i, j, k] = ϕ⁻ * (1 - ζ) + ϕ⁺ * ζ end -@inline function horizontal_interpolate(i, j, from_grid, from_field, i₀, j₀, k₀, weights) +@inline function horizontal_interpolate(i, j, k, grid, from_field, weights) - i₁ = ifelse(i₀ == 0, from_grid.Nx, i₀ - 1) + i₀ = @inbounds iw.i_indices[i, j] + j₀ = @inbounds iw.j_indices[i, j] + + i₁ = ifelse(i₀ == 0, size(from_field, 1), i₀ - 1) j₁ = ifelse(j₀ == 0, j₀, j₀ - 1) i₂ = ifelse(i₀ == size(from_field, 1), 1, i₀ + 1) j₂ = ifelse(j₀ == size(from_field, 2), j₀, j₀ + 1) @inbounds begin - f₀₀ = from_field[i₀, j₀, k₀] - f₀₁ = from_field[i₀, j₁, k₀] - f₁₀ = from_field[i₁, j₀, k₀] - f₀₂ = from_field[i₀, j₂, k₀] - f₂₀ = from_field[i₂, j₀, k₀] - f₁₁ = from_field[i₁, j₁, k₀] - f₂₂ = from_field[i₂, j₂, k₀] - f₁₂ = from_field[i₁, j₂, k₀] - f₂₁ = from_field[i₂, j₁, k₀] + f₀₀ = from_field[i₀, j₀, k] + f₀₁ = from_field[i₀, j₁, k] + f₁₀ = from_field[i₁, j₀, k] + f₀₂ = from_field[i₀, j₂, k] + f₂₀ = from_field[i₂, j₀, k] + f₁₁ = from_field[i₁, j₁, k] + f₂₂ = from_field[i₂, j₂, k] + f₁₂ = from_field[i₁, j₂, k] + f₂₁ = from_field[i₂, j₁, k] w₀₀ = weights[i, j, 1] w₀₁ = weights[i, j, 2] @@ -176,6 +176,10 @@ end return F / W end +##### +##### Weight computation +##### + @inline function distance(x₁, y₁, x₂, y₂) dx = x₁ - x₂ dy = y₁ - y₂ From 6362c4939d765c6ea4b76069b003b84f6f955665 Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Sat, 24 Aug 2024 18:08:19 -0400 Subject: [PATCH 25/42] again, better formatting --- src/interpolation.jl | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/interpolation.jl b/src/interpolation.jl index 5460e54..7cf6ef6 100644 --- a/src/interpolation.jl +++ b/src/interpolation.jl @@ -138,7 +138,7 @@ end @inbounds to_field[i, j, k] = ϕ⁻ * (1 - ζ) + ϕ⁺ * ζ end -@inline function horizontal_interpolate(i, j, k, grid, from_field, weights) +@inline function horizontal_interpolate(i, j, k, grid, from_field, iw) i₀ = @inbounds iw.i_indices[i, j] j₀ = @inbounds iw.j_indices[i, j] @@ -159,15 +159,15 @@ end f₁₂ = from_field[i₁, j₂, k] f₂₁ = from_field[i₂, j₁, k] - w₀₀ = weights[i, j, 1] - w₀₁ = weights[i, j, 2] - w₁₀ = weights[i, j, 3] - w₀₂ = weights[i, j, 4] - w₂₀ = weights[i, j, 5] - w₁₁ = weights[i, j, 6] - w₂₂ = weights[i, j, 7] - w₁₂ = weights[i, j, 8] - w₂₁ = weights[i, j, 9] + w₀₀ = iw.weights[i, j, 1] + w₀₁ = iw.weights[i, j, 2] + w₁₀ = iw.weights[i, j, 3] + w₀₂ = iw.weights[i, j, 4] + w₂₀ = iw.weights[i, j, 5] + w₁₁ = iw.weights[i, j, 6] + w₂₂ = iw.weights[i, j, 7] + w₁₂ = iw.weights[i, j, 8] + w₂₁ = iw.weights[i, j, 9] end F = f₀₀ * w₀₀ + f₀₁ * w₀₁ + f₁₀ * w₁₀ + f₀₂ * w₀₂ + f₂₀ * w₂₀ + f₁₁ * w₁₁ + f₂₂ * w₂₂ + f₁₂ * w₁₂ + f₂₁ * w₂₁ From 44202de9c5de7d92f04855d1464d865668e30624 Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Sat, 24 Aug 2024 18:11:51 -0400 Subject: [PATCH 26/42] add another fill halo --- src/interpolation.jl | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/interpolation.jl b/src/interpolation.jl index 7cf6ef6..5569ef7 100644 --- a/src/interpolation.jl +++ b/src/interpolation.jl @@ -13,6 +13,9 @@ TRGField = Field{<:Any, <:Any, <:Any, <:Any, <:TRG} ##### Nearest Neighbor Interpolation from a Tripolar Field to a Latitude Longitude Field ##### +# This is a "naive" algorithm, intended only for visualization and zonal averages, not for more sofisticated diagnostics. +# Optimizations and improvements are welcome. + struct InterpolationWeights{LXT, LYT, LXF, LYF, I, J, W} i_indices :: I j_indices :: J @@ -106,6 +109,9 @@ function interpolate!(to_field, from_field::TRGField, interpolation_weigths = no to_arch = architecture(to_field) from_arch = architecture(from_field) + # Make sure `from_field` has the boundary conditions filled + fill_halo_regions!(from_field) + # In case architectures are `Distributed` we # verify that the fields are on the same child architecture to_arch = child_architecture(to_arch) @@ -198,8 +204,7 @@ end # # We assume that in an TRG, the latitude lines for a given i - index are sorted # # i.e. φ is monotone in j. This is not the case for λ that might jump between 0 and 360. @inline function horizontal_distances(λ₀, φ₀, loc, grid) - # This is a "naive" algorithm, so it is going to be quite slow! - # Optimizations are welcome! + λ = λnodes(grid, loc...; with_halos = true) φ = φnodes(grid, loc...; with_halos = true) From 19ced470d3eab90d512913cb09fd70b29a72b9d4 Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Sat, 24 Aug 2024 18:21:37 -0400 Subject: [PATCH 27/42] bugfix --- src/interpolation.jl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/interpolation.jl b/src/interpolation.jl index 5569ef7..2a0da33 100644 --- a/src/interpolation.jl +++ b/src/interpolation.jl @@ -2,10 +2,14 @@ using OrthogonalSphericalShellGrids using OrthogonalSphericalShellGrids: TRG using Oceananigans using Oceananigans.Operators: Δx, Δy -using Oceananigans.Grids: λnodes, φnodes, λnode, φnode, znode +using Oceananigans.Grids: λnode, φnode, znode using Oceananigans.Fields: fractional_index, fractional_z_index, AbstractField, interpolator -import Oceananigans.Fields: interpolate! +import Oceananigans.Fields: interpolate!, λnodes, φnodes + +# TODO: Move to Oceananigans +@inline λnodes(ibg::ImmersedBoundaryGrid, args...; kwargs...) = λnodes(ibg.underlying_grid, args...; kwargs...) +@inline φnodes(ibg::ImmersedBoundaryGrid, args...; kwargs...) = φnodes(ibg.underlying_grid, args...; kwargs...) TRGField = Field{<:Any, <:Any, <:Any, <:Any, <:TRG} From 9686cadc62ebdf82fdc0b3614205716188d9e845 Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Sat, 24 Aug 2024 18:59:15 -0400 Subject: [PATCH 28/42] better performance --- src/interpolation.jl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/interpolation.jl b/src/interpolation.jl index 2a0da33..0dbd26b 100644 --- a/src/interpolation.jl +++ b/src/interpolation.jl @@ -53,7 +53,7 @@ function InterpolationWeights(to_field, from_field::TRGField) to_loc = location(to_field) from_loc = location(from_field) - launch!(arch, to_grid, :xy, _compute_weights!, + launch!(arch, to_grid, (Nx, Ny), _compute_weights!, i_indices, j_indices, weights, to_grid, from_grid, map(instantiate, to_loc), map(instantiate, from_loc)) @@ -221,7 +221,8 @@ end @inbounds begin for i = 1:Nx - jⁿ = fractional_index(φ₀, φ[i, :], Ny) - 1 + φi = view(φ, i, :) + jⁿ = fractional_index(φ₀, φi, Ny) - 1 j⁻ = floor(Int, jⁿ) j⁺ = j⁻ + 1 From 1409c74072f94bd25f8b3ae9fc82b023ef1ca856 Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Sat, 24 Aug 2024 19:05:18 -0400 Subject: [PATCH 29/42] bugfix --- src/interpolation.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/interpolation.jl b/src/interpolation.jl index 0dbd26b..11ecbd3 100644 --- a/src/interpolation.jl +++ b/src/interpolation.jl @@ -43,7 +43,7 @@ function InterpolationWeights(to_field, from_field::TRGField) to_grid = to_field.grid from_grid = from_field.grid - Nx, Ny, _ = size(to_grid) + Nx, Ny, _ = size(to_field) arch = architecture(to_grid) i_indices = on_architecture(arch, zeros(Int, Nx, Ny)) From 78731de54343616f1cc68518a4ffa9e542c3b724 Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Sun, 25 Aug 2024 17:45:58 -0400 Subject: [PATCH 30/42] diambiguate --- src/interpolation.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/interpolation.jl b/src/interpolation.jl index 11ecbd3..fea7ad0 100644 --- a/src/interpolation.jl +++ b/src/interpolation.jl @@ -38,7 +38,7 @@ Adapt.adapt_structure(to, iw::InterpolationWeights{LXT, LYT, LXF, LYF}) where {L Adapt.adapt(to, iw.j_indices), Adapt.adapt(to, iw.weights)) -function InterpolationWeights(to_field, from_field::TRGField) +function InterpolationWeights(to_field::Field, from_field::TRGField) to_grid = to_field.grid from_grid = from_field.grid From 98cf57ab13bb466372553a336d2a8aa72996c783 Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Mon, 26 Aug 2024 13:27:05 -0400 Subject: [PATCH 31/42] add diambiguation --- src/interpolation.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/interpolation.jl b/src/interpolation.jl index fea7ad0..d8e8479 100644 --- a/src/interpolation.jl +++ b/src/interpolation.jl @@ -83,7 +83,7 @@ end end end -function interpolate!(to_field, from_field::TRGField, interpolation_weigths = nothing) +function interpolate!(to_field::Field, from_field::TRGField, interpolation_weigths = nothing) to_loc = location(to_field) from_loc = location(from_field) From deddf4fd75775253a38cfe41e7d355bf011a9f6a Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Mon, 26 Aug 2024 14:59:30 -0400 Subject: [PATCH 32/42] comment --- src/interpolation.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/interpolation.jl b/src/interpolation.jl index d8e8479..b0f80cf 100644 --- a/src/interpolation.jl +++ b/src/interpolation.jl @@ -278,6 +278,7 @@ end return i₀, j₀, d₀₀, d₀₁, d₁₀, d₀₂, d₂₀, d₁₁, d₂₂, d₁₂, d₂₁ end -# We assume that all points are very close to each other +# We assume that all points are very close to each other, so a longitude difference of 180 should not possible, +# this means that the same side of the globe, but that the longitude is displaced by 360 degrees. @inline massage_longitude(λ₀, λ) = ifelse(abs(λ₀ - λ) > 180, ifelse(λ₀ > 180, λ + 360, λ - 360), λ) \ No newline at end of file From 72ff2e0eb02c1e2b61074948efbf549f636d8b7b Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Mon, 2 Sep 2024 12:09:52 -0400 Subject: [PATCH 33/42] interpolate nothing in z --- src/interpolation.jl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/interpolation.jl b/src/interpolation.jl index b0f80cf..a7c3771 100644 --- a/src/interpolation.jl +++ b/src/interpolation.jl @@ -134,6 +134,13 @@ function interpolate!(to_field::Field, from_field::TRGField, interpolation_weigt return to_field end +@kernel function _nearest_neigbor_interpolate!(to_field, ::Tuple{<:Any, <:Any, <:Nothing}, + to_grid, from_field, from_loc, from_grid, interpolation_weights) + + i, j, k = @index(Global, NTuple) + @inbounds to_field[i, j, k] = horizontal_interpolate(i, j, k, from_grid, from_field, interpolation_weights) +end + @kernel function _nearest_neigbor_interpolate!(to_field, to_loc, to_grid, from_field, from_loc, from_grid, interpolation_weights) i, j, k = @index(Global, NTuple) From fab808f63cc4b1735af8e7ff282f7bd3fc6a09a1 Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Mon, 9 Sep 2024 23:47:57 -0600 Subject: [PATCH 34/42] better numerics --- src/grid_utils.jl | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/src/grid_utils.jl b/src/grid_utils.jl index bed6ff3..aa93759 100644 --- a/src/grid_utils.jl +++ b/src/grid_utils.jl @@ -39,25 +39,8 @@ end Azᶜᶜᵃ[i, j] = spherical_area_quadrilateral(a, b, c, d) * radius^2 - a = lat_lon_to_cartesian(φᶜᶠᵃ[i-1, j ], λᶜᶠᵃ[i-1, j ], 1) - b = lat_lon_to_cartesian(φᶜᶠᵃ[ i , j ], λᶜᶠᵃ[ i , j ], 1) - c = lat_lon_to_cartesian(φᶜᶠᵃ[ i , j+1], λᶜᶠᵃ[ i , j+1], 1) - d = lat_lon_to_cartesian(φᶜᶠᵃ[i-1, j+1], λᶜᶠᵃ[i-1, j+1], 1) - - Azᶠᶜᵃ[i, j] = spherical_area_quadrilateral(a, b, c, d) * radius^2 - - a = lat_lon_to_cartesian(φᶠᶜᵃ[ i , j-1], λᶠᶜᵃ[ i , j-1], 1) - b = lat_lon_to_cartesian(φᶠᶜᵃ[i+1, j-1], λᶠᶜᵃ[i+1, j-1], 1) - c = lat_lon_to_cartesian(φᶠᶜᵃ[i+1, j ], λᶠᶜᵃ[i+1, j ], 1) - d = lat_lon_to_cartesian(φᶠᶜᵃ[ i , j ], λᶠᶜᵃ[ i , j ], 1) - - Azᶜᶠᵃ[i, j] = spherical_area_quadrilateral(a, b, c, d) * radius^2 - - a = lat_lon_to_cartesian(φᶜᶜᵃ[i-1, j-1], λᶜᶜᵃ[i-1, j-1], 1) - b = lat_lon_to_cartesian(φᶜᶜᵃ[ i , j-1], λᶜᶜᵃ[ i , j-1], 1) - c = lat_lon_to_cartesian(φᶜᶜᵃ[ i , j ], λᶜᶜᵃ[ i , j ], 1) - d = lat_lon_to_cartesian(φᶜᶜᵃ[i-1, j ], λᶜᶜᵃ[i-1, j ], 1) - - Azᶠᶠᵃ[i, j] = spherical_area_quadrilateral(a, b, c, d) * radius^2 + Azᶠᶜᵃ[i, j] = Δyᶠᶜᵃ[i, j] * Δxᶠᶜᵃ[i, j] + Azᶜᶠᵃ[i, j] = Δyᶜᶠᵃ[i, j] * Δxᶜᶠᵃ[i, j] + Azᶠᶠᵃ[i, j] = Δyᶠᶠᵃ[i, j] * Δxᶠᶠᵃ[i, j] end end From fa73dc6727bfa143f49914b243f2082b6c64491d Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Mon, 9 Sep 2024 23:48:18 -0600 Subject: [PATCH 35/42] better numerics --- src/grid_utils.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/grid_utils.jl b/src/grid_utils.jl index aa93759..50e52ab 100644 --- a/src/grid_utils.jl +++ b/src/grid_utils.jl @@ -38,7 +38,6 @@ end d = lat_lon_to_cartesian(φᶠᶠᵃ[ i , j+1], λᶠᶠᵃ[ i , j+1], 1) Azᶜᶜᵃ[i, j] = spherical_area_quadrilateral(a, b, c, d) * radius^2 - Azᶠᶜᵃ[i, j] = Δyᶠᶜᵃ[i, j] * Δxᶠᶜᵃ[i, j] Azᶜᶠᵃ[i, j] = Δyᶜᶠᵃ[i, j] * Δxᶜᶠᵃ[i, j] Azᶠᶠᵃ[i, j] = Δyᶠᶠᵃ[i, j] * Δxᶠᶠᵃ[i, j] From 54c6151297eed59a135a2241b69d72fb6304437c Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Mon, 16 Sep 2024 15:52:09 -0400 Subject: [PATCH 36/42] remove the interpolation part (that is another PR) --- src/interpolation.jl | 291 ------------------------------------------- 1 file changed, 291 deletions(-) delete mode 100644 src/interpolation.jl diff --git a/src/interpolation.jl b/src/interpolation.jl deleted file mode 100644 index a7c3771..0000000 --- a/src/interpolation.jl +++ /dev/null @@ -1,291 +0,0 @@ -using OrthogonalSphericalShellGrids -using OrthogonalSphericalShellGrids: TRG -using Oceananigans -using Oceananigans.Operators: Δx, Δy -using Oceananigans.Grids: λnode, φnode, znode -using Oceananigans.Fields: fractional_index, fractional_z_index, AbstractField, interpolator - -import Oceananigans.Fields: interpolate!, λnodes, φnodes - -# TODO: Move to Oceananigans -@inline λnodes(ibg::ImmersedBoundaryGrid, args...; kwargs...) = λnodes(ibg.underlying_grid, args...; kwargs...) -@inline φnodes(ibg::ImmersedBoundaryGrid, args...; kwargs...) = φnodes(ibg.underlying_grid, args...; kwargs...) - -TRGField = Field{<:Any, <:Any, <:Any, <:Any, <:TRG} - -##### -##### Nearest Neighbor Interpolation from a Tripolar Field to a Latitude Longitude Field -##### - -# This is a "naive" algorithm, intended only for visualization and zonal averages, not for more sofisticated diagnostics. -# Optimizations and improvements are welcome. - -struct InterpolationWeights{LXT, LYT, LXF, LYF, I, J, W} - i_indices :: I - j_indices :: J - weights :: W - - function InterpolationWeights{LXT, LYT, LXF, LYF}(i::I, j::J, w::W) where {LXT, LYT, LXF, LYF, I, J, W} - return new{LXT, LYT, LXF, LYF, I, J, W}(i, j, w) - end -end - -@inline from_location(::InterpolationWeights{LXT, LYT, LXF, LYF}) where {LXT, LYT, LXF, LYF} = (LXF, LYF) -@inline to_location(::InterpolationWeights{LXT, LYT, LXF, LYF}) where {LXT, LYT, LXF, LYF} = (LXT, LYT) - -Adapt.adapt_structure(to, iw::InterpolationWeights{LXT, LYT, LXF, LYF}) where {LXT, LYT, LXF, LYF} = - InterpolationWeights{LXT, LYT, LXF, LYF}(Adapt.adapt(to, iw.i_indices), - Adapt.adapt(to, iw.j_indices), - Adapt.adapt(to, iw.weights)) - -function InterpolationWeights(to_field::Field, from_field::TRGField) - - to_grid = to_field.grid - from_grid = from_field.grid - - Nx, Ny, _ = size(to_field) - arch = architecture(to_grid) - - i_indices = on_architecture(arch, zeros(Int, Nx, Ny)) - j_indices = on_architecture(arch, zeros(Int, Nx, Ny)) - weights = on_architecture(arch, zeros(eltype(to_grid), Nx, Ny, 9)) - - to_loc = location(to_field) - from_loc = location(from_field) - - launch!(arch, to_grid, (Nx, Ny), _compute_weights!, - i_indices, j_indices, weights, - to_grid, from_grid, map(instantiate, to_loc), map(instantiate, from_loc)) - - return InterpolationWeights{to_loc[1], to_loc[2], from_loc[1], from_loc[2]}(i_indices, j_indices, weights) -end - -@kernel function _compute_weights!(i_indices, j_indices, weights, to_grid, from_grid, to_loc, from_loc) - i, j = @index(Global, NTuple) - - λ₀ = λnode(i, j, 1, to_grid, to_loc...) - φ₀ = φnode(i, j, 1, to_grid, to_loc...) - i₀, j₀, d₀₀, d₀₁, d₁₀, d₀₂, d₂₀, d₁₁, d₂₂, d₁₂, d₂₁ = horizontal_distances(λ₀, φ₀, from_loc, from_grid) - - @inbounds begin - i_indices[i, j] = i₀ - j_indices[i, j] = j₀ - - weights[i, j, 1] = 1 / d₀₀ - weights[i, j, 2] = 1 / d₀₁ - weights[i, j, 3] = 1 / d₁₀ - weights[i, j, 4] = 1 / d₀₂ - weights[i, j, 5] = 1 / d₂₀ - weights[i, j, 6] = 1 / d₁₁ - weights[i, j, 7] = 1 / d₂₂ - weights[i, j, 8] = 1 / d₁₂ - weights[i, j, 9] = 1 / d₂₁ - end -end - -function interpolate!(to_field::Field, from_field::TRGField, interpolation_weigths = nothing) - - to_loc = location(to_field) - from_loc = location(from_field) - - # Make sure weigths are correct - # TODO: a check also on the grid (to and from) - if !(interpolation_weigths isa InterpolationWeights) - interpolation_weigths = InterpolationWeights(to_field, from_field) - else - # Check that the locations are correct - LXF, LYF = from_location(interpolation_weigths) - LXT, LYT = to_location(interpolation_weigths) - - correct_locations = (LXF, LYF) == from_loc[1:2] && (LXT, LYT) == to_loc[1:2] - - if !correct_locations - throw("The location of the interpolation weigths do not coincide with the locations of the in and out fields") - end - end - - to_loc = map(instantiate, to_loc) - from_loc = map(instantiate, from_loc) - - to_grid = to_field.grid - from_grid = from_field.grid - - to_arch = architecture(to_field) - from_arch = architecture(from_field) - - # Make sure `from_field` has the boundary conditions filled - fill_halo_regions!(from_field) - - # In case architectures are `Distributed` we - # verify that the fields are on the same child architecture - to_arch = child_architecture(to_arch) - from_arch = child_architecture(from_arch) - - if !isnothing(from_arch) && to_arch != from_arch - msg = "Cannot interpolate! because from_field is on $from_arch while to_field is on $to_arch." - throw(ArgumentError(msg)) - end - - launch!(to_arch, to_grid, size(to_field), - _nearest_neigbor_interpolate!, to_field, to_loc, to_grid, from_field, from_loc, from_grid, interpolation_weigths) - - fill_halo_regions!(to_field) - - return to_field -end - -@kernel function _nearest_neigbor_interpolate!(to_field, ::Tuple{<:Any, <:Any, <:Nothing}, - to_grid, from_field, from_loc, from_grid, interpolation_weights) - - i, j, k = @index(Global, NTuple) - @inbounds to_field[i, j, k] = horizontal_interpolate(i, j, k, from_grid, from_field, interpolation_weights) -end - -@kernel function _nearest_neigbor_interpolate!(to_field, to_loc, to_grid, from_field, from_loc, from_grid, interpolation_weights) - i, j, k = @index(Global, NTuple) - - z = znode(k, to_grid, to_loc[3]) - kk = fractional_z_index(z, from_loc, from_grid) - - k⁻, k⁺, ζ = interpolator(kk) - - ϕ⁻ = horizontal_interpolate(i, j, k⁻, from_grid, from_field, interpolation_weights) - ϕ⁺ = horizontal_interpolate(i, j, k⁺, from_grid, from_field, interpolation_weights) - - @inbounds to_field[i, j, k] = ϕ⁻ * (1 - ζ) + ϕ⁺ * ζ -end - -@inline function horizontal_interpolate(i, j, k, grid, from_field, iw) - - i₀ = @inbounds iw.i_indices[i, j] - j₀ = @inbounds iw.j_indices[i, j] - - i₁ = ifelse(i₀ == 0, size(from_field, 1), i₀ - 1) - j₁ = ifelse(j₀ == 0, j₀, j₀ - 1) - i₂ = ifelse(i₀ == size(from_field, 1), 1, i₀ + 1) - j₂ = ifelse(j₀ == size(from_field, 2), j₀, j₀ + 1) - - @inbounds begin - f₀₀ = from_field[i₀, j₀, k] - f₀₁ = from_field[i₀, j₁, k] - f₁₀ = from_field[i₁, j₀, k] - f₀₂ = from_field[i₀, j₂, k] - f₂₀ = from_field[i₂, j₀, k] - f₁₁ = from_field[i₁, j₁, k] - f₂₂ = from_field[i₂, j₂, k] - f₁₂ = from_field[i₁, j₂, k] - f₂₁ = from_field[i₂, j₁, k] - - w₀₀ = iw.weights[i, j, 1] - w₀₁ = iw.weights[i, j, 2] - w₁₀ = iw.weights[i, j, 3] - w₀₂ = iw.weights[i, j, 4] - w₂₀ = iw.weights[i, j, 5] - w₁₁ = iw.weights[i, j, 6] - w₂₂ = iw.weights[i, j, 7] - w₁₂ = iw.weights[i, j, 8] - w₂₁ = iw.weights[i, j, 9] - end - - F = f₀₀ * w₀₀ + f₀₁ * w₀₁ + f₁₀ * w₁₀ + f₀₂ * w₀₂ + f₂₀ * w₂₀ + f₁₁ * w₁₁ + f₂₂ * w₂₂ + f₁₂ * w₁₂ + f₂₁ * w₂₁ - W = w₀₀ + w₀₁ + w₁₀ + w₀₂ + w₂₀ + w₁₁ + w₂₂ + w₁₂ + w₂₁ - - return F / W -end - -##### -##### Weight computation -##### - -@inline function distance(x₁, y₁, x₂, y₂) - dx = x₁ - x₂ - dy = y₁ - y₂ - return dx * dx + dy * dy -end - -@inline function check_and_update(dist, i₀, j₀, i, j, λ₀, φ₀, λ, φ) - d = distance(λ₀, φ₀, λ , φ) - i₀ = ifelse(d < dist, i, i₀) - j₀ = ifelse(d < dist, j, j₀) - dist = min(d, dist) - - return dist, i₀, j₀ -end - -# # We assume that in an TRG, the latitude lines for a given i - index are sorted -# # i.e. φ is monotone in j. This is not the case for λ that might jump between 0 and 360. -@inline function horizontal_distances(λ₀, φ₀, loc, grid) - - λ = λnodes(grid, loc...; with_halos = true) - φ = φnodes(grid, loc...; with_halos = true) - - Nx, Ny, _ = size(grid) - - # We search for an initial valid option - dist = Inf - i₀ = 1 - j₀ = 1 - - @inbounds begin - for i = 1:Nx - φi = view(φ, i, :) - jⁿ = fractional_index(φ₀, φi, Ny) - 1 - j⁻ = floor(Int, jⁿ) - j⁺ = j⁻ + 1 - - if j⁻ <= grid.Ny - dist, i₀, j₀ = check_and_update(dist, i₀, j₀, i, j⁻, λ₀, φ₀, λ[i, j⁻], φ[i, j⁻]) - end - - if j⁺ <= grid.Ny - dist, i₀, j₀ = check_and_update(dist, i₀, j₀, i, j⁺, λ₀, φ₀, λ[i, j⁺], φ[i, j⁺]) - end - end - end - - # Now find the closest neighbors given i₀ and j₀ - i₁ = ifelse(i₀ == 0, grid.Nx, i₀ - 1) - j₁ = ifelse(j₀ == 0, j₀, j₀ - 1) - i₂ = ifelse(i₀ == size(λ, 1), 1, i₀ + 1) - j₂ = ifelse(j₀ == size(λ, 2), j₀, j₀ + 1) - - @inbounds begin - λ₀₀ = massage_longitude(λ₀, λ[i₀, j₀]) - λ₀₁ = massage_longitude(λ₀, λ[i₀, j₁]) - λ₁₀ = massage_longitude(λ₀, λ[i₁, j₀]) - λ₀₂ = massage_longitude(λ₀, λ[i₀, j₂]) - λ₂₀ = massage_longitude(λ₀, λ[i₂, j₀]) - λ₁₁ = massage_longitude(λ₀, λ[i₁, j₁]) - λ₂₂ = massage_longitude(λ₀, λ[i₂, j₂]) - λ₁₂ = massage_longitude(λ₀, λ[i₁, j₂]) - λ₂₁ = massage_longitude(λ₀, λ[i₂, j₁]) - - φ₀₀ = φ[i₀, j₀] - φ₀₁ = φ[i₀, j₁] - φ₁₀ = φ[i₁, j₀] - φ₀₂ = φ[i₀, j₂] - φ₂₀ = φ[i₂, j₀] - φ₁₁ = φ[i₁, j₁] - φ₂₂ = φ[i₂, j₂] - φ₁₂ = φ[i₁, j₂] - φ₂₁ = φ[i₂, j₁] - end - - d₀₀ = distance(λ₀, φ₀, λ₀₀, φ₀₀) - d₀₁ = distance(λ₀, φ₀, λ₀₁, φ₀₁) - d₁₀ = distance(λ₀, φ₀, λ₁₀, φ₁₀) - d₀₂ = distance(λ₀, φ₀, λ₀₂, φ₀₂) - d₂₀ = distance(λ₀, φ₀, λ₂₀, φ₂₀) - - d₁₁ = distance(λ₀, φ₀, λ₁₁, φ₁₁) - d₂₂ = distance(λ₀, φ₀, λ₂₂, φ₂₂) - d₁₂ = distance(λ₀, φ₀, λ₁₂, φ₁₂) - d₂₁ = distance(λ₀, φ₀, λ₂₁, φ₂₁) - - return i₀, j₀, d₀₀, d₀₁, d₁₀, d₀₂, d₂₀, d₁₁, d₂₂, d₁₂, d₂₁ -end - -# We assume that all points are very close to each other, so a longitude difference of 180 should not possible, -# this means that the same side of the globe, but that the longitude is displaced by 360 degrees. -@inline massage_longitude(λ₀, λ) = ifelse(abs(λ₀ - λ) > 180, - ifelse(λ₀ > 180, λ + 360, λ - 360), λ) \ No newline at end of file From f067ec620f9fb0ca83d8db16e3b10ec0b20b04c3 Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Mon, 16 Sep 2024 15:54:41 -0400 Subject: [PATCH 37/42] adding tests --- test/test_distributed_tripolar.jl | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 test/test_distributed_tripolar.jl diff --git a/test/test_distributed_tripolar.jl b/test/test_distributed_tripolar.jl new file mode 100644 index 0000000..a1a78b2 --- /dev/null +++ b/test/test_distributed_tripolar.jl @@ -0,0 +1,5 @@ + +@testset "Test distributed TripolarGrid..." begin + md + +end \ No newline at end of file From 56a77be914094d63a662ce6fed78c31a77e19951 Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Mon, 16 Sep 2024 16:29:03 -0400 Subject: [PATCH 38/42] starting with some tests --- distributed_grid.jl | 26 +++++++++++++ src/OrthogonalSphericalShellGrids.jl | 1 - test/dependencies_for_runtests.jl | 12 ++++++ test/distributed_tests_utils.jl | 35 +++++++++++++++++ test/test_distributed_tripolar.jl | 56 +++++++++++++++++++++++++++- 5 files changed, 128 insertions(+), 2 deletions(-) create mode 100644 distributed_grid.jl create mode 100644 test/dependencies_for_runtests.jl create mode 100644 test/distributed_tests_utils.jl diff --git a/distributed_grid.jl b/distributed_grid.jl new file mode 100644 index 0000000..0d69d0c --- /dev/null +++ b/distributed_grid.jl @@ -0,0 +1,26 @@ + using OrthogonalSphericalShellGrids + using Oceananigans + using MPI + MPI.Init() + + include("test/distributed_tests_utils.jl") + arch = Distributed(CPU(), partition = Partition(2, 2)) + + distributed_grid = TripolarGrid(arch; size = (100, 100, 1), z = (-1000, 0)) + distributed_grid = mask_singularities(distributed_grid) + + run_tripolar_simulation(distributed_grid) + + if arch.local_rank == 0 + η = reconstruct_global_field(model.free_surface.η) + u = reconstruct_global_field(model.velocities.u) + v = reconstruct_global_field(model.velocities.v) + + fill_halo_regions!(η) + fill_halo_regions!(u) + fill_halo_regions!(v) + jldsave("distributed_tripolar.jld2"; η = η.data, u = u.data, v = v.data) + end + + MPI.Barrier(MPI.COMM_WORLD) + MPI.Finalize() diff --git a/src/OrthogonalSphericalShellGrids.jl b/src/OrthogonalSphericalShellGrids.jl index 229feee..9c8e786 100644 --- a/src/OrthogonalSphericalShellGrids.jl +++ b/src/OrthogonalSphericalShellGrids.jl @@ -33,6 +33,5 @@ include("distributed_zipper.jl") include("distributed_zipper_north_tags.jl") include("with_halo.jl") include("split_explicit_free_surface.jl") -include("interpolation.jl") end diff --git a/test/dependencies_for_runtests.jl b/test/dependencies_for_runtests.jl new file mode 100644 index 0000000..cd72b1e --- /dev/null +++ b/test/dependencies_for_runtests.jl @@ -0,0 +1,12 @@ +using OrthogonalSphericalShellGrids +using Oceananigans +using Oceananigans.Grids: halo_size +using Oceananigans.Utils +using Oceananigans.BoundaryConditions +using OrthogonalSphericalShellGrids: get_cartesian_nodes_and_vertices +using Oceananigans.CUDA +using Test + +using KernelAbstractions: @kernel, @index + +arch = CUDA.has_cuda_gpu() ? GPU() : CPU() diff --git a/test/distributed_tests_utils.jl b/test/distributed_tests_utils.jl new file mode 100644 index 0000000..0a1f344 --- /dev/null +++ b/test/distributed_tests_utils.jl @@ -0,0 +1,35 @@ + +function run_tripolar_simulation(grid) + + model = HydrostaticFreeSurfaceModel(; grid = grid, + free_surface = SplitExplicitFreeSurface(grid; substeps = 20), + tracers = (), + buoyancy = nothing, + coriolis = HydrostaticSphericalCoriolis()) + + # Setup the model with a gaussian profile near the physical north poles + ηᵢ(λ, φ, z) = exp(- (φ - 90)^2 / 10^2) + + set!(model, η = (x, y, z) -> exp( - (x - λp)^2 / 10^2 - (y - φp)^2 / 10^2) * 0.1) + + simulation = Simulation(model, Δt = 1minutes, stop_iteration = 100) + + outputs = merge(model.velocities, (; η = model.free_surface.η)) + + run!(simulation) + + return nothing +end + +function mask_singularities(underlying_grid) + λp = underlying_grid.conformal_mapping.first_pole_longitude + φp = underlying_grid.conformal_mapping.north_poles_latitude + + # We need a bottom height field that ``masks'' the singularities + bottom_height(λ, φ) = ((abs(λ - λp) < 5) & (abs(φp - φ) < 5)) | + ((abs(λ - λp - 180) < 5) & (abs(φp - φ) < 5)) | (φ < -80) ? 0 : - 1000 + + grid = ImmersedBoundaryGrid(underlying_grid, GridFittedBottom(bottom_height)) + + return grid +end diff --git a/test/test_distributed_tripolar.jl b/test/test_distributed_tripolar.jl index a1a78b2..eaf13dc 100644 --- a/test/test_distributed_tripolar.jl +++ b/test/test_distributed_tripolar.jl @@ -1,5 +1,59 @@ +using MPI + +run_distributed_grid = """ + using OrthogonalSphericalShellGrids + using Oceananigans + using MPI + MPI.Init() + + include("test/distributed_tests_utils.jl") + arch = Distributed(CPU(), partition = Partition(2, 2)) + + distributed_grid = TripolarGrid(arch; size = (100, 100, 1), z = (-1000, 0)) + distributed_grid = mask_singularities(distributed_grid) + + run_tripolar_simulation(distributed_grid) + + if arch.local_rank == 0 + η = reconstruct_global_field(model.free_surface.η) + u = reconstruct_global_field(model.velocities.u) + v = reconstruct_global_field(model.velocities.v) + + fill_halo_regions!(η) + fill_halo_regions!(u) + fill_halo_regions!(v) + jldsave("distributed_tripolar.jld2"; η = η.data, u = u.data, v = v.data) + end + + MPI.Barrier(MPI.COMM_WORLD) + MPI.Finalize() +""" @testset "Test distributed TripolarGrid..." begin - md + write("distributed_grid.jl", run_distributed_grid) + + mpiexec(cmd -> run(`$cmd -n 4 julia --project distributed_grid.jl`)) + + arch = CPU() + + grid = TripolarGrid(arch; size = (100, 100, 1), z = (-1000, 0)) + λp = grid.conformal_mapping.first_pole_longitude + φp = grid.conformal_mapping.north_poles_latitude + + grid = mask_singularities(grid) + + run_tripolar_simulation(grid) + + # Serial quantities + us, vs, ws = model.velocities + ηs = model.free_surface.η + + # Parallel quantities + up = jldopen("distributed_tripolar.jld2")["u"] + vp = jldopen("distributed_tripolar.jld2")["u"] + ηp = jldopen("distributed_tripolar.jld2")["u"] + @test us.data ≈ up + @test vs.data ≈ vp + @test ηs.data ≈ ηp end \ No newline at end of file From c14171d09aadee6c5ae0205ca9dabef49c51b692 Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Mon, 16 Sep 2024 16:30:19 -0400 Subject: [PATCH 39/42] small fix --- test/distributed_tests_utils.jl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/test/distributed_tests_utils.jl b/test/distributed_tests_utils.jl index 0a1f344..561bdd2 100644 --- a/test/distributed_tests_utils.jl +++ b/test/distributed_tests_utils.jl @@ -2,10 +2,11 @@ function run_tripolar_simulation(grid) model = HydrostaticFreeSurfaceModel(; grid = grid, - free_surface = SplitExplicitFreeSurface(grid; substeps = 20), - tracers = (), - buoyancy = nothing, - coriolis = HydrostaticSphericalCoriolis()) + free_surface = SplitExplicitFreeSurface(grid; substeps = 20), + tracers = (), + buoyancy = nothing, + momentum_advection = VectorInvariant(), + coriolis = HydrostaticSphericalCoriolis()) # Setup the model with a gaussian profile near the physical north poles ηᵢ(λ, φ, z) = exp(- (φ - 90)^2 / 10^2) From 8120584fd847f24c521b8697a36df137ba32a69d Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Mon, 16 Sep 2024 16:46:30 -0400 Subject: [PATCH 40/42] add the distributed test --- test/distributed_tests_utils.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/distributed_tests_utils.jl b/test/distributed_tests_utils.jl index 561bdd2..456ac2d 100644 --- a/test/distributed_tests_utils.jl +++ b/test/distributed_tests_utils.jl @@ -1,3 +1,5 @@ +using Oceananigans +using Oceananigans.Units function run_tripolar_simulation(grid) @@ -11,12 +13,10 @@ function run_tripolar_simulation(grid) # Setup the model with a gaussian profile near the physical north poles ηᵢ(λ, φ, z) = exp(- (φ - 90)^2 / 10^2) - set!(model, η = (x, y, z) -> exp( - (x - λp)^2 / 10^2 - (y - φp)^2 / 10^2) * 0.1) + set!(model, η = ηᵢ) simulation = Simulation(model, Δt = 1minutes, stop_iteration = 100) - outputs = merge(model.velocities, (; η = model.free_surface.η)) - run!(simulation) return nothing From a885ecacee1bde544703d02d27e7249b5c951de0 Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Mon, 16 Sep 2024 16:54:09 -0400 Subject: [PATCH 41/42] fix tests --- distributed_grid.jl | 8 ++++---- test/distributed_tests_utils.jl | 2 +- test/test_distributed_tripolar.jl | 34 +++++++++++++++---------------- 3 files changed, 21 insertions(+), 23 deletions(-) diff --git a/distributed_grid.jl b/distributed_grid.jl index 0d69d0c..5ce42c8 100644 --- a/distributed_grid.jl +++ b/distributed_grid.jl @@ -9,12 +9,12 @@ distributed_grid = TripolarGrid(arch; size = (100, 100, 1), z = (-1000, 0)) distributed_grid = mask_singularities(distributed_grid) - run_tripolar_simulation(distributed_grid) + simulation = run_tripolar_simulation(distributed_grid) if arch.local_rank == 0 - η = reconstruct_global_field(model.free_surface.η) - u = reconstruct_global_field(model.velocities.u) - v = reconstruct_global_field(model.velocities.v) + η = reconstruct_global_field(simulation.model.free_surface.η) + u = reconstruct_global_field(simulation.model.velocities.u) + v = reconstruct_global_field(simulation.model.velocities.v) fill_halo_regions!(η) fill_halo_regions!(u) diff --git a/test/distributed_tests_utils.jl b/test/distributed_tests_utils.jl index 456ac2d..7b365bc 100644 --- a/test/distributed_tests_utils.jl +++ b/test/distributed_tests_utils.jl @@ -19,7 +19,7 @@ function run_tripolar_simulation(grid) run!(simulation) - return nothing + return simulation end function mask_singularities(underlying_grid) diff --git a/test/test_distributed_tripolar.jl b/test/test_distributed_tripolar.jl index eaf13dc..e937e28 100644 --- a/test/test_distributed_tripolar.jl +++ b/test/test_distributed_tripolar.jl @@ -12,12 +12,12 @@ run_distributed_grid = """ distributed_grid = TripolarGrid(arch; size = (100, 100, 1), z = (-1000, 0)) distributed_grid = mask_singularities(distributed_grid) - run_tripolar_simulation(distributed_grid) + simulation = run_tripolar_simulation(distributed_grid) if arch.local_rank == 0 - η = reconstruct_global_field(model.free_surface.η) - u = reconstruct_global_field(model.velocities.u) - v = reconstruct_global_field(model.velocities.v) + η = reconstruct_global_field(simulation.model.free_surface.η) + u = reconstruct_global_field(simulation.model.velocities.u) + v = reconstruct_global_field(simulation.model.velocities.v) fill_halo_regions!(η) fill_halo_regions!(u) @@ -30,28 +30,26 @@ run_distributed_grid = """ """ @testset "Test distributed TripolarGrid..." begin - write("distributed_grid.jl", run_distributed_grid) + # Run the distributed grid simulation + write("distributed_grid.jl", run_distributed_grid) mpiexec(cmd -> run(`$cmd -n 4 julia --project distributed_grid.jl`)) + rm("distributed_grid.jl") - arch = CPU() - - grid = TripolarGrid(arch; size = (100, 100, 1), z = (-1000, 0)) - λp = grid.conformal_mapping.first_pole_longitude - φp = grid.conformal_mapping.north_poles_latitude - + # Run the serial computation + grid = TripolarGrid(size = (100, 100, 1), z = (-1000, 0)) grid = mask_singularities(grid) - run_tripolar_simulation(grid) + simulation = run_tripolar_simulation(grid) - # Serial quantities - us, vs, ws = model.velocities - ηs = model.free_surface.η + # Retrieve Serial quantities + us, vs, ws = simulation.model.velocities + ηs = simulation.model.free_surface.η - # Parallel quantities + # Retrieve Parallel quantities up = jldopen("distributed_tripolar.jld2")["u"] - vp = jldopen("distributed_tripolar.jld2")["u"] - ηp = jldopen("distributed_tripolar.jld2")["u"] + vp = jldopen("distributed_tripolar.jld2")["v"] + ηp = jldopen("distributed_tripolar.jld2")["η"] @test us.data ≈ up @test vs.data ≈ vp From 0b79a07648a3965cc2d96eb29bc35d6a862154e8 Mon Sep 17 00:00:00 2001 From: Simone Silvestri Date: Mon, 16 Sep 2024 16:59:26 -0400 Subject: [PATCH 42/42] fix the test --- test/test_distributed_tripolar.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_distributed_tripolar.jl b/test/test_distributed_tripolar.jl index e937e28..aa6834e 100644 --- a/test/test_distributed_tripolar.jl +++ b/test/test_distributed_tripolar.jl @@ -3,6 +3,7 @@ using MPI run_distributed_grid = """ using OrthogonalSphericalShellGrids using Oceananigans + using Oceananigans.DistributedComputations: reconstruct_global_field using MPI MPI.Init()