diff --git a/src/OrthogonalSphericalShellGrids.jl b/src/OrthogonalSphericalShellGrids.jl index 6679e1b..9c8e786 100644 --- a/src/OrthogonalSphericalShellGrids.jl +++ b/src/OrthogonalSphericalShellGrids.jl @@ -12,7 +12,8 @@ using Oceananigans.Grids: R_Earth, halo_size, spherical_area_quadrilateral, lat_lon_to_cartesian, generate_coordinate, topology using Oceananigans.Operators -using Oceananigans.Utils: get_cartesian_nodes_and_vertices + +using Oceananigans.Utils: get_cartesian_nodes_and_vertices using Adapt using JLD2 @@ -28,6 +29,8 @@ include("generate_tripolar_coordinates.jl") include("tripolar_grid.jl") include("grid_extensions.jl") include("distributed_tripolar_grid.jl") +include("distributed_zipper.jl") +include("distributed_zipper_north_tags.jl") include("with_halo.jl") include("split_explicit_free_surface.jl") diff --git a/src/distributed_tripolar_grid.jl b/src/distributed_tripolar_grid.jl index 3387e57..b7f9186 100644 --- a/src/distributed_tripolar_grid.jl +++ b/src/distributed_tripolar_grid.jl @@ -1,6 +1,9 @@ +using MPI +using Oceananigans.BoundaryConditions: DistributedCommunicationBoundaryCondition using Oceananigans.DistributedComputations using Oceananigans.DistributedComputations: local_size, barrier!, + all_reduce, ranks, inject_halo_communication_boundary_conditions, concatenate_local_sizes @@ -26,9 +29,23 @@ function TripolarGrid(arch::Distributed, FT::DataType=Float64; kwargs...) workers = ranks(arch.partition) + px = ifelse(isnothing(arch.partition.x), 1, arch.partition.x) + py = ifelse(isnothing(arch.partition.y), 1, arch.partition.y) - workers[1] != 1 && - throw(ArgumentError("The tripolar grid is supported only on a Y-partitioning configuration")) + # Check that partitioning in x is correct: + try + if isodd(px) && (px != 1) + throw(ArgumentError("Only even partitioning in x is supported with the TripolarGrid")) + end + catch + throw(ArgumentError("The x partition $(px) is not supported. The partition in x must be an even number. ")) + end + + # a slab decomposition in x is not supported + if px != 1 && py == 1 + throw(ArgumentError("A x-only partitioning is not supported with the TripolarGrid. \n + Please, use a y partitioning configuration or a x-y pencil partitioning.")) + end Hx, Hy, Hz = halo @@ -41,39 +58,49 @@ function TripolarGrid(arch::Distributed, FT::DataType=Float64; lsize = local_size(arch, global_size) # Extracting the local range - nlocal = concatenate_local_sizes(lsize, arch, 2) - rank = arch.local_rank - - jstart = 1 + sum(nlocal[1:rank]) - jend = rank == workers[2] - 1 ? Ny : sum(nlocal[1:rank+1]) + nylocal = concatenate_local_sizes(lsize, arch, 2) + nxlocal = concatenate_local_sizes(lsize, arch, 1) + yrank = ifelse(isnothing(arch.partition.x), 0, arch.local_index[2] - 1) + xrank = ifelse(isnothing(arch.partition.x), 0, arch.local_index[1] - 1) + + # The j-range + jstart = 1 + sum(nylocal[1:yrank]) + jend = yrank == workers[2] - 1 ? Ny : sum(nylocal[1:yrank+1]) jrange = jstart-Hy:jend+Hy + # The i-range + istart = 1 + sum(nxlocal[1:xrank]) + iend = xrank == workers[1] - 1 ? 
Nx : sum(nxlocal[1:xrank+1]) + irange = istart-Hx:iend+Hx + # Partitioning the Coordinates - λᶠᶠᵃ = partition_tripolar_metric(global_grid, :λᶠᶠᵃ, jrange) - φᶠᶠᵃ = partition_tripolar_metric(global_grid, :φᶠᶠᵃ, jrange) - λᶠᶜᵃ = partition_tripolar_metric(global_grid, :λᶠᶜᵃ, jrange) - φᶠᶜᵃ = partition_tripolar_metric(global_grid, :φᶠᶜᵃ, jrange) - λᶜᶠᵃ = partition_tripolar_metric(global_grid, :λᶜᶠᵃ, jrange) - φᶜᶠᵃ = partition_tripolar_metric(global_grid, :φᶜᶠᵃ, jrange) - λᶜᶜᵃ = partition_tripolar_metric(global_grid, :λᶜᶜᵃ, jrange) - φᶜᶜᵃ = partition_tripolar_metric(global_grid, :φᶜᶜᵃ, jrange) + λᶠᶠᵃ = partition_tripolar_metric(global_grid, :λᶠᶠᵃ, irange, jrange) + φᶠᶠᵃ = partition_tripolar_metric(global_grid, :φᶠᶠᵃ, irange, jrange) + λᶠᶜᵃ = partition_tripolar_metric(global_grid, :λᶠᶜᵃ, irange, jrange) + φᶠᶜᵃ = partition_tripolar_metric(global_grid, :φᶠᶜᵃ, irange, jrange) + λᶜᶠᵃ = partition_tripolar_metric(global_grid, :λᶜᶠᵃ, irange, jrange) + φᶜᶠᵃ = partition_tripolar_metric(global_grid, :φᶜᶠᵃ, irange, jrange) + λᶜᶜᵃ = partition_tripolar_metric(global_grid, :λᶜᶜᵃ, irange, jrange) + φᶜᶜᵃ = partition_tripolar_metric(global_grid, :φᶜᶜᵃ, irange, jrange) # Partitioning the Metrics - Δxᶜᶜᵃ = partition_tripolar_metric(global_grid, :Δxᶜᶜᵃ, jrange) - Δxᶠᶜᵃ = partition_tripolar_metric(global_grid, :Δxᶠᶜᵃ, jrange) - Δxᶜᶠᵃ = partition_tripolar_metric(global_grid, :Δxᶜᶠᵃ, jrange) - Δxᶠᶠᵃ = partition_tripolar_metric(global_grid, :Δxᶠᶠᵃ, jrange) - Δyᶜᶜᵃ = partition_tripolar_metric(global_grid, :Δyᶜᶜᵃ, jrange) - Δyᶠᶜᵃ = partition_tripolar_metric(global_grid, :Δyᶠᶜᵃ, jrange) - Δyᶜᶠᵃ = partition_tripolar_metric(global_grid, :Δyᶜᶠᵃ, jrange) - Δyᶠᶠᵃ = partition_tripolar_metric(global_grid, :Δyᶠᶠᵃ, jrange) - Azᶜᶜᵃ = partition_tripolar_metric(global_grid, :Azᶜᶜᵃ, jrange) - Azᶠᶜᵃ = partition_tripolar_metric(global_grid, :Azᶠᶜᵃ, jrange) - Azᶜᶠᵃ = partition_tripolar_metric(global_grid, :Azᶜᶠᵃ, jrange) - Azᶠᶠᵃ = partition_tripolar_metric(global_grid, :Azᶠᶠᵃ, jrange) - - LY = rank == 0 ? RightConnected : FullyConnected - ny = nlocal[rank+1] + Δxᶜᶜᵃ = partition_tripolar_metric(global_grid, :Δxᶜᶜᵃ, irange, jrange) + Δxᶠᶜᵃ = partition_tripolar_metric(global_grid, :Δxᶠᶜᵃ, irange, jrange) + Δxᶜᶠᵃ = partition_tripolar_metric(global_grid, :Δxᶜᶠᵃ, irange, jrange) + Δxᶠᶠᵃ = partition_tripolar_metric(global_grid, :Δxᶠᶠᵃ, irange, jrange) + Δyᶜᶜᵃ = partition_tripolar_metric(global_grid, :Δyᶜᶜᵃ, irange, jrange) + Δyᶠᶜᵃ = partition_tripolar_metric(global_grid, :Δyᶠᶜᵃ, irange, jrange) + Δyᶜᶠᵃ = partition_tripolar_metric(global_grid, :Δyᶜᶠᵃ, irange, jrange) + Δyᶠᶠᵃ = partition_tripolar_metric(global_grid, :Δyᶠᶠᵃ, irange, jrange) + Azᶜᶜᵃ = partition_tripolar_metric(global_grid, :Azᶜᶜᵃ, irange, jrange) + Azᶠᶜᵃ = partition_tripolar_metric(global_grid, :Azᶠᶜᵃ, irange, jrange) + Azᶜᶠᵃ = partition_tripolar_metric(global_grid, :Azᶜᶠᵃ, irange, jrange) + Azᶠᶠᵃ = partition_tripolar_metric(global_grid, :Azᶠᶠᵃ, irange, jrange) + + LY = yrank == 0 ? RightConnected : FullyConnected + LX = workers[1] == 1 ? 
Periodic : FullyConnected + ny = nylocal[yrank+1] + nx = nxlocal[xrank+1] zᵃᵃᶜ = global_grid.zᵃᵃᶜ zᵃᵃᶠ = global_grid.zᵃᵃᶠ @@ -81,75 +108,151 @@ function TripolarGrid(arch::Distributed, FT::DataType=Float64; Δzᵃᵃᶠ = global_grid.Δzᵃᵃᶠ radius = global_grid.radius - grid = OrthogonalSphericalShellGrid{Periodic, LY, Bounded}(arch, - Nx, ny, Nz, - Hx, Hy, Hz, - convert(eltype(radius), global_grid.Lz), - on_architecture(arch, λᶜᶜᵃ), - on_architecture(arch, λᶠᶜᵃ), - on_architecture(arch, λᶜᶠᵃ), - on_architecture(arch, λᶠᶠᵃ), - on_architecture(arch, φᶜᶜᵃ), - on_architecture(arch, φᶠᶜᵃ), - on_architecture(arch, φᶜᶠᵃ), - on_architecture(arch, φᶠᶠᵃ), - on_architecture(arch, zᵃᵃᶜ), - on_architecture(arch, zᵃᵃᶠ), - on_architecture(arch, Δxᶜᶜᵃ), - on_architecture(arch, Δxᶠᶜᵃ), - on_architecture(arch, Δxᶜᶠᵃ), - on_architecture(arch, Δxᶠᶠᵃ), - on_architecture(arch, Δyᶜᶜᵃ), - on_architecture(arch, Δyᶜᶠᵃ), - on_architecture(arch, Δyᶠᶜᵃ), - on_architecture(arch, Δyᶠᶠᵃ), - on_architecture(arch, Δzᵃᵃᶜ), - on_architecture(arch, Δzᵃᵃᶠ), - on_architecture(arch, Azᶜᶜᵃ), - on_architecture(arch, Azᶠᶜᵃ), - on_architecture(arch, Azᶜᶠᵃ), - on_architecture(arch, Azᶠᶠᵃ), - radius, - global_grid.conformal_mapping) + # Fix corners halos passing in case workers[1] != 1 + if workers[1] != 1 + northwest_idx_x = ranks(arch)[1] - arch.local_index[1] + 2 + northeast_idx_x = ranks(arch)[1] - arch.local_index[1] + + if northwest_idx_x > workers[1] + northwest_idx_x = arch.local_index[1] + end + + if northeast_idx_x < 1 + northeast_idx_x = arch.local_index[1] + end + + # Make sure the northwest and northeast connectivities are correct + northwest_recv_rank = receiving_rank(arch; receive_idx_x = northwest_idx_x) + northeast_recv_rank = receiving_rank(arch; receive_idx_x = northeast_idx_x) + north_recv_rank = receiving_rank(arch) + + if yrank == workers[2] - 1 + arch.connectivity.northwest = northwest_recv_rank + arch.connectivity.northeast = northeast_recv_rank + arch.connectivity.north = north_recv_rank + end + end + + grid = OrthogonalSphericalShellGrid{LX, LY, Bounded}(arch, + nx, ny, Nz, + Hx, Hy, Hz, + convert(eltype(radius), global_grid.Lz), + on_architecture(arch, λᶜᶜᵃ), + on_architecture(arch, λᶠᶜᵃ), + on_architecture(arch, λᶜᶠᵃ), + on_architecture(arch, λᶠᶠᵃ), + on_architecture(arch, φᶜᶜᵃ), + on_architecture(arch, φᶠᶜᵃ), + on_architecture(arch, φᶜᶠᵃ), + on_architecture(arch, φᶠᶠᵃ), + on_architecture(arch, zᵃᵃᶜ), + on_architecture(arch, zᵃᵃᶠ), + on_architecture(arch, Δxᶜᶜᵃ), + on_architecture(arch, Δxᶠᶜᵃ), + on_architecture(arch, Δxᶜᶠᵃ), + on_architecture(arch, Δxᶠᶠᵃ), + on_architecture(arch, Δyᶜᶜᵃ), + on_architecture(arch, Δyᶜᶠᵃ), + on_architecture(arch, Δyᶠᶜᵃ), + on_architecture(arch, Δyᶠᶠᵃ), + on_architecture(arch, Δzᵃᵃᶜ), + on_architecture(arch, Δzᵃᵃᶠ), + on_architecture(arch, Azᶜᶜᵃ), + on_architecture(arch, Azᶠᶜᵃ), + on_architecture(arch, Azᶜᶠᵃ), + on_architecture(arch, Azᶠᶠᵃ), + radius, + global_grid.conformal_mapping) return grid end -function partition_tripolar_metric(global_grid, metric_name, jrange) +function partition_tripolar_metric(global_grid, metric_name, irange, jrange) metric = getproperty(global_grid, metric_name) offsets = metric.offsets - partitioned_metric = metric[:, jrange].parent + partitioned_metric = metric[irange, jrange] + + if partitioned_metric isa OffsetArray + partitioned_metric = partitioned_metric.parent + end return OffsetArray(partitioned_metric, offsets...) 
end - ##### ##### Boundary condition extensions ##### +struct ZipperHaloCommunicationRanks{F, T, S} + from :: F + to :: T + sign :: S +end + +ZipperHaloCommunicationRanks(sign; from, to) = ZipperHaloCommunicationRanks(from, to, sign) + +Base.summary(hcr::ZipperHaloCommunicationRanks) = "ZipperHaloCommunicationRanks from rank $(hcr.from) to rank $(hcr.to)" + +# Finding out the paired rank to communicate the north boundary +# in case of a DistributedZipperBoundaryCondition using a "Handshake" procedure +function receiving_rank(arch; receive_idx_x = ranks(arch)[1] - arch.local_index[1] + 1) + + Ry = ranks(arch)[2] + receive_rank = 0 + + for rank in 0:prod(ranks(arch)) - 1 + my_x_idx = 0 + my_y_idx = 0 + + if arch.local_rank == rank + my_x_idx = arch.local_index[1] + my_y_idx = arch.local_index[2] + end + + x_idx = all_reduce(+, my_x_idx, arch) + y_idx = all_reduce(+, my_y_idx, arch) + + if x_idx == receive_idx_x && y_idx == Ry + receive_rank = rank + end + end + + return receive_rank +end + # a distributed `TripolarGrid` needs a `ZipperBoundaryCondition` for the north boundary # only on the last rank function regularize_field_boundary_conditions(bcs::FieldBoundaryConditions, - grid::DTRG, - field_name::Symbol, - prognostic_names=nothing) + grid::DTRG, + field_name::Symbol, + prognostic_names=nothing) arch = architecture(grid) loc = assumed_field_location(field_name) - rank = arch.local_rank - processor_size = ranks(arch.partition) + yrank = arch.local_index[2] - 1 + + processor_size = ranks(arch) sign = (field_name == :u) || (field_name == :v) ? -1 : 1 - west = regularize_boundary_condition(bcs.west, grid, loc, 1, LeftBoundary, prognostic_names) - east = regularize_boundary_condition(bcs.east, grid, loc, 1, RightBoundary, prognostic_names) + west = regularize_boundary_condition(bcs.west, grid, loc, 1, LeftBoundary, prognostic_names) + east = regularize_boundary_condition(bcs.east, grid, loc, 1, RightBoundary, prognostic_names) south = regularize_boundary_condition(bcs.south, grid, loc, 2, LeftBoundary, prognostic_names) - north = if rank == processor_size[2] - 1 + + north = if yrank == processor_size[2] - 1 && processor_size[1] == 1 ZipperBoundaryCondition(sign) + + elseif yrank == processor_size[2] - 1 && processor_size[1] != 1 + from = arch.local_rank + # Search the rank to send to + to = arch.connectivity.north + halo_communication = ZipperHaloCommunicationRanks(sign; from, to) + DistributedCommunicationBoundaryCondition(halo_communication) + else - regularize_boundary_condition(bcs.south, grid, loc, 2, RightBoundary, prognostic_names) + regularize_boundary_condition(bcs.north, grid, loc, 2, RightBoundary, prognostic_names) + end bottom = regularize_boundary_condition(bcs.bottom, grid, loc, 3, LeftBoundary, prognostic_names) @@ -164,8 +267,10 @@ end # with a sign that depends on the location of the field (revert the value of the halos if on edges, keep it if on nodes or centers) function Field((LX, LY, LZ)::Tuple, grid::DTRG, data, old_bcs, indices::Tuple, op, status) arch = architecture(grid) - rank = arch.local_rank - processor_size = ranks(arch.partition) + yrank = arch.local_index[2] - 1 + + processor_size = ranks(arch) + indices = validate_indices(indices, (LX, LY, LZ), grid) validate_field_data((LX, LY, LZ), data, grid, indices) validate_boundary_conditions((LX, LY, LZ), grid, old_bcs) @@ -180,27 +285,35 @@ function Field((LX, LY, LZ)::Tuple, grid::DTRG, data, old_bcs, indices::Tuple, o # the last rank, then we need to substitute the BC only if the old one is not already # a zipper 
boundary condition. Otherwise we always substitute because we need to # inject the halo boundary conditions. - if rank == processor_size[2] - 1 + if yrank == processor_size[2] - 1 && processor_size[1] == 1 north_bc = if !(old_bcs.north isa ZBC) default_zipper else old_bcs.north end + + elseif yrank == processor_size[2] - 1 && processor_size[1] != 1 + sgn = old_bcs.north isa ZBC ? old_bcs.north.condition : sign(LX, LY) + from = arch.local_rank + to = arch.connectivity.north + halo_communication = ZipperHaloCommunicationRanks(sgn; from, to) + north_bc = DistributedCommunicationBoundaryCondition(halo_communication) + else north_bc = new_bcs.north end new_bcs = FieldBoundaryConditions(; west=new_bcs.west, - east=new_bcs.east, - south=new_bcs.south, - north=north_bc, - top=new_bcs.top, - bottom=new_bcs.bottom) + east=new_bcs.east, + south=new_bcs.south, + north=north_bc, + top=new_bcs.top, + bottom=new_bcs.bottom) end buffers = FieldBoundaryBuffers(grid, data, new_bcs) - return Field{LX,LY,LZ}(grid, data, new_bcs, indices, op, status, buffers) + return Field{LX, LY, LZ}(grid, data, new_bcs, indices, op, status, buffers) end # Reconstruction the global tripolar grid for visualization purposes @@ -208,7 +321,7 @@ function reconstruct_global_grid(grid::DistributedTripolarGrid) arch = grid.architecture - n = size(grid) + n = Base.size(grid) halo = halo_size(grid) size = map(sum, concatenate_local_sizes(n, arch)) diff --git a/src/distributed_zipper.jl b/src/distributed_zipper.jl new file mode 100644 index 0000000..49cf566 --- /dev/null +++ b/src/distributed_zipper.jl @@ -0,0 +1,111 @@ +using Oceananigans.BoundaryConditions: fill_open_boundary_regions!, + permute_boundary_conditions, + fill_halo_event!, + DistributedCommunication + +using Oceananigans.DistributedComputations: cooperative_waitall!, + recv_from_buffers!, + fill_corners!, + loc_id, + DCBCT + +import Oceananigans.BoundaryConditions: fill_halo_regions! +import Oceananigans.DistributedComputations: synchronize_communication! + +@inline instantiate(T::DataType) = T() +@inline instantiate(T) = T + +const DistributedZipper = BoundaryCondition{<:DistributedCommunication, <:ZipperHaloCommunicationRanks} + +switch_north_halos!(c, north_bc, grid, loc) = nothing + +function switch_north_halos!(c, north_bc::DistributedZipper, grid, loc) + sign = north_bc.condition.sign + hz = halo_size(grid) + sz = size(grid) + + _switch_north_halos!(parent(c), loc, sign, sz, hz) + + return nothing +end + +@inline reversed_halos(::Tuple{<:Any, <:Center, <:Any}, Ny, Hy) = Ny+2Hy:-1:Ny+Hy+2 +@inline reversed_halos(::Tuple{<:Any, <:Face, <:Any}, Ny, Hy) = Ny+2Hy-1:-1:Ny+Hy+1 + +@inline west_corner_halos(::Tuple{<:Face, <:Any, <:Any}, Hx) = 2:Hx +@inline west_corner_halos(::Tuple{<:Center, <:Any, <:Any}, Hx) = 1:Hx + +# We throw away the first point! 
+@inline function _switch_north_halos!(c, loc, sign, (Nx, Ny, Nz), (Hx, Hy, Hz)) + + # Domain indices common for all locations + north_halos = Ny+Hy+1:Ny+2Hy-1 + east_corner = Nx+Hx+1:Nx+2Hx + interior = Hx+1:Nx+Hx + + # Location - dependent halo indices + reversed_north_halos = reversed_halos(loc, Ny, Hy) + west_corner = west_corner_halos(loc, Hx) + + view(c, west_corner, north_halos, :) .= sign .* reverse(view(c, west_corner, reversed_north_halos, :), dims = 1) + view(c, east_corner, north_halos, :) .= sign .* reverse(view(c, east_corner, reversed_north_halos, :), dims = 1) + view(c, interior, north_halos, :) .= sign .* reverse(view(c, interior, reversed_north_halos, :), dims = 1) + + return nothing +end + +function fill_halo_regions!(c::OffsetArray, bcs, indices, loc, grid::DTRG, buffers, args...; only_local_halos = false, fill_boundary_normal_velocities = true, kwargs...) + if fill_boundary_normal_velocities + fill_open_boundary_regions!(c, bcs, indices, loc, grid, args...; kwargs...) + end + + north_bc = bcs.north + + arch = architecture(grid) + fill_halos!, bcs = permute_boundary_conditions(bcs) + + number_of_tasks = length(fill_halos!) + + for task = 1:number_of_tasks + fill_halo_event!(c, fill_halos![task], bcs[task], indices, loc, arch, grid, buffers, args...; only_local_halos, kwargs...) + end + + fill_corners!(c, arch.connectivity, indices, loc, arch, grid, buffers, args...; only_local_halos, kwargs...) + + # We increment the tag counter only if we have actually initiated the MPI communication. + # This is the case only if at least one of the boundary conditions is a distributed communication + # boundary condition (DCBCT) _and_ the `only_local_halos` keyword argument is false. + increment_tag = any(isa.(bcs, DCBCT)) && !only_local_halos + + if increment_tag + arch.mpi_tag[] += 1 + end + + switch_north_halos!(c, north_bc, grid, loc) + + return nothing +end + +function synchronize_communication!(field::Field{<:Any, <:Any, <:Any, <:Any, <:DTRG}) + arch = architecture(field.grid) + + # Wait for outstanding requests + if !isempty(arch.mpi_requests) + cooperative_waitall!(arch.mpi_requests) + + # Reset MPI tag + arch.mpi_tag[] = 0 + + # Reset MPI requests + empty!(arch.mpi_requests) + end + + recv_from_buffers!(field.data, field.boundary_buffers, field.grid) + + north_bc = field.boundary_conditions.north + instantiated_location = map(instantiate, location(field)) + + switch_north_halos!(field, north_bc, field.grid, instantiated_location) + + return nothing +end \ No newline at end of file diff --git a/src/distributed_zipper_north_tags.jl b/src/distributed_zipper_north_tags.jl new file mode 100644 index 0000000..f1752cd --- /dev/null +++ b/src/distributed_zipper_north_tags.jl @@ -0,0 +1,60 @@ +import Oceananigans.DistributedComputations: north_recv_tag, + north_send_tag, + northwest_recv_tag, + northwest_send_tag, + northeast_recv_tag, + northeast_send_tag + +ID_DIGITS = 2 + +sides = (:west, :east, :south, :north, :southwest, :southeast, :northwest, :northeast) +side_id = Dict(side => n-1 for (n, side) in enumerate(sides)) + +# Change these and we are golden! +function north_recv_tag(arch, ::DTRG, location) + field_id = string(arch.mpi_tag[], pad=ID_DIGITS) + loc_digit = string(loc_id(location...), pad=ID_DIGITS) + last_rank = arch.local_index[2] == ranks(arch)[2] + side_digit = last_rank ? 
"8" : string(side_id[:south]) + return parse(Int, field_id * loc_digit * side_digit) +end + +function north_send_tag(arch, ::DTRG, location) + field_id = string(arch.mpi_tag[], pad=ID_DIGITS) + loc_digit = string(loc_id(location...), pad=ID_DIGITS) + last_rank = arch.local_index[2] == ranks(arch)[2] + side_digit = last_rank ? "8" : string(side_id[:north]) + return parse(Int, field_id * loc_digit * side_digit) +end + +function northwest_recv_tag(arch, ::DTRG, location) + field_id = string(arch.mpi_tag[], pad=ID_DIGITS) + loc_digit = string(loc_id(location...), pad=ID_DIGITS) + last_rank = arch.local_index[2] == ranks(arch)[2] + side_digit = last_rank ? "9" : string(side_id[:southeast]) + return parse(Int, field_id * loc_digit * side_digit) +end + +function northwest_send_tag(arch, ::DTRG, location) + field_id = string(arch.mpi_tag[], pad=ID_DIGITS) + loc_digit = string(loc_id(location...), pad=ID_DIGITS) + last_rank = arch.local_index[2] == ranks(arch)[2] + side_digit = last_rank ? "9" : string(side_id[:northwest]) + return parse(Int, field_id * loc_digit * side_digit) +end + +function northeast_recv_tag(arch, ::DTRG, location) + field_id = string(arch.mpi_tag[], pad=ID_DIGITS) + loc_digit = string(loc_id(location...), pad=ID_DIGITS) + last_rank = arch.local_index[2] == ranks(arch)[2] + side_digit = last_rank ? "10" : string(side_id[:southwest]) + return parse(Int, field_id * loc_digit * side_digit) +end + +function northeast_send_tag(arch, ::DTRG, location) + field_id = string(arch.mpi_tag[], pad=ID_DIGITS) + loc_digit = string(loc_id(location...), pad=ID_DIGITS) + last_rank = arch.local_index[2] == ranks(arch)[2] + side_digit = last_rank ? "10" : string(side_id[:northeast]) + return parse(Int, field_id * loc_digit * side_digit) +end diff --git a/src/grid_utils.jl b/src/grid_utils.jl index bed6ff3..50e52ab 100644 --- a/src/grid_utils.jl +++ b/src/grid_utils.jl @@ -38,26 +38,8 @@ end d = lat_lon_to_cartesian(φᶠᶠᵃ[ i , j+1], λᶠᶠᵃ[ i , j+1], 1) Azᶜᶜᵃ[i, j] = spherical_area_quadrilateral(a, b, c, d) * radius^2 - - a = lat_lon_to_cartesian(φᶜᶠᵃ[i-1, j ], λᶜᶠᵃ[i-1, j ], 1) - b = lat_lon_to_cartesian(φᶜᶠᵃ[ i , j ], λᶜᶠᵃ[ i , j ], 1) - c = lat_lon_to_cartesian(φᶜᶠᵃ[ i , j+1], λᶜᶠᵃ[ i , j+1], 1) - d = lat_lon_to_cartesian(φᶜᶠᵃ[i-1, j+1], λᶜᶠᵃ[i-1, j+1], 1) - - Azᶠᶜᵃ[i, j] = spherical_area_quadrilateral(a, b, c, d) * radius^2 - - a = lat_lon_to_cartesian(φᶠᶜᵃ[ i , j-1], λᶠᶜᵃ[ i , j-1], 1) - b = lat_lon_to_cartesian(φᶠᶜᵃ[i+1, j-1], λᶠᶜᵃ[i+1, j-1], 1) - c = lat_lon_to_cartesian(φᶠᶜᵃ[i+1, j ], λᶠᶜᵃ[i+1, j ], 1) - d = lat_lon_to_cartesian(φᶠᶜᵃ[ i , j ], λᶠᶜᵃ[ i , j ], 1) - - Azᶜᶠᵃ[i, j] = spherical_area_quadrilateral(a, b, c, d) * radius^2 - - a = lat_lon_to_cartesian(φᶜᶜᵃ[i-1, j-1], λᶜᶜᵃ[i-1, j-1], 1) - b = lat_lon_to_cartesian(φᶜᶜᵃ[ i , j-1], λᶜᶜᵃ[ i , j-1], 1) - c = lat_lon_to_cartesian(φᶜᶜᵃ[ i , j ], λᶜᶜᵃ[ i , j ], 1) - d = lat_lon_to_cartesian(φᶜᶜᵃ[i-1, j ], λᶜᶜᵃ[i-1, j ], 1) - - Azᶠᶠᵃ[i, j] = spherical_area_quadrilateral(a, b, c, d) * radius^2 + Azᶠᶜᵃ[i, j] = Δyᶠᶜᵃ[i, j] * Δxᶠᶜᵃ[i, j] + Azᶜᶠᵃ[i, j] = Δyᶜᶠᵃ[i, j] * Δxᶜᶠᵃ[i, j] + Azᶠᶠᵃ[i, j] = Δyᶠᶠᵃ[i, j] * Δxᶠᶠᵃ[i, j] end end diff --git a/src/split_explicit_free_surface.jl b/src/split_explicit_free_surface.jl index 9f7ce50..30d2fc6 100644 --- a/src/split_explicit_free_surface.jl +++ b/src/split_explicit_free_surface.jl @@ -53,7 +53,7 @@ function positive_zipper_boundary(default_field, grid::DTRG) arch = architecture(grid) workers = ranks(arch.partition) - if arch.local_rank == workers[2] - 1 + if arch.local_index[2] == workers[2] return 
FieldBoundaryConditions( top = nothing, bottom = nothing, @@ -76,7 +76,16 @@ end # We play the same trick as in the Distributed implementation and we extend the halos for # a split explicit barotropic solver on a tripolar grid. Only on the North boundary though! -@inline tripolar_split_explicit_halos(old_halos, step_halo) = old_halos[1], max(step_halo, old_halos[2]), old_halos[3] +@inline tripolar_split_explicit_halos(old_halos, step_halo, grid) = old_halos[1], max(step_halo, old_halos[2]), old_halos[3] + +@inline function tripolar_split_explicit_halos(old_halos, step_halo, grid::DTRG) + Rx, Ry, _ = architecture(grid).ranks + + Hx = Rx == 1 ? old_halos[1] : max(step_halo, old_halos[1]) + Hy = max(step_halo, old_halos[2]) # Always! + + return Hx, Hy, old_halos[3] +end # Internal function for HydrostaticFreeSurfaceModel function materialize_free_surface(free_surface::SplitExplicitFreeSurface, velocities, grid::TRG) @@ -86,7 +95,9 @@ function materialize_free_surface(free_surface::SplitExplicitFreeSurface, veloci old_halos = halo_size(grid) Nsubsteps = length(settings.substepping.averaging_weights) - extended_halos = tripolar_split_explicit_halos(old_halos, Nsubsteps+1) + # We need one additional halo in both directions because of the shifting + # caused by the halo fill of the horizontal velocities. + extended_halos = tripolar_split_explicit_halos(old_halos, Nsubsteps+3, grid) extended_grid = with_halo(extended_halos, grid) Nze = size(extended_grid, 3) diff --git a/src/zipper_boundary_condition.jl b/src/zipper_boundary_condition.jl index 06811b1..d6c1539 100644 --- a/src/zipper_boundary_condition.jl +++ b/src/zipper_boundary_condition.jl @@ -74,9 +74,7 @@ validate_boundary_condition_location(bc::Zipper, loc::Face, side) = Hy = grid.Hy for j = 1 : Hy - @inbounds begin - c[i, Ny + j, k] = s * c[i′, Ny - j + 1, k] - end + @inbounds c[i, Ny + j, k] = s * c[i′, Ny - j + 1, k] end return nothing @@ -91,9 +89,7 @@ end Hy = grid.Hy for j = 1 : Hy - @inbounds begin - c[i, Ny + j, k] = s * c[i′, Ny - j, k] # The Ny line is duplicated so we substitute starting Ny-1 - end + @inbounds c[i, Ny + j, k] = s * c[i′, Ny - j, k] # The Ny line is duplicated so we substitute starting Ny-1 end return nothing @@ -106,9 +102,7 @@ end Hy = grid.Hy for j = 1 : Hy - @inbounds begin - c[i, Ny + j, k] = sign * c[i′, Ny - j + 1, k] - end + @inbounds c[i, Ny + j, k] = sign * c[i′, Ny - j + 1, k] end return nothing @@ -121,9 +115,7 @@ end Hy = grid.Hy for j = 1 : Hy - @inbounds begin - c[i, Ny + j, k] = sign * c[i′, Ny - j, k] # The Ny line is duplicated so we substitute starting Ny-1 - end + @inbounds c[i, Ny + j, k] = sign * c[i′, Ny - j, k] # The Ny line is duplicated so we substitute starting Ny-1 end return nothing diff --git a/test/dependencies_for_runtests.jl b/test/dependencies_for_runtests.jl new file mode 100644 index 0000000..eb903b1 --- /dev/null +++ b/test/dependencies_for_runtests.jl @@ -0,0 +1,28 @@ +using OrthogonalSphericalShellGrids +using Oceananigans +using Oceananigans.Grids: halo_size +using Oceananigans.Utils +using Oceananigans.Units +using Oceananigans.BoundaryConditions +using OrthogonalSphericalShellGrids: get_cartesian_nodes_and_vertices +using Oceananigans.CUDA +using Test + +using KernelAbstractions: @kernel, @index + +arch = CUDA.has_cuda_gpu() ?
GPU() : CPU() + +# Mask the singularity of the grid in a region of +# 5 degrees radius around the singularities +function mask_singularities(underlying_grid::TripolarGrid) + λp = underlying_grid.conformal_mapping.first_pole_longitude + φp = underlying_grid.conformal_mapping.north_poles_latitude + + # We need a bottom height field that ``masks'' the singularities + bottom_height(λ, φ) = ((abs(λ - λp) < 5) & (abs(φp - φ) < 5)) | + ((abs(λ - λp - 180) < 5) & (abs(φp - φ) < 5)) | (φ < -80) ? 0 : - 1000 + + grid = ImmersedBoundaryGrid(underlying_grid, GridFittedBottom(bottom_height)) + + return grid +end diff --git a/test/distributed_tests_utils.jl b/test/distributed_tests_utils.jl new file mode 100644 index 0000000..c4b1c16 --- /dev/null +++ b/test/distributed_tests_utils.jl @@ -0,0 +1,58 @@ +using JLD2 +using MPI +using Oceananigans.DistributedComputations: reconstruct_global_field + +include("dependencies_for_runtests.jl") + +# Run the distributed grid simulation and save down reconstructed results +function run_distributed_tripolar_grid(arch, filename) + distributed_grid = TripolarGrid(arch; size = (100, 100, 1), z = (-1000, 0), halo = (5, 5, 5)) + distributed_grid = mask_singularities(distributed_grid) + simulation = run_tripolar_simulation(distributed_grid) + + η = reconstruct_global_field(simulation.model.free_surface.η) + u = reconstruct_global_field(simulation.model.velocities.u) + v = reconstruct_global_field(simulation.model.velocities.v) + c = reconstruct_global_field(simulation.model.tracers.c) + + fill_halo_regions!(η) + fill_halo_regions!(u) + fill_halo_regions!(v) + fill_halo_regions!(c) + + if arch.local_rank == 0 + jldsave(filename; η = interior(η, :, :, 1), + u = interior(u, :, :, 1), + v = interior(v, :, :, 1), + c = interior(c, :, :, 1)) + end + + MPI.Barrier(MPI.COMM_WORLD) + MPI.Finalize() + + return nothing +end + +# Just a random simulation on a tripolar grid +function run_tripolar_simulation(grid) + + model = HydrostaticFreeSurfaceModel(; grid = grid, + free_surface = SplitExplicitFreeSurface(grid; substeps = 20), + tracers = :c, + buoyancy = nothing, + tracer_advection = WENO(), + momentum_advection = VectorInvariant(), + coriolis = HydrostaticSphericalCoriolis()) + + # Setup the model with a gaussian sea surface height + # near the physical north poles and one near the equator + ηᵢ(λ, φ, z) = exp(- (φ - 90)^2 / 10^2) + exp(- φ^2 / 10^2) + + set!(model, η = ηᵢ, c = ηᵢ) + + simulation = Simulation(model, Δt = 5minutes, stop_iteration = 100) + + run!(simulation) + + return simulation +end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 3a8c9b0..86dca76 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,16 +1,3 @@ -using OrthogonalSphericalShellGrids -using OrthogonalSphericalShellGrids.Oceananigans -using Oceananigans: GPU, CPU -using Oceananigans.CUDA -using Test - -arch = CUDA.has_cuda_gpu() ? GPU() : CPU() - -@testset "OrthogonalSphericalShellGrids.jl" begin - # We probably do not need any unit tests. - - # Test the grid? - grid = TripolarGrid(arch; size = (10, 10, 1)) - - # Test boundary conditions? 
-end +include("dependencies_for_runtests.jl") +include("distributed_tests_utils.jl") +include("test_distributed_tripolar.jl") diff --git a/test/test_distributed_tripolar.jl b/test/test_distributed_tripolar.jl new file mode 100644 index 0000000..3b14891 --- /dev/null +++ b/test/test_distributed_tripolar.jl @@ -0,0 +1,127 @@ +include("dependencies_for_runtests.jl") +include("distributed_tests_utils.jl") +using MPI + +@testset "Test distributed TripolarGrid boundary conditions..." begin + tripolar_boundary_conditions = """ + using MPI + MPI.Init() + + include("distributed_tests_utils.jl") + + arch = Distributed(CPU(), partition = Partition(2, 2)) + grid = TripolarGrid(arch; size = (20, 20, 1), z = (-1000, 0)) + + u = XFaceField(grid) + v = YFaceField(grid) + c = CenterField(grid) + + set!(u, (x, y, z) -> y) + set!(v, (x, y, z) -> y) + set!(c, (x, y, z) -> y) + + fill_halo_regions!((u, v, c)) + + jldopen("distributed_tripolar_boundary_conditions_" * string(arch.local_rank) * ".jld2", "w") do file + file["u"] = u.data + file["v"] = v.data + file["c"] = c.data + end + """ + + write("distributed_tests.jl", tripolar_boundary_conditions) + mpiexec(cmd -> run(`$cmd -n 4 julia --project distributed_tests.jl`)) + rm("distributed_tests.jl") + + # Run the serial computation + grid = TripolarGrid(size = (20, 20, 1), z = (-1000, 0)) + + u = XFaceField(grid) + v = YFaceField(grid) + c = CenterField(grid) + + set!(u, (x, y, z) -> y) + set!(v, (x, y, z) -> y) + set!(c, (x, y, z) -> y) + + fill_halo_regions!((u, v, c)) + + # Retrieve Parallel quantities from rank 1 (the north-west rank) + up1 = jldopen("distributed_tripolar_boundary_conditions_1.jld2")["u"]; + vp1 = jldopen("distributed_tripolar_boundary_conditions_1.jld2")["v"]; + cp1 = jldopen("distributed_tripolar_boundary_conditions_1.jld2")["c"]; + + # Retrieve Parallel quantities from rank 3 (the north-east rank) + up3 = jldopen("distributed_tripolar_boundary_conditions_3.jld2")["u"]; + vp3 = jldopen("distributed_tripolar_boundary_conditions_3.jld2")["v"]; + cp3 = jldopen("distributed_tripolar_boundary_conditions_3.jld2")["c"]; + + @test u.data[-2:14, 7:end-1, 1] ≈ up1.parent[2:end, 1:end-1, 5] + @test v.data[-3:14, 7:end-1, 1] ≈ vp1.parent[:, 1:end-1, 5] + @test c.data[-3:14, 7:end-1, 1] ≈ cp1.parent[:, 1:end-1, 5] + + @test u.data[8:end, 7:end-1, 1] ≈ up3.parent[2:end, 1:end-1, 5] + @test v.data[7:end, 7:end-1, 1] ≈ vp3.parent[:, 1:end-1, 5] + @test c.data[7:end, 7:end-1, 1] ≈ cp3.parent[:, 1:end-1, 5] +end + +run_slab_distributed_grid = """ + using MPI + MPI.Init() + + include("distributed_tests_utils.jl") + arch = Distributed(CPU(), partition = Partition(1, 4)) + run_distributed_tripolar_grid(arch, "distributed_slab_tripolar.jld2") +""" + +run_pencil_distributed_grid = """ + using MPI + MPI.Init() + + include("distributed_tests_utils.jl") + arch = Distributed(CPU(), partition = Partition(2, 2)) + run_distributed_tripolar_grid(arch, "distributed_pencil_tripolar.jld2") +""" + +@testset "Test distributed TripolarGrid simulations..." 
begin + # Run the distributed grid simulation + write("distributed_tests.jl", run_slab_distributed_grid) + mpiexec(cmd -> run(`$cmd -n 4 julia --project distributed_tests.jl`)) + rm("distributed_tests.jl") + + write("distributed_tests.jl", run_pencil_distributed_grid) + mpiexec(cmd -> run(`$cmd -n 4 julia --project distributed_tests.jl`)) + rm("distributed_tests.jl") + + # Run the serial computation + grid = TripolarGrid(size = (100, 100, 1), z = (-1000, 0)) + grid = mask_singularities(grid) + + simulation = run_tripolar_simulation(grid) + + # Retrieve Serial quantities + us, vs, ws = simulation.model.velocities + cs = simulation.model.tracers.c + ηs = simulation.model.free_surface.η + + # Retrieve Parallel quantities + up_slab = jldopen("distributed_slab_tripolar.jld2")["u"] + vp_slab = jldopen("distributed_slab_tripolar.jld2")["v"] + ηp_slab = jldopen("distributed_slab_tripolar.jld2")["η"] + cp_slab = jldopen("distributed_slab_tripolar.jld2")["c"] + + up_pencil = jldopen("distributed_pencil_tripolar.jld2")["u"] + vp_pencil = jldopen("distributed_pencil_tripolar.jld2")["v"] + ηp_pencil = jldopen("distributed_pencil_tripolar.jld2")["η"] + cp_pencil = jldopen("distributed_pencil_tripolar.jld2")["c"] + + @test interior(us, :, :, 1) ≈ up_slab + @test interior(vs, :, :, 1) ≈ vp_slab + @test interior(cs, :, :, 1) ≈ cp_slab + @test interior(ηs, :, :, 1) ≈ ηp_slab + + @test interior(us, :, :, 1) ≈ up_pencil + @test interior(vs, :, :, 1) ≈ vp_pencil + @test interior(cs, :, :, 1) ≈ cp_pencil + @test interior(ηs, :, :, 1) ≈ ηp_pencil +end \ No newline at end of file
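For reference, the new distributed code paths above can also be driven outside the test suite by a short standalone script. The sketch below is illustrative only: it reuses the helpers introduced in test/distributed_tests_utils.jl (mask_singularities, run_tripolar_simulation) and the same Partition(2, 2) pencil decomposition exercised by the tests, and it assumes it is launched under MPI with something like mpiexec -n 4 julia --project driver.jl (the script name is arbitrary).

# Illustrative driver script; assumes 4 MPI ranks and that it sits next to the test utilities.
using MPI
MPI.Init()

include("distributed_tests_utils.jl") # provides mask_singularities, run_tripolar_simulation, reconstruct_global_field

# A 2x2 pencil decomposition partitions both x and y, so the northernmost ranks take the
# DistributedCommunicationBoundaryCondition + ZipperHaloCommunicationRanks path added in this patch.
arch = Distributed(CPU(), partition = Partition(2, 2))
grid = TripolarGrid(arch; size = (100, 100, 1), z = (-1000, 0), halo = (5, 5, 5))
grid = mask_singularities(grid)

simulation = run_tripolar_simulation(grid)

# Each rank holds only its local slice of the fields; gather the free surface and report from rank 0.
η = reconstruct_global_field(simulation.model.free_surface.η)
arch.local_rank == 0 && @info "Global free-surface extrema" extrema(interior(η))

MPI.Finalize()

A Partition(1, 4) slab decomposition instead goes through the serial ZipperBoundaryCondition branch of regularize_field_boundary_conditions, as exercised by run_slab_distributed_grid above.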