From 54a5e0393587769745e6f3a466ecd719ba6b1124 Mon Sep 17 00:00:00 2001 From: albert-de-montserrat Date: Fri, 27 Sep 2024 21:03:33 +0200 Subject: [PATCH 01/10] fixups --- src/MarkerChain/Advection/advection.jl | 16 +++++++------- src/MarkerChain/init.jl | 30 +++++++++++++------------- src/MarkerChain/interp1.jl | 19 ++++++++++++---- src/MarkerChain/move.jl | 20 ++++++++--------- src/MarkerChain/resample.jl | 2 +- src/Particles/move_safe.jl | 15 ++++++++++++- src/Utils.jl | 9 +++++++- 7 files changed, 71 insertions(+), 40 deletions(-) diff --git a/src/MarkerChain/Advection/advection.jl b/src/MarkerChain/Advection/advection.jl index 5ef22445..0a6c149e 100644 --- a/src/MarkerChain/Advection/advection.jl +++ b/src/MarkerChain/Advection/advection.jl @@ -9,16 +9,16 @@ end # Two-step Runge-Kutta advection scheme for marker chains function advection!( - particles::MarkerChain, + chain::MarkerChain, method::AbstractAdvectionIntegrator, V, grid_vi::NTuple{N,NTuple{N,T}}, dt, ) where {N,T} - (; coords, index) = particles + (; coords, index) = chain # compute some basic stuff - ni = size(index) + ni = size(index, 1) dxi = compute_dx(first(grid_vi)) # Need to transpose grid_vy and Vy to reuse interpolation kernels @@ -34,7 +34,7 @@ end # DIMENSION AGNOSTIC KERNELS # ParallelStencil function Runge-Kuttaadvection function for 3D staggered grids -@parallel_indices (I...) function advection_markerchain_kernel!( +@parallel_indices (i) function advection_markerchain_kernel!( p, method::AbstractAdvectionIntegrator, V::NTuple{N,T}, @@ -45,17 +45,17 @@ end dt, ) where {N,T} for ipart in cellaxes(index) - doskip(index, ipart, I...) && continue + doskip(index, ipart, i) && continue # skip if particle does not exist in this memory location - doskip(index, ipart, I...) && continue + doskip(index, ipart, i) && continue # extract particle coordinates - pᵢ = get_particle_coords(p, ipart, I...) + pᵢ = get_particle_coords(p, ipart, i) # advect particle pᵢ_new = advect_particle_markerchain(method, pᵢ, V, grid, local_limits, dxi, dt) # update particle coordinates for k in 1:N - @inbounds @index p[k][ipart, I...] = pᵢ_new[k] + @inbounds @index p[k][ipart, i] = pᵢ_new[k] end end diff --git a/src/MarkerChain/init.jl b/src/MarkerChain/init.jl index c7b4f5ba..6880a9a6 100644 --- a/src/MarkerChain/init.jl +++ b/src/MarkerChain/init.jl @@ -1,27 +1,27 @@ function init_markerchain(backend, nxcell, min_xcell, max_xcell, xv, initial_elevation) - @parallel_indices (i) function fill_coords_index!( - px, py, index, x, initial_elevation, dx_chain, nxcell, max_xcell - ) - # lower-left corner of the cell - x0 = x[i] - # fill index array - for ip in 1:nxcell - @index px[ip, i] = x0 + dx_chain * ip - @index py[ip, i] = initial_elevation - @index index[ip, i] = true - end - return nothing - end - nx = length(xv) - 1 dx = xv[2] - xv[1] dx_chain = dx / (nxcell + 1) px, py = ntuple(_ -> @fill(NaN, (nx,), celldims = (max_xcell,)), Val(2)) index = @fill(false, (nx,), celldims = (max_xcell,), eltype = Bool) - @parallel (1:nx) fill_coords_index!( + @parallel (1:nx) fill_markerchain_coords_index!( px, py, index, xv, initial_elevation, dx_chain, nxcell, max_xcell ) return MarkerChain(backend, (px, py), index, xv, min_xcell, max_xcell) end + +@parallel_indices (i) function fill_markerchain_coords_index!( + px, py, index, x, initial_elevation, dx_chain, nxcell, max_xcell +) + # lower-left corner of the cell + x0 = x[i] + # fill index array + for ip in 1:nxcell + @index px[ip, i] = x0 + dx_chain * ip + @index py[ip, i] = initial_elevation + @index index[ip, i] = true + end + return nothing +end diff --git a/src/MarkerChain/interp1.jl b/src/MarkerChain/interp1.jl index 4f5d7753..c2a693c9 100644 --- a/src/MarkerChain/interp1.jl +++ b/src/MarkerChain/interp1.jl @@ -1,7 +1,13 @@ @inline _interp1D(xq, x0, x1, y0, y1) = fma((xq - x0), (y1 - y0) * inv(x1 - x0), y0) function interp1D_extremas(xq, x, y) - last_I = findlast(!isnan, x) + last_I = 1 + for i in length(x):2 + if !isnan(x[i]) + last_I = i + break + end + end x_lo, x_hi = x[1], x[last_I] @inbounds for j in eachindex(x)[1:(end - 1)] x0, x1 = x[j], x[j + 1] @@ -29,9 +35,15 @@ function interp1D_extremas(xq, x, y) end function interp1D_inner(xq, x, y, cell_coords, I::Integer) - last_I = findlast(!isnan, x) + last_I = 1 + for i in length(x):2 + if !isnan(x[i]) + last_I = i + break + end + end x_lo, x_hi = x[1], x[last_I] - @inbounds for j in 1:last_I + @inbounds for j in 1:last_I x0, x1 = x[j], x[j + 1] # interpolation @@ -54,7 +66,6 @@ function interp1D_inner(xq, x, y, cell_coords, I::Integer) return _interp1D(xq, x0, x1, y0, y1) end end - @show x_lo, x_hi, xq, I return error("xq outside domain") end diff --git a/src/MarkerChain/move.jl b/src/MarkerChain/move.jl index 571eb036..dc6414a2 100644 --- a/src/MarkerChain/move.jl +++ b/src/MarkerChain/move.jl @@ -1,7 +1,7 @@ function move_particles!(chain::MarkerChain) (; coords, index, cell_vertices) = chain dxi = compute_dx(cell_vertices) - nxi = size(index) + nxi = size(index, 1) grid = cell_vertices @parallel (@idx nxi) move_particles_launcher!(coords, grid, dxi, index) @@ -9,8 +9,8 @@ function move_particles!(chain::MarkerChain) return nothing end -@parallel_indices (I...) function move_particles_launcher!(coords, grid, dxi, index) - _move_particles!(coords, grid, dxi, index, I) +@parallel_indices (i) function move_particles_launcher!(coords, grid, dxi, index) + _move_particles!(coords, grid, dxi, index, i) return nothing end @@ -19,11 +19,11 @@ chop(I::NTuple{3,T}) where {T} = I[1], I[2] function _move_particles!(coords, grid, dxi, index, idx) # coordinate of the lower-most-left coordinate of the parent cell - corner_xi = corner_coordinate(grid, chop(idx)) + corner_xi = corner_coordinate(grid, idx) # iterate over particles in child cell for ip in cellaxes(index) - doskip(index, ip, idx...) && continue + doskip(index, ip, idx) && continue pᵢ = cache_particle(coords, ip, idx) # check whether the particle is @@ -36,12 +36,12 @@ function _move_particles!(coords, grid, dxi, index, idx) if !(any(<(1), new_cell) || any(new_cell .> length(grid))) ## THE PARTICLE DID NOT ESCAPE THE DOMAIN # remove particle from child cell - @inbounds @index index[ip, chop(idx)] = false - @inbounds @index coords[1][ip, chop(idx)] = NaN - @inbounds @index coords[2][ip, chop(idx)] = NaN + @inbounds @index index[ip, idx] = false + @inbounds @index coords[1][ip, idx] = NaN + @inbounds @index coords[2][ip, idx] = NaN # check whether there's empty space in parent cell free_idx = find_free_memory(index, new_cell...) - free_idx == 0 && continue + iszero(free_idx) && continue # move particle and its fields to the first free memory location @inbounds @index index[free_idx, new_cell] = true fill_particle!(coords, pᵢ, free_idx, new_cell) @@ -49,7 +49,7 @@ function _move_particles!(coords, grid, dxi, index, idx) else ## SOMEHOW THE PARTICLE DID ESCAPE THE DOMAIN ## => REMOVE IT - @inbounds @index index[ip, idx...] = false + @inbounds @index index[ip, idx] = false empty_particle!(coords, ip, idx) end end diff --git a/src/MarkerChain/resample.jl b/src/MarkerChain/resample.jl index 4e216c2f..4b9dbd67 100644 --- a/src/MarkerChain/resample.jl +++ b/src/MarkerChain/resample.jl @@ -52,7 +52,7 @@ function resample_cell!( interp1D_extremas(xq, x_cell, y_cell) end if isnan(yq) - @show I, y_cell + error("BOOM") end @index py[ip, I] = yq @index index[ip, I] = true diff --git a/src/Particles/move_safe.jl b/src/Particles/move_safe.jl index 835f93cd..06f8f519 100644 --- a/src/Particles/move_safe.jl +++ b/src/Particles/move_safe.jl @@ -166,7 +166,11 @@ end return ntuple(i -> @index(args[i][ip, I...]), Val(N1)) end -@inline function cache_particle(p::NTuple{N1,T}, ip, I::NTuple{N2,Int64}) where {T,N1,N2} +@inline function cache_args(args::NTuple{N}, ip, I::Integer) where {N} + return ntuple(i -> @index(args[i][ip, I]), Val(N)) +end + +@inline function cache_particle(p::NTuple{N1,T}, ip, I::Union{Integer, NTuple{N2,Integer}}) where {T,N1,N2} return cache_args(p, ip, I) end @@ -183,6 +187,15 @@ end end end +@generated function empty_particle!( + p::NTuple{N}, ip, I::Integer +) where {N} + quote + Base.@_inline_meta + Base.Cartesian.@nexprs $N i -> @index p[i][ip, I] = NaN + end +end + @inline function fill_particle!( p::NTuple{N,T1}, field::NTuple{N,T2}, ip, I::Int64 ) where {N,T1,T2} diff --git a/src/Utils.jl b/src/Utils.jl index a6820544..cadaf962 100644 --- a/src/Utils.jl +++ b/src/Utils.jl @@ -32,13 +32,20 @@ end @inline doskip(index, ip, I::Vararg{Int64,N}) where {N} = iszero(@inbounds @index index[ip, I...]) -function get_particle_coords(p::NTuple{N,CellArray}, ip, idx::Vararg{Int64,N}) where {N} +function get_particle_coords(p::NTuple{N,CellArray}, ip, idx::Vararg{Integer,N}) where {N} ntuple(Val(N)) do i Base.@_inline_meta @inbounds @index p[i][ip, idx...] end end +function get_particle_coords(p::NTuple{N,CellArray}, ip, idx::Integer) where {N} + ntuple(Val(N)) do i + Base.@_inline_meta + @inbounds @index p[i][ip, idx] + end +end + function get_particle_coords(p::NTuple{N,T}, ip) where {N,T} ntuple(Val(N)) do i Base.@_inline_meta From c332e94ba89826b646b23be977c7789e13159aaf Mon Sep 17 00:00:00 2001 From: albert-de-montserrat Date: Mon, 30 Sep 2024 15:05:44 +0200 Subject: [PATCH 02/10] sort chain cells --- src/MarkerChain/resample.jl | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/src/MarkerChain/resample.jl b/src/MarkerChain/resample.jl index 4b9dbd67..e3df351c 100644 --- a/src/MarkerChain/resample.jl +++ b/src/MarkerChain/resample.jl @@ -22,13 +22,25 @@ function resample_cell!( ) where {T} # cell particles coordinates - x_cell, y_cell = coords[1][I], coords[2][I] + index_I = @cell index[I] px, py = coords[1], coords[2] + x_cell = @cell px[I] + y_cell = @cell py[I] + + # sort particles in the cell + perms = sortperm(x_cell) + x_cell = x_cell[perms] + y_cell = y_cell[perms] + index_I = index_I[perms] + + @cell index[I] = index_I + @cell px[I] = x_cell + @cell py[I] = y_cell # lower-left corner of the cell cell_vertex = cell_vertices[I] # number of particles in the cell - np = count(index[I]) + np = count(index_I) # dx of the new chain dx_chain = dx_cells / (np + 1) # resample the cell if the number of particles is @@ -46,10 +58,19 @@ function resample_cell!( # interpolated y coordinated yq = if 1 < I < length(index) # inner cells; this is true (ncells-2) consecutive times - interp1D_inner(xq, x_cell, y_cell, coords, I) + yq = interp1D_inner(xq, x_cell, y_cell, coords, I) + if isnan(yq) + error("BOOM 1") + end + yq else # first and last cells - interp1D_extremas(xq, x_cell, y_cell) + yq = interp1D_extremas(xq, x_cell, y_cell) + if isnan(yq) + error("BOOM 1") + end + + yq end if isnan(yq) error("BOOM") From 5e1a0a05818c77cf862da1eb9003e6d84db6b840 Mon Sep 17 00:00:00 2001 From: albert-de-montserrat Date: Mon, 30 Sep 2024 17:54:13 +0200 Subject: [PATCH 03/10] fixups --- ext/JustPICAMDGPUExt.jl | 14 ++++++++++++++ ext/JustPICCUDAExt.jl | 14 ++++++++++++++ src/MarkerChain/init.jl | 4 ++-- src/MarkerChain/interp1.jl | 13 ++++++++++--- src/MarkerChain/resample.jl | 4 ++++ 5 files changed, 44 insertions(+), 5 deletions(-) diff --git a/ext/JustPICAMDGPUExt.jl b/ext/JustPICAMDGPUExt.jl index 55f82ca4..0c610820 100644 --- a/ext/JustPICAMDGPUExt.jl +++ b/ext/JustPICAMDGPUExt.jl @@ -194,6 +194,20 @@ module _2D ## MakerChain + function JustPIC._2D.init_markerchain(::Type{AMDGPUBackend}, nxcell, min_xcell, max_xcell, xv, initial_elevation) + nx = length(xv) - 1 + dx = xv[2] - xv[1] + dx_chain = dx / (nxcell + 1) + px, py = ntuple(_ -> @fill(NaN, (nx,), celldims = (max_xcell,)), Val(2)) + index = @fill(false, (nx,), celldims = (max_xcell,), eltype = Bool) + + @parallel (1:nx) fill_markerchain_coords_index!( + px, py, index, xv, initial_elevation, dx_chain, nxcell, max_xcell + ) + + return MarkerChain(AMDGPUBackend, (px, py), index, xv, min_xcell, max_xcell) + end + function JustPIC._2D.advect_markerchain!( chain::MarkerChain{AMDGPUBackend}, method::AbstractAdvectionIntegrator, diff --git a/ext/JustPICCUDAExt.jl b/ext/JustPICCUDAExt.jl index cf8b9c3a..ad0eac53 100644 --- a/ext/JustPICCUDAExt.jl +++ b/ext/JustPICCUDAExt.jl @@ -190,6 +190,20 @@ module _2D ## MakerChain + function JustPIC._2D.init_markerchain(::Type{CUDABackend}, nxcell, min_xcell, max_xcell, xv, initial_elevation) + nx = length(xv) - 1 + dx = xv[2] - xv[1] + dx_chain = dx / (nxcell + 1) + px, py = ntuple(_ -> @fill(NaN, (nx,), celldims = (max_xcell,)), Val(2)) + index = @fill(false, (nx,), celldims = (max_xcell,), eltype = Bool) + + @parallel (1:nx) fill_markerchain_coords_index!( + px, py, index, xv, initial_elevation, dx_chain, nxcell, max_xcell + ) + + return MarkerChain(CUDABackend, (px, py), index, xv, min_xcell, max_xcell) + end + function JustPIC._2D.advect_markerchain!( chain::MarkerChain{CUDABackend}, method::AbstractAdvectionIntegrator, diff --git a/src/MarkerChain/init.jl b/src/MarkerChain/init.jl index 6880a9a6..37f55795 100644 --- a/src/MarkerChain/init.jl +++ b/src/MarkerChain/init.jl @@ -1,4 +1,4 @@ -function init_markerchain(backend, nxcell, min_xcell, max_xcell, xv, initial_elevation) +function init_markerchain(::Type{JustPIC.CPUBackend}, nxcell, min_xcell, max_xcell, xv, initial_elevation) nx = length(xv) - 1 dx = xv[2] - xv[1] dx_chain = dx / (nxcell + 1) @@ -9,7 +9,7 @@ function init_markerchain(backend, nxcell, min_xcell, max_xcell, xv, initial_ele px, py, index, xv, initial_elevation, dx_chain, nxcell, max_xcell ) - return MarkerChain(backend, (px, py), index, xv, min_xcell, max_xcell) + return MarkerChain(JustPIC.CPUBackend, (px, py), index, xv, min_xcell, max_xcell) end @parallel_indices (i) function fill_markerchain_coords_index!( diff --git a/src/MarkerChain/interp1.jl b/src/MarkerChain/interp1.jl index c2a693c9..4924a399 100644 --- a/src/MarkerChain/interp1.jl +++ b/src/MarkerChain/interp1.jl @@ -2,7 +2,7 @@ function interp1D_extremas(xq, x, y) last_I = 1 - for i in length(x):2 + for i in length(x):-1:2 if !isnan(x[i]) last_I = i break @@ -36,7 +36,7 @@ end function interp1D_inner(xq, x, y, cell_coords, I::Integer) last_I = 1 - for i in length(x):2 + for i in length(x):-1:2 if !isnan(x[i]) last_I = i break @@ -74,7 +74,14 @@ end @inline function left_cell_right_particle(cell_coords, I) px = cell_coords[1][I - 1] - ip = findlast(!isnan, px) + ip = 1 + for i in length(x):-1:2 + if !isnan(px[i]) + ip = i + break + end + end + return px[ip], @index(cell_coords[2][ip, I - 1]) end diff --git a/src/MarkerChain/resample.jl b/src/MarkerChain/resample.jl index e3df351c..7a1a4a3c 100644 --- a/src/MarkerChain/resample.jl +++ b/src/MarkerChain/resample.jl @@ -60,6 +60,10 @@ function resample_cell!( # inner cells; this is true (ncells-2) consecutive times yq = interp1D_inner(xq, x_cell, y_cell, coords, I) if isnan(yq) + @show xq + @show x_cell + @show y_cell + @show I error("BOOM 1") end yq From 31e094988799c98afc96d0075f8467654e9152c6 Mon Sep 17 00:00:00 2001 From: albert-de-montserrat Date: Tue, 1 Oct 2024 09:37:43 +0200 Subject: [PATCH 04/10] up up up --- src/MarkerChain/interp1.jl | 6 ++++-- src/MarkerChain/resample.jl | 30 +++++++++++++++--------------- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/src/MarkerChain/interp1.jl b/src/MarkerChain/interp1.jl index 4924a399..2bb980a7 100644 --- a/src/MarkerChain/interp1.jl +++ b/src/MarkerChain/interp1.jl @@ -31,7 +31,8 @@ function interp1D_extremas(xq, x, y) return _interp1D(xq, x0, x1, y0, y1) end end - return error("xq outside domain") + # return error("xq outside domain") + return Nan end function interp1D_inner(xq, x, y, cell_coords, I::Integer) @@ -66,7 +67,8 @@ function interp1D_inner(xq, x, y, cell_coords, I::Integer) return _interp1D(xq, x0, x1, y0, y1) end end - return error("xq outside domain") + # return error("xq outside domain") + return Nan end @inline right_cell_left_particle(cell_coords, I::Int) = diff --git a/src/MarkerChain/resample.jl b/src/MarkerChain/resample.jl index 7a1a4a3c..ea54eb69 100644 --- a/src/MarkerChain/resample.jl +++ b/src/MarkerChain/resample.jl @@ -59,26 +59,26 @@ function resample_cell!( yq = if 1 < I < length(index) # inner cells; this is true (ncells-2) consecutive times yq = interp1D_inner(xq, x_cell, y_cell, coords, I) - if isnan(yq) - @show xq - @show x_cell - @show y_cell - @show I - error("BOOM 1") - end - yq + # if isnan(yq) + # @show xq + # @show x_cell + # @show y_cell + # @show I + # error("BOOM 1") + # end + # yq else # first and last cells yq = interp1D_extremas(xq, x_cell, y_cell) - if isnan(yq) - error("BOOM 1") - end + # if isnan(yq) + # error("BOOM 1") + # end - yq - end - if isnan(yq) - error("BOOM") + # yq end + # if isnan(yq) + # error("BOOM") + # end @index py[ip, I] = yq @index index[ip, I] = true end From 10c9e8d3a14398772208e7c3549523f3ba4dd107 Mon Sep 17 00:00:00 2001 From: Albert de Montserrat Date: Mon, 14 Oct 2024 20:16:51 +0200 Subject: [PATCH 05/10] some ups --- src/PhaseRatios/kernels.jl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/PhaseRatios/kernels.jl b/src/PhaseRatios/kernels.jl index 1d9b015c..d74ffc74 100644 --- a/src/PhaseRatios/kernels.jl +++ b/src/PhaseRatios/kernels.jl @@ -58,11 +58,11 @@ end for offsetᵢ in -1:0, offsetⱼ in -1:0, offsetₖ in -1:0 i_cell = I[1] + offsetᵢ - !(0 < i_cell < ni[1] + 1) && continue + 0 < i_cell < ni[1] + 1 || continue j_cell = I[2] + offsetⱼ - !(0 < j_cell < ni[2] + 1) && continue + 0 < j_cell < ni[2] + 1 || continue k_cell = I[3] + offsetₖ - !(0 < k_cell < ni[3] + 1) && continue + 0 < k_cell < ni[3] + 1 || continue cell_index = i_cell, j_cell, k_cell @@ -98,9 +98,9 @@ end for offsetᵢ in -1:0, offsetⱼ in -1:0 i_cell = I[1] + offsetᵢ - !(0 < i_cell < ni[1] + 1) && continue + 0 < i_cell < ni[1] + 1 || continue j_cell = I[2] + offsetⱼ - !(0 < j_cell < ni[2] + 1) && continue + 0 < j_cell < ni[2] + 1 || continue cell_index = i_cell, j_cell @@ -110,7 +110,7 @@ end x = @inline bilinear_weight(cell_vertex, p, di) ph_local = @index phases[ip, cell_index...] # this is doing sum(w * δij(i, phase)), where δij is the Kronecker delta - w = w .+ x .* ntuple(j -> (ph_local == j), NC) + w = ntuple(j -> (ph_local == j) * x[i] + w[i], NC) end end @@ -139,7 +139,7 @@ function phase_ratio_weights( # sumw += x # reduce ph_local = ph[i] # this is doing sum(w * δij(i, phase)), where δij is the Kronecker delta - w = w .+ x .* ntuple(j -> (ph_local == j), Val(NC)) + w = ntuple(j -> (ph_local == j) * x[i] + w[i], Val(NC)) end w = w .* inv(sum(w)) return w From a8ccee55e0581d7a435cd2e1c92315090eb559ba Mon Sep 17 00:00:00 2001 From: albert-de-montserrat Date: Mon, 28 Oct 2024 12:05:32 +0100 Subject: [PATCH 06/10] fix chain sorting on the GPU --- src/MarkerChain/resample.jl | 112 ++++++++++++++++++++---------------- 1 file changed, 61 insertions(+), 51 deletions(-) diff --git a/src/MarkerChain/resample.jl b/src/MarkerChain/resample.jl index ea54eb69..20315b9e 100644 --- a/src/MarkerChain/resample.jl +++ b/src/MarkerChain/resample.jl @@ -12,6 +12,10 @@ function resample!(chain::MarkerChain) nx = length(index) dx_cells = cell_length(chain) + # sort marker chain - can't be done at the cell level because + # SA can't be sorted inside a GPU kernel + _sort_chain!(chain) + # call kernel @parallel (1:nx) resample!(coords, cell_vertices, index, min_xcell, max_xcell, dx_cells) return nothing @@ -27,16 +31,6 @@ function resample_cell!( x_cell = @cell px[I] y_cell = @cell py[I] - # sort particles in the cell - perms = sortperm(x_cell) - x_cell = x_cell[perms] - y_cell = y_cell[perms] - index_I = index_I[perms] - - @cell index[I] = index_I - @cell px[I] = x_cell - @cell py[I] = y_cell - # lower-left corner of the cell cell_vertex = cell_vertices[I] # number of particles in the cell @@ -47,48 +41,48 @@ function resample_cell!( # less than min_xcell or it is too distorted do_resampling = (np < min_xcell) || isdistorded(x_cell, dx_chain) - np_new = max(min_xcell, np) - dx_chain = dx_cells / (np_new + 1) - if do_resampling - # @show I - # fill index array - for ip in 1:np_new - # x query point - @index px[ip, I] = xq = cell_vertex + dx_chain * ip - # interpolated y coordinated - yq = if 1 < I < length(index) - # inner cells; this is true (ncells-2) consecutive times - yq = interp1D_inner(xq, x_cell, y_cell, coords, I) - # if isnan(yq) - # @show xq - # @show x_cell - # @show y_cell - # @show I - # error("BOOM 1") - # end - # yq - else - # first and last cells - yq = interp1D_extremas(xq, x_cell, y_cell) - # if isnan(yq) - # error("BOOM 1") - # end + # np_new = max(min_xcell, np) + # dx_chain = dx_cells / (np_new + 1) + # if do_resampling + # # @show I + # # fill index array + # for ip in 1:np_new + # # x query point + # @index px[ip, I] = xq = cell_vertex + dx_chain * ip + # # interpolated y coordinated + # yq = if 1 < I < length(index) + # # inner cells; this is true (ncells-2) consecutive times + # yq = interp1D_inner(xq, x_cell, y_cell, coords, I) + # # if isnan(yq) + # # @show xq + # # @show x_cell + # # @show y_cell + # # @show I + # # error("BOOM 1") + # # end + # # yq + # else + # # first and last cells + # yq = interp1D_extremas(xq, x_cell, y_cell) + # # if isnan(yq) + # # error("BOOM 1") + # # end - # yq - end - # if isnan(yq) - # error("BOOM") - # end - @index py[ip, I] = yq - @index index[ip, I] = true - end - # fill empty memory locations - for ip in (np_new + 1):max_xcell - @index px[ip, I] = NaN - @index py[ip, I] = NaN - @index index[ip, I] = false - end - end + # # yq + # end + # # if isnan(yq) + # # error("BOOM") + # # end + # @index py[ip, I] = yq + # @index index[ip, I] = true + # end + # # fill empty memory locations + # for ip in (np_new + 1):max_xcell + # @index px[ip, I] = NaN + # @index py[ip, I] = NaN + # @index index[ip, I] = false + # end + # end return nothing end @@ -117,3 +111,19 @@ function isdistorded(x_cell, dx_ideal) end return false end + +# sort marker chain cells +function _sort_chain!(chain::MarkerChain{T}) where T + + @inline sort_dimension(::Type{JustPIC.CPUBackend}) = 3 + @inline sort_dimension(::T) where T = 1 + + (; coords, index) = chain + # sort permutations of each cell + perms = similar(coords[1].data, Int64) + sortperm!(perms, coords[1].data; dims=sort_dimension(T)) + coords[2].data .= @views coords[2].data[perms] + index.data .= @views index.data[perms] + + return nothing +end \ No newline at end of file From e93218adb06ef8a80ce4dc09af704612bc181d9e Mon Sep 17 00:00:00 2001 From: albert-de-montserrat Date: Mon, 28 Oct 2024 15:23:41 +0100 Subject: [PATCH 07/10] fix marker chain on GPUs --- src/MarkerChain/Advection/advection.jl | 1 + src/MarkerChain/interp1.jl | 41 ++++++------- src/MarkerChain/resample.jl | 81 ++++++++++---------------- src/MarkerChain/sort.jl | 34 ----------- src/common.jl | 3 - 5 files changed, 53 insertions(+), 107 deletions(-) delete mode 100644 src/MarkerChain/sort.jl diff --git a/src/MarkerChain/Advection/advection.jl b/src/MarkerChain/Advection/advection.jl index 0a6c149e..372d1778 100644 --- a/src/MarkerChain/Advection/advection.jl +++ b/src/MarkerChain/Advection/advection.jl @@ -1,3 +1,4 @@ + function advect_markerchain!( chain::MarkerChain, method::AbstractAdvectionIntegrator, V, grid_vxi, dt ) diff --git a/src/MarkerChain/interp1.jl b/src/MarkerChain/interp1.jl index 2bb980a7..bf8cfa28 100644 --- a/src/MarkerChain/interp1.jl +++ b/src/MarkerChain/interp1.jl @@ -1,4 +1,4 @@ -@inline _interp1D(xq, x0, x1, y0, y1) = fma((xq - x0), (y1 - y0) * inv(x1 - x0), y0) +@inline _interp1D(xq, x0, x1, y0, y1) = muladd((xq - x0), (y1 - y0) * inv(x1 - x0), y0) function interp1D_extremas(xq, x, y) last_I = 1 @@ -32,10 +32,10 @@ function interp1D_extremas(xq, x, y) end end # return error("xq outside domain") - return Nan + return NaN end -function interp1D_inner(xq, x, y, cell_coords, I::Integer) +function interp1D_inner(xq, x, y, coords, I::Integer) last_I = 1 for i in length(x):-1:2 if !isnan(x[i]) @@ -44,18 +44,12 @@ function interp1D_inner(xq, x, y, cell_coords, I::Integer) end end x_lo, x_hi = x[1], x[last_I] - @inbounds for j in 1:last_I + @inbounds for j in 1:last_I-1 x0, x1 = x[j], x[j + 1] - # interpolation - if x0 ≤ xq ≤ x1 - y0, y1 = y[j], y[j + 1] - return _interp1D(xq, x0, x1, y0, y1) - end - # interpolate using the last particle of left-neighbouring cell if xq ≤ x_lo - x0, y0 = left_cell_right_particle(cell_coords, I) + x0, y0 = left_cell_right_particle(coords, I) x1, y1 = x[1], y[1] return _interp1D(xq, x0, x1, y0, y1) end @@ -63,28 +57,35 @@ function interp1D_inner(xq, x, y, cell_coords, I::Integer) # interpolate using the first particle of right-neighbouring cell if xq ≥ x_hi x0, y0 = x[last_I], y[last_I] - x1, y1 = right_cell_left_particle(cell_coords, I) + x1, y1 = right_cell_left_particle(coords, I) + return _interp1D(xq, x0, x1, y0, y1) + end + + # interpolation + if x0 ≤ xq ≤ x1 + y0, y1 = y[j], y[j + 1] return _interp1D(xq, x0, x1, y0, y1) end end # return error("xq outside domain") - return Nan + return NaN end -@inline right_cell_left_particle(cell_coords, I::Int) = - @index(cell_coords[1][1, I + 1]), @index(cell_coords[2][1, I + 1]) +@inline right_cell_left_particle(coords, I::Int) = + @index(coords[1][1, I + 1]), @index(coords[2][1, I + 1]) -@inline function left_cell_right_particle(cell_coords, I) - px = cell_coords[1][I - 1] +@inline function left_cell_right_particle(coords, I) + px = coords[1] + # px = @cell coords[1][I - 1] ip = 1 - for i in length(x):-1:2 - if !isnan(px[i]) + for i in cellnum(px):-1:2 + if !isnan(@index px[i, I-1]) ip = i break end end - return px[ip], @index(cell_coords[2][ip, I - 1]) + return @index(px[ip, I-1]), @index(coords[2][ip, I - 1]) end @inline function is_above_surface(xq, yq, coords, cell_vertices) diff --git a/src/MarkerChain/resample.jl b/src/MarkerChain/resample.jl index 20315b9e..5411d9a6 100644 --- a/src/MarkerChain/resample.jl +++ b/src/MarkerChain/resample.jl @@ -14,7 +14,7 @@ function resample!(chain::MarkerChain) # sort marker chain - can't be done at the cell level because # SA can't be sorted inside a GPU kernel - _sort_chain!(chain) + sort_chain!(chain) # call kernel @parallel (1:nx) resample!(coords, cell_vertices, index, min_xcell, max_xcell, dx_cells) @@ -33,7 +33,7 @@ function resample_cell!( # lower-left corner of the cell cell_vertex = cell_vertices[I] - # number of particles in the cell + # number of p`articles in the cell np = count(index_I) # dx of the new chain dx_chain = dx_cells / (np + 1) @@ -41,48 +41,32 @@ function resample_cell!( # less than min_xcell or it is too distorted do_resampling = (np < min_xcell) || isdistorded(x_cell, dx_chain) - # np_new = max(min_xcell, np) - # dx_chain = dx_cells / (np_new + 1) - # if do_resampling - # # @show I - # # fill index array - # for ip in 1:np_new - # # x query point - # @index px[ip, I] = xq = cell_vertex + dx_chain * ip - # # interpolated y coordinated - # yq = if 1 < I < length(index) - # # inner cells; this is true (ncells-2) consecutive times - # yq = interp1D_inner(xq, x_cell, y_cell, coords, I) - # # if isnan(yq) - # # @show xq - # # @show x_cell - # # @show y_cell - # # @show I - # # error("BOOM 1") - # # end - # # yq - # else - # # first and last cells - # yq = interp1D_extremas(xq, x_cell, y_cell) - # # if isnan(yq) - # # error("BOOM 1") - # # end - - # # yq - # end - # # if isnan(yq) - # # error("BOOM") - # # end - # @index py[ip, I] = yq - # @index index[ip, I] = true - # end - # # fill empty memory locations - # for ip in (np_new + 1):max_xcell - # @index px[ip, I] = NaN - # @index py[ip, I] = NaN - # @index index[ip, I] = false - # end - # end + np_new = max(min_xcell, np) + dx_chain = dx_cells / (np_new + 1) + if do_resampling + # fill index array + for ip in 1:np_new + # x query point + xq = cell_vertex + dx_chain * ip + # interpolated y coordinated + yq = if 1 < I < length(index) + # inner cells; this is true (ncells-2) consecutive times + yq = interp1D_inner(xq, x_cell, y_cell, coords, I) + else + # first and last cells + yq = interp1D_extremas(xq, x_cell, y_cell) + end + @index px[ip, I] = xq + @index py[ip, I] = yq + @index index[ip, I] = true + end + # fill empty memory locations + for ip in (np_new + 1):max_xcell + @index px[ip, I] = NaN + @index py[ip, I] = NaN + @index index[ip, I] = false + end + end return nothing end @@ -113,15 +97,12 @@ function isdistorded(x_cell, dx_ideal) end # sort marker chain cells -function _sort_chain!(chain::MarkerChain{T}) where T - - @inline sort_dimension(::Type{JustPIC.CPUBackend}) = 3 - @inline sort_dimension(::T) where T = 1 +function sort_chain!(chain::MarkerChain{T}) where T (; coords, index) = chain # sort permutations of each cell - perms = similar(coords[1].data, Int64) - sortperm!(perms, coords[1].data; dims=sort_dimension(T)) + perms = sortperm(coords[1].data; dims=2) + coords[1].data .= @views coords[1].data[perms] coords[2].data .= @views coords[2].data[perms] index.data .= @views index.data[perms] diff --git a/src/MarkerChain/sort.jl b/src/MarkerChain/sort.jl deleted file mode 100644 index c882f484..00000000 --- a/src/MarkerChain/sort.jl +++ /dev/null @@ -1,34 +0,0 @@ -function sort_chain!(p::MarkerChain) - (; coords, index) = p - # sort permutations of each cell - ni = size(first(coords)) - @parallel (@idx ni) _sort!(coords, index) -end - -# 1D MarkerChain -@parallel_indices (I...) function _sort!(coords::NTuple{2,T}, index) where {T} - - # extract and save cell particles coordinates - particle_xᵢ = ntuple(Val(2)) do i - coords[i][I...] - end - indexᵢ = index[I...] - - # sort permutations of each cell - permutations = sortperm(first(particle_xᵢ)) - - # if cell is already sorted, do nothing - if !issorted(permutations) - # otherwise, sort the cell - for ip in eachindex(permutations) - permutationᵢ = permutations[ip] - @assert permutationᵢ ≤ length(permutations) - - @index coords[1][ip, I...] = particle_xᵢ[1][permutationᵢ] - @index coords[2][ip, I...] = particle_xᵢ[2][permutationᵢ] - @index index[ip, I...] = indexᵢ[permutationᵢ] - end - end - - return nothing -end diff --git a/src/common.jl b/src/common.jl index 242e03c6..e04b4645 100644 --- a/src/common.jl +++ b/src/common.jl @@ -64,9 +64,6 @@ export move_particles! include("MarkerChain/interp1.jl") -include("MarkerChain/sort.jl") -export sort_chain! - include("MarkerChain/resample.jl") export resample! From 04634d59e650eb6eba8747f720882d64f04bfe34 Mon Sep 17 00:00:00 2001 From: albert-de-montserrat Date: Mon, 28 Oct 2024 15:40:17 +0100 Subject: [PATCH 08/10] fix CUDA kernels --- src/PhaseRatios/kernels.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/PhaseRatios/kernels.jl b/src/PhaseRatios/kernels.jl index ad37e9e5..377d4490 100644 --- a/src/PhaseRatios/kernels.jl +++ b/src/PhaseRatios/kernels.jl @@ -99,9 +99,9 @@ end for offsetᵢ in -1:0, offsetⱼ in -1:0 i_cell = I[1] + offsetᵢ - 0 < i_cell < ni[1] + 1 || continue + !(0 < i_cell < ni[1] + 1) && continue j_cell = I[2] + offsetⱼ - 0 < j_cell < ni[2] + 1 || continue + !(0 < j_cell < ni[2] + 1) && continue cell_index = i_cell, j_cell @@ -113,7 +113,7 @@ end x = @inline bilinear_weight(cell_vertex, p, di) ph_local = @index phases[ip, cell_index...] # this is doing sum(w * δij(i, phase)), where δij is the Kronecker delta - w = ntuple(j -> (ph_local == j) * x[i] + w[i], NC) + w = w .+ x .* ntuple(j -> (ph_local == j), NC) end end @@ -142,7 +142,7 @@ function phase_ratio_weights( # sumw += x # reduce ph_local = ph[i] # this is doing sum(w * δij(i, phase)), where δij is the Kronecker delta - w = ntuple(j -> (ph_local == j) * x[i] + w[i], Val(NC)) + w = w .+ x .* ntuple(j -> (ph_local == j), NC) end w = w .* inv(sum(w)) return w From 89c048461ab9a3cc6bdf4bbf5c66121c84c59ee4 Mon Sep 17 00:00:00 2001 From: albert-de-montserrat Date: Mon, 28 Oct 2024 15:40:27 +0100 Subject: [PATCH 09/10] add Pascal's function --- src/MarkerChain/init.jl | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/MarkerChain/init.jl b/src/MarkerChain/init.jl index 37f55795..ae207197 100644 --- a/src/MarkerChain/init.jl +++ b/src/MarkerChain/init.jl @@ -25,3 +25,18 @@ end end return nothing end + +@parallel_indices (i) function fill_markerchain_coords_index!( + px, py, index, x, initial_elevation::AbstractArray{T, 1}, dx_chain, nxcell, max_xcell +) where {T} + # lower-left corner of the cell + x0 = x[i] + initial_elevation0 = initial_elevation[i] + # fill index array + for ip in 1:nxcell + @index px[ip, i] = x0 + dx_chain * ip + @index py[ip, i] = initial_elevation0 + @index index[ip, i] = true + end + return nothing +end \ No newline at end of file From c34a83737106d14e5f6d7fb56ebb19959e90c6f3 Mon Sep 17 00:00:00 2001 From: albert-de-montserrat Date: Mon, 28 Oct 2024 16:15:38 +0100 Subject: [PATCH 10/10] style correction --- src/MarkerChain/resample.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/MarkerChain/resample.jl b/src/MarkerChain/resample.jl index 5411d9a6..a8579128 100644 --- a/src/MarkerChain/resample.jl +++ b/src/MarkerChain/resample.jl @@ -51,10 +51,10 @@ function resample_cell!( # interpolated y coordinated yq = if 1 < I < length(index) # inner cells; this is true (ncells-2) consecutive times - yq = interp1D_inner(xq, x_cell, y_cell, coords, I) + interp1D_inner(xq, x_cell, y_cell, coords, I) else # first and last cells - yq = interp1D_extremas(xq, x_cell, y_cell) + interp1D_extremas(xq, x_cell, y_cell) end @index px[ip, I] = xq @index py[ip, I] = yq