Skip to content

Commit

Permalink
New diffusion kernel
Browse files Browse the repository at this point in the history
  • Loading branch information
OsKnoth committed Oct 9, 2024
1 parent f941b4e commit 3a73367
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 4 deletions.
32 changes: 31 additions & 1 deletion TestKernels/testKernels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ using MPI
# Element type used for every array allocated by this test script.
FT = Float32
Phys = DyCore.PhysParameters{FT}()

# Number of timed launches per kernel (upper bound of the `@time for iter = 1 : TestIter`
# loops). The second assignment is the one in effect.
TestIter = 200
TestIter = 2

NF = 5400 # 30*30*6
NumG = 48602
Expand Down Expand Up @@ -64,8 +64,12 @@ groupG = (Nz, NumGG)
# Launch range for the column kernels: one work-item per (level, node).
ndrangeG = (Nz, NumG)

# Tendency array, a scratch copy of the same shape, and the state array.
# The last dimension holds the NumV + NumTr prognostic variables.
F = KernelAbstractions.ones(backend,FT,Nz,NumG,NumV + NumTr)
# Non-copying slice of the tendency for variable 5 (same slot as `Th` in U).
FTh = view(F, :, :, 5)
CacheF = KernelAbstractions.ones(backend,FT,Nz,NumG,NumV + NumTr)
U = KernelAbstractions.ones(backend,FT,Nz,NumG,NumV + NumTr)
# Overwrite the state in place with values >= 1.
@. U = abs(rand()) + 1
# Non-copying slices of the state: variable 5 and variable 1
# (these match ThPos = 5 and RhoPos = 1 defined further down in the script).
Th = view(U, :, :, 5)
Rho = view(U, :, :, 1)
D = KernelAbstractions.ones(backend,FT,4,4)
DW = KernelAbstractions.ones(backend,FT,4,4)
dXdxI = KernelAbstractions.ones(backend,FT,3,3,2,DoF,Nz,NF)
Expand All @@ -81,6 +85,10 @@ Glob = KernelAbstractions.zeros(backend,Int,DoF,NF)
# Copy the index table GlobCPU into the backend array Glob.
copyto!(Glob,GlobCPU)
CoriolisFun = GPU.CoriolisShallow()(Phys)
GravitationFun = GPU.GravitationShallow()(Phys)
# Inputs of the vertical diffusion kernels, which index them as K[Iz,IC] and
# dz[Iz,IC]. They must therefore stay (Nz, NumG) backend arrays: fill them in
# place with a broadcast assignment (same pattern as `@. U = ...`). A plain
# `KV = abs(rand()) + 1` would rebind the name to a Float64 scalar and throw the
# freshly allocated array away.
KV = KernelAbstractions.ones(backend,FT,Nz,NumG)
@. KV = abs(rand()) + 1
dz = KernelAbstractions.ones(backend,FT,Nz,NumG)
@. dz = abs(rand()) + 100

# Instantiate the momentum kernel for `backend` with workgroup size `group`,
# then launch it once over `ndrange`.
KMomentumVectorInvariantCoriolisKernel! = GPU.MomentumVectorInvariantCoriolisKernel!(backend,group)
KMomentumVectorInvariantCoriolisKernel!(F,U,D,dXdxI,J,X,M,Glob,CoriolisFun,ndrange=ndrange)
Expand Down Expand Up @@ -118,6 +126,28 @@ KernelAbstractions.synchronize(backend)
KernelAbstractions.synchronize(backend)
end

# --- Vertical scalar diffusion: existing kernel -----------------------------
@show "Diffusion Scalar"
# Start from a zero tendency so the checksum below reflects a single launch.
F .= 0
KVerticalDiffusionScalarKernel! = GPU.VerticalDiffusionScalarKernel!(backend,groupG)
# Untimed first launch: it writes into FTh (a view into F) and keeps any
# first-call compilation out of the timed loop.
KVerticalDiffusionScalarKernel!(FTh,Th,Rho,KV,dz;ndrange=ndrangeG)
KernelAbstractions.synchronize(backend)
# Checksum of the tendency, for comparison with the "New" kernel variant.
@show sum(abs.(F))
# Timed launches; the tendency keeps accumulating across iterations.
@time for iter in 1:TestIter
  KVerticalDiffusionScalarKernel!(FTh,Th,Rho,KV,dz;ndrange=ndrangeG)
  KernelAbstractions.synchronize(backend)
end

# --- Vertical scalar diffusion: new kernel variant ---------------------------
@show "DiffusionNew Scalar"
# Reset the tendency so the checksum is comparable with the previous section.
F .= 0
KVerticalDiffusionScalarNewKernel! = GPU.VerticalDiffusionScalarNewKernel!(backend,groupG)
# Untimed first launch: it writes into FTh (a view into F) and keeps any
# first-call compilation out of the timed loop.
KVerticalDiffusionScalarNewKernel!(FTh,Th,Rho,KV,dz;ndrange=ndrangeG)
KernelAbstractions.synchronize(backend)
# Checksum of the tendency after one launch.
@show sum(abs.(F))
# Timed launches; the tendency keeps accumulating across iterations.
@time for iter in 1:TestIter
  KVerticalDiffusionScalarNewKernel!(FTh,Th,Rho,KV,dz;ndrange=ndrangeG)
  KernelAbstractions.synchronize(backend)
end

# Variable positions. RhoPos and ThPos match the last-dimension indices used
# above for the slices Rho = U[:,:,1] and Th = U[:,:,5].
RhoPos = 1
ThPos = 5
# NOTE(review): presumably the slot of the first tracer (index NumV + 1) — confirm.
RhoVPos = 6
Expand Down
48 changes: 45 additions & 3 deletions src/GPU/OperatorKernel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -782,10 +782,52 @@ end
NumG = @uniform @ndrange()[2]

if Iz < Nz && IC <= NumG
dzT = dz[Iz+1,IC]
dzB = dz[Iz,IC]
grad = eltype(FTr)(2) * K[Iz,IC] * (Tr[Iz+1,IC] / Rho[Iz+1,IC] -
Tr[Iz,IC] / Rho[Iz,IC]) / (dz[Iz+1,IC] + dz[Iz,IC])
@atomic :monotonic FTr[Iz,IC] += grad / dz[Iz,IC]
@atomic :monotonic FTr[Iz+1,IC] += -grad / dz[Iz+1,IC]
Tr[Iz,IC] / Rho[Iz,IC]) / (dzT + dzB)
@atomic :monotonic FTr[Iz,IC] += grad / dzB
@atomic :monotonic FTr[Iz+1,IC] += -grad / dzT
end
end

# VerticalDiffusionScalarNewKernel!(FTr, Tr, Rho, K, dz)
#
# Vertical diffusion tendency of a scalar, variant that stages its inputs in
# workgroup-local memory and needs no atomics.
#
# Launch layout: ndrange = (Nz, NumG), workgroup size = (nz, NodeTiles).
# Each work-item owns one cell (Iz, IC) and adds to FTr[Iz,IC]
#
#     (gradT - gradB) / dz[Iz,IC]
#
# where, with q = Tr / Rho,
#
#     gradT = 2 * K[Iz,IC]   * (q[Iz+1,IC] - q[Iz,IC])   / (dz[Iz+1,IC] + dz[Iz,IC])
#     gradB = 2 * K[Iz-1,IC] * (q[Iz,IC]   - q[Iz-1,IC]) / (dz[Iz,IC]   + dz[Iz-1,IC])
#
# and gradT = 0 for Iz == Nz, gradB = 0 for Iz == 1.
#
# Arguments
#   FTr            : output; accumulated with `+=`, not overwritten.
#   Tr, Rho, K, dz : read-only inputs indexed as [Iz,IC]. K[Iz,IC] is used for the
#                    pair of cells (Iz, Iz+1), so row Nz of K is never read.
#
# NOTE: neighbouring levels are read from the workgroup tile via iz-1 / iz+1, so a
# whole column has to live in one workgroup (first workgroup dimension == Nz);
# the kernel does not fetch neighbours from adjacent tiles.
@kernel inbounds = true function VerticalDiffusionScalarNewKernel!(FTr,@Const(Tr),@Const(Rho),@Const(K),
  @Const(dz))
  # Lower-case indices address the workgroup tile, upper-case the global arrays.
  iz,iC = @index(Local, NTuple)
  Iz,IC = @index(Global, NTuple)

  nz = @uniform @groupsize()[1]
  NodeTiles = @uniform @groupsize()[2]
  Nz = @uniform @ndrange()[1]
  NumG = @uniform @ndrange()[2]

  qLoc = @localmem eltype(FTr) (nz,NodeTiles)
  dzLoc = @localmem eltype(FTr) (nz,NodeTiles)
  KLoc = @localmem eltype(FTr) (nz,NodeTiles)
  if Iz <= Nz && IC <= NumG
    qLoc[iz,iC] = Tr[Iz,IC] / Rho[Iz,IC]
    # Global arrays take the global column index IC; the local index iC would
    # make every tile read the layer thickness of the first NodeTiles columns.
    dzLoc[iz,iC] = dz[Iz,IC]
  end
  if Iz < Nz && IC <= NumG
    KLoc[iz,iC] = K[Iz,IC]
  end

  # All tile entries must be written before any work-item reads a neighbour.
  @synchronize

  # Same bounds as the load phase: only work-items that own a real cell write.
  if Iz <= Nz && IC <= NumG
    if Iz < Nz
      gradT = eltype(FTr)(2) * KLoc[iz,iC] * (qLoc[iz+1,iC] -
        qLoc[iz,iC]) / (dzLoc[iz+1,iC] + dzLoc[iz,iC])
    else
      gradT = eltype(FTr)(0)
    end
    if Iz > 1
      gradB = eltype(FTr)(2) * KLoc[iz-1,iC] * (qLoc[iz,iC] -
        qLoc[iz-1,iC]) / (dzLoc[iz,iC] + dzLoc[iz-1,iC])
    else
      gradB = eltype(FTr)(0)
    end
    # Each work-item updates only its own entry, so a plain `+=` is race-free.
    FTr[Iz,IC] += (gradT - gradB) / dzLoc[iz,iC]
  end
end

Expand Down

0 comments on commit 3a73367

Please sign in to comment.