Skip to content

Commit

Permalink
GPU Tracer Part I
Browse files Browse the repository at this point in the history
  • Loading branch information
OsKnoth committed Oct 23, 2023
1 parent 3f54717 commit 688cd71
Show file tree
Hide file tree
Showing 7 changed files with 230 additions and 48 deletions.
6 changes: 4 additions & 2 deletions Examples/testNHSphere.jl
Original file line number Diff line number Diff line change
Expand Up @@ -201,14 +201,16 @@ if Problem == "Galewski"
elseif Problem == "BaroWaveDrySphere"
Profile = CGDycore.BaroWaveExample()(Param,Phys)
elseif Problem == "HeldSuarezDrySphere"
Profile = CGDycore.HeldSuarezExample()(Param,Phys)
Profile = CGDycore.HeldSuarezDryExample()(Param,Phys)
elseif Problem == "HeldSuarezMoistSphere"
Profile = CGDycore.HeldSuarezMoistExample()(Param,Phys)
end

@show "InitialConditions"
U = CGDycore.InitialConditions(backend,FTB,CG,Metric,Phys,Global,Profile,Param)

# Forcing
if Problem == "HeldSuarezDrySphere"
if Problem == "HeldSuarezDrySphere" || Problem == "HeldSuarezMoistSphere"
Force = CGDycore.HeldSuarezForcing()(Param,Phys)
else
Force = CGDycore.NoForcing()(Param,Phys)
Expand Down
6 changes: 3 additions & 3 deletions Jobs/JobNHHeldSuarezDrySphere
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
mpirun -n 1 julia --project Examples/testNHSphere.jl \
mpirun -n 8 julia --project Examples/testNHSphere.jl \
--Problem="HeldSuarezDrySphere" \
--Device="GPU" \
--Device="CPU" \
--GPUType="Metal" \
--NumberThreadGPU=256 \
--NumberThreadGPU=512 \
--FloatTypeBackend="Float32" \
--NumV=5 \
--NumTr=0 \
Expand Down
58 changes: 26 additions & 32 deletions Jobs/JobNHHeldSuarezMoistSphere
Original file line number Diff line number Diff line change
@@ -1,49 +1,43 @@
mpirun -n 6 julia --project Examples/testNHSphere.jl \
--Problem="HeldSuarezMoistSphere" \
--Device="CPU" \
--GPUType="Metal" \
--NumberThreadGPU=512 \
--FloatTypeBackend="Float32" \
--Equation="CompressibleMoist" \
--NumV=5 \
--NumTr=2 \
--ProfRho="DecayingTemperatureProfile" \
--ProfTheta="DecayingTemperatureProfile" \
--ProfVel="Const" \
--ProfVelGeo="BaroWaveDrySphere" \
--ProfpBGrd="" \
--ProfRhoBGrd="" \
--Source=true \
--Force=true \
--Curl=false \
--ModelType="VectorInvariant" \
--Coriolis=true \
--Upwind=true \
--ModelType="VectorInvariant" \
--Curl=true \
--VerticalDiffusionMom=false \
--SurfaceFluxMom=false \
--VerticalDiffusion=true \
--SurfaceFlux=true \
--HorLimit=false \
--Damping=true \
--Geos=true \
--StrideDamp=10000 \
--Relax=1.0e-3 \
--Buoyancy=true \
--Decomp="EqualArea" \
--Equation="CompressibleMoist" \
--Microphysics=true \
--RelCloud=1.e-3 \
--Rain=1.0 \
--PertTh=true \
--SimDays=10 \
--SimDays=300 \
--SimSeconds=0 \
--PrintSeconds=0 \
--PrintMinutes=0 \
--PrintHours=0 \
--PrintDays=0 \
--PrintSeconds=300 \
--PrintStartTime=0 \
--StartAverageDays=200 \
--dtau=300 \
--StartAverageDays=100 \
--Flat=true \
--dtau=150 \
--IntMethod="Rosenbrock" \
--Table="ROSRK3" \
--Table="SSP-Knoth" \
--TopoS="" \
--Stretch=true \
--StretchType="Exp" \
--GridType="CubedSphere" \
--nz=45 \
--nPanel=16 \
--H=30000.0 \
--nz=64 \
--nPanel=30 \
--H=45000.0 \
--OrdPoly=3 \
--HyperVisc=true \
--HyperDCurl=1.e16 \
--HyperDGrad=1.e16 \
--HyperDDiv=1.e16

--HyperDCurl=5.e14 \
--HyperDGrad=5.e14 \
--HyperDDiv=5.e14
164 changes: 158 additions & 6 deletions src/GPU/OperatorKernel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -482,7 +482,54 @@ end
end
end

@kernel function HyperViscKernelKoeff!(F,@Const(U),@Const(Cache),@Const(D),@Const(DW),@Const(dXdxI),
@kernel function HyperViscTracerKernel!(FTr,@Const(Tr),@Const(Rho),@Const(D),@Const(DW),@Const(dXdxI),
@Const(JJ),@Const(M),@Const(Glob))

I, J, iz = @index(Local, NTuple)
_,_,Iz,IF = @index(Global, NTuple)

ColumnTilesDim = @uniform @groupsize()[3]
N = @uniform @groupsize()[1]
Nz = @uniform @ndrange()[3]
NF = @uniform @ndrange()[4]

TrCol = @localmem eltype(FTr) (N,N, ColumnTilesDim)
TrCxCol = @localmem eltype(FTr) (N,N, ColumnTilesDim)
TrCyCol = @localmem eltype(FTr) (N,N, ColumnTilesDim)
if Iz <= Nz && IF <= NF
ID = I + (J - 1) * N
@inbounds ind = Glob[ID,IF]
@inbounds TrCol[I,J,iz] = Tr[Iz,ind] / Rho[Iz,ind]
end
@synchronize

if Iz <= Nz && IF <= NF
ID = I + (J - 1) * N
@inbounds Dxc = D[I,1] * TrCol[1,J,iz]
@inbounds Dyc = D[J,1] * TrCol[I,1,iz]
for k = 2 : N
@inbounds Dxc += D[I,k] * TrCol[k,J,iz]
@inbounds Dyc += D[J,k] * TrCol[I,k,iz]
end
@views @inbounds (GradDx, GradDy) = Grad12(Dxc,Dyc,dXdxI[1:2,1:2,:,ID,Iz,IF],JJ[ID,:,Iz,IF])
@views @inbounds (tempx, tempy) = Contra12(GradDx,GradDy,dXdxI[1:2,1:2,:,ID,Iz,IF])
@inbounds TrCxCol[I,J,iz] = tempx
@inbounds TrCyCol[I,J,iz] = tempy
end

@synchronize
if Iz <= Nz && IF <= NF
@inbounds DivTr = DW[I,1] * TrCxCol[1,J,iz] + DW[J,1] * TrCyCol[I,1,iz]
for k = 2 : N
@inbounds DivTr += DW[I,k] * TrCxCol[k,J,iz] + DW[J,k] * TrCyCol[I,k,iz]
end
ID = I + (J - 1) * N
@inbounds ind = Glob[ID,IF]
@inbounds @atomic FTr[Iz,ind] += DivTr / M[Iz,ind]
end
end

@kernel function HyperViscKoeffKernel!(F,@Const(U),@Const(Cache),@Const(D),@Const(DW),@Const(dXdxI),
@Const(JJ),@Const(M),@Const(Glob),KoeffCurl,KoeffGrad,KoeffDiv)

# gi, gj, gz, gF = @index(Group, NTuple)
Expand Down Expand Up @@ -561,6 +608,54 @@ end
end
end

@kernel function HyperViscTracerKoeffKernel!(FTr,@Const(Cache),@Const(Rho),@Const(D),@Const(DW),@Const(dXdxI),
@Const(JJ),@Const(M),@Const(Glob),KoeffDiv)

I, J, iz = @index(Local, NTuple)
_,_,Iz,IF = @index(Global, NTuple)

ColumnTilesDim = @uniform @groupsize()[3]
N = @uniform @groupsize()[1]
Nz = @uniform @ndrange()[3]
NF = @uniform @ndrange()[4]

TrCol = @localmem eltype(FTr) (N,N, ColumnTilesDim)
TrCxCol = @localmem eltype(FTr) (N,N, ColumnTilesDim)
TrCyCol = @localmem eltype(FTr) (N,N, ColumnTilesDim)
if Iz <= Nz && IF <= NF
ID = I + (J - 1) * N
@inbounds ind = Glob[ID,IF]
@inbounds TrCol[I,J,iz] = Cache[Iz,ind]
end
@synchronize

if Iz <= Nz && IF <= NF
ID = I + (J - 1) * N
@inbounds Dxc = D[I,1] * TrCol[1,J,iz]
@inbounds Dyc = D[J,1] * TrCol[I,1,iz]
for k = 2 : N
@inbounds Dxc += D[I,k] * TrCol[k,J,iz]
@inbounds Dyc += D[J,k] * TrCol[I,k,iz]
end
@views @inbounds (GradDx, GradDy) = Grad12(Dxc,Dyc,dXdxI[1:2,1:2,:,ID,Iz,IF],JJ[ID,:,Iz,IF])
@inbounds ind = Glob[ID,IF]
@views @inbounds (tempx, tempy) = Contra12(Rho[Iz,ind],GradDx,GradDy,dXdxI[1:2,1:2,:,ID,Iz,IF])
@inbounds TrCxCol[I,J,iz] = tempx
@inbounds TrCyCol[I,J,iz] = tempy
end

@synchronize
if Iz <= Nz && IF <= NF
@inbounds DivTr = DW[I,1] * TrCxCol[1,J,iz] + DW[J,1] * TrCyCol[I,1,iz]
for k = 2 : N
@inbounds DivTr += DW[I,k] * TrCxCol[k,J,iz] + DW[J,k] * TrCyCol[I,k,iz]
end
ID = I + (J - 1) * N
@inbounds ind = Glob[ID,IF]
@inbounds @atomic FTr[Iz,ind] += -KoeffDiv * DivTr / M[Iz,ind]
end
end

@kernel function DivRhoGradKernel1!(F,@Const(U),@Const(D),@Const(DW),@Const(dXdxI),
@Const(JJ),@Const(M),@Const(Glob))

Expand Down Expand Up @@ -1217,7 +1312,7 @@ end
end
end

@kernel function ThFunCKernel!(Profile,Th,time,@Const(Glob),@Const(X))
@kernel function RhoThFunCKernel!(Profile,RhoTh,time,@Const(Glob),@Const(X))

I, iz = @index(Local, NTuple)
_,Iz,IF = @index(Global, NTuple)
Expand All @@ -1234,10 +1329,51 @@ end
@inbounds x3 = eltype(X)(0.5) * (X[I,1,3,Iz,IF] + X[I,2,3,Iz,IF])
xS = SVector{3}(x1, x2 ,x3)
RhoP,_,_,_ ,ThP = Profile(xS,time)
@inbounds Th[Iz,ind] = RhoP * ThP
@inbounds RhoTh[Iz,ind] = RhoP * ThP
end
end

@kernel function RhoVFunCKernel!(Profile,RhoV,time,@Const(Glob),@Const(X))

I, iz = @index(Local, NTuple)
_,Iz,IF = @index(Global, NTuple)

ColumnTilesDim = @uniform @groupsize()[2]
N = @uniform @groupsize()[1]
Nz = @uniform @ndrange()[2]
NF = @uniform @ndrange()[3]

if Iz <= Nz
@inbounds ind = Glob[I,IF]
@inbounds x1 = eltype(X)(0.5) * (X[I,1,1,Iz,IF] + X[I,2,1,Iz,IF])
@inbounds x2 = eltype(X)(0.5) * (X[I,1,2,Iz,IF] + X[I,2,2,Iz,IF])
@inbounds x3 = eltype(X)(0.5) * (X[I,1,3,Iz,IF] + X[I,2,3,Iz,IF])
xS = SVector{3}(x1, x2 ,x3)
RhoP,_,_,_,_,QvP = Profile(xS,time)
@inbounds RhoV[Iz,ind] = RhoP * QvP
end
end

@kernel function RhoCFunCKernel!(Profile,RhoC,time,@Const(Glob),@Const(X))

I, iz = @index(Local, NTuple)
_,Iz,IF = @index(Global, NTuple)

ColumnTilesDim = @uniform @groupsize()[2]
N = @uniform @groupsize()[1]
Nz = @uniform @ndrange()[2]
NF = @uniform @ndrange()[3]

if Iz <= Nz
@inbounds ind = Glob[I,IF]
@inbounds x1 = eltype(X)(0.5) * (X[I,1,1,Iz,IF] + X[I,2,1,Iz,IF])
@inbounds x2 = eltype(X)(0.5) * (X[I,1,2,Iz,IF] + X[I,2,2,Iz,IF])
@inbounds x3 = eltype(X)(0.5) * (X[I,1,3,Iz,IF] + X[I,2,3,Iz,IF])
xS = SVector{3}(x1, x2 ,x3)
RhoP,_,_,_,_,_,QcP = Profile(xS,time)
@inbounds RhoC[Iz,ind] = RhoP * QcP
end
end

@kernel function ComputeFunFKernel!(Profile,w,time,@Const(Glob),@Const(X),Param)

Expand Down Expand Up @@ -1265,7 +1401,7 @@ end
NG = @uniform @ndrange()[2]

if IG <= NG
@views @inbounds FRho,Fu,Fv,Fw,FRhoTh = Force(U[Iz,IG,:],p[Iz,IG],lat[IG])
@inbounds FRho,Fu,Fv,Fw,FRhoTh = Force(view(U,Iz,IG,1:5),p[Iz,IG],lat[IG])
@inbounds F[Iz,IG,1] += FRho
@inbounds F[Iz,IG,2] += Fu
@inbounds F[Iz,IG,3] += Fv
Expand Down Expand Up @@ -1367,6 +1503,8 @@ function FcnGPU!(F,U,FE,Metric,Phys,Cache,Exchange,Global,Param,Force,DiscType)
Nz = size(F,1)
NDoF = size(F,2)
NF = size(Glob,2)
NumV = Global.Model.NumV
NumTr = Global.Model.NumTr
Koeff = Global.Model.HyperDDiv
Temp1 = Cache.Temp1
NumberThreadGPU = Global.ParallelCom.NumberThreadGPU
Expand All @@ -1392,25 +1530,39 @@ function FcnGPU!(F,U,FE,Metric,Phys,Cache,Exchange,Global,Param,Force,DiscType)
NzG = min(div(NumberThreadGPU,N*N),Nz)
group = (N, N, NzG, 1)
ndrange = (N, N, Nz, NF)
groupTr = group
ndrangeTr = ndrange

KRhoGradKinKernel! = RhoGradKinKernel!(backend,group)
KGradKernel! = GradKernel!(backend,group)
KDivRhoGradKernel! = DivRhoGradKernel!(backend, group)
KHyperViscKernel! = HyperViscKernel!(backend, group)
KHyperViscKernelKoeff! = HyperViscKernelKoeff!(backend, group)
KHyperViscTracerKernel! = HyperViscTracerKernel!(backend, groupTr)
KHyperViscKoeffKernel! = HyperViscKoeffKernel!(backend, group)
KHyperViscTracerKoeffKernel! = HyperViscTracerKoeffKernel!(backend, group)
KDivRhoTrUpwind3Kernel! = DivRhoTrUpwind3Kernel!(backend, group)
KMomentumCoriolisKernel! = MomentumCoriolisKernel!(backend, group)
# KMomentumKernel! = MomentumKernel!(backend, group)

@. CacheF = 0
@views MRho = CacheF[:,:,6]
KHyperViscKernel!(CacheF,MRho,U,DS,DW,dXdxI,J,M,Glob,ndrange=ndrange)
for iT = 1 : NumTr
@views CacheTr = Temp1[:,:,iT + 6]
KHyperViscTracerKernel!(CacheTr,U[:,:,iT+NumV],Rho,DS,DW,dXdxI,J,M,Glob,ndrange=ndrangeTr)
end
KernelAbstractions.synchronize(backend)

@. F = 0
KHyperViscKernelKoeff!(F,U,CacheF,DS,DW,dXdxI,J,M,Glob,KoeffCurl,KoeffGrad,KoeffDiv,ndrange=ndrange)
KHyperViscKoeffKernel!(F,U,CacheF,DS,DW,dXdxI,J,M,Glob,KoeffCurl,KoeffGrad,KoeffDiv,ndrange=ndrange)
for iT = 1 : NumTr
@views CacheTr = Temp1[:,:,iT + 6]
@views KHyperViscTracerKoeffKernel!(F[:,:,iT+NumV],CacheTr,Rho,DS,DW,dXdxI,J,M,Glob,
KoeffDiv,ndrange=ndrangeTr)
end
KernelAbstractions.synchronize(backend)


KGradKernel!(F,U,p,DS,dXdxI,J,M,MRho,Glob,Phys,ndrange=ndrange)
KernelAbstractions.synchronize(backend)

Expand Down
25 changes: 23 additions & 2 deletions src/Model/Example.jl
Original file line number Diff line number Diff line change
Expand Up @@ -218,9 +218,9 @@ function (profile::BaroWaveExample)(Param,Phys)
return local_profile
end

Base.@kwdef struct HeldSuarezExample <: Example end
Base.@kwdef struct HeldSuarezDryExample <: Example end

function (profile::HeldSuarezExample)(Param,Phys)
function (profile::HeldSuarezDryExample)(Param,Phys)
function local_profile(x,time)
FT = eltype(x)
(Lon,Lat,R)=cart2sphere(x[1],x[2],x[3])
Expand All @@ -236,3 +236,24 @@ function (profile::HeldSuarezExample)(Param,Phys)
end
return local_profile
end

Base.@kwdef struct HeldSuarezMoistExample <: Example end

function (profile::HeldSuarezMoistExample)(Param,Phys)
function local_profile(x,time)
FT = eltype(x)
(Lon,Lat,R)=cart2sphere(x[1],x[2],x[3])
z=max(R-Phys.RadEarth,FT(0));
temp = Param.T_Init + Param.LapseRate * z #+ rand(FT) * FT(0.1) * (z < FT(5000))
pres = Phys.p0 * (FT(1) + Param.LapseRate / Param.T_Init * z)^(-Phys.Grav / Phys.Rd / Param.LapseRate)
Rho = pres / Phys.Rd / temp
Th = temp * (Phys.p0 / pres)^Phys.kappa
uS = FT(0)
vS = FT(0)
w = FT(0)
qv = FT(0)
qc = FT(0)
return (Rho,uS,vS,w,Th,qv,qc)
end
return local_profile
end
1 change: 1 addition & 0 deletions src/Model/Force.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ end
struct NoForcing <: AbstractForcing end
function (::NoForcing)(Param,Phys)
function local_force(U,p,lat)
return FT(0),FT(0),FT(0),FT(0),FT(0)
end
return local_force
end
Expand Down
Loading

0 comments on commit 688cd71

Please sign in to comment.