Skip to content

Commit

Permalink
Hyperdiffusion W
Browse files Browse the repository at this point in the history
  • Loading branch information
OsKnoth committed Jan 3, 2024
1 parent 1f9ac77 commit 741acef
Show file tree
Hide file tree
Showing 6 changed files with 145 additions and 8 deletions.
Binary file modified GlobInd
Binary file not shown.
10 changes: 5 additions & 5 deletions Jobs/NHSphere/JobNHHeldSuarezMoistSphere
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
mpirun -n 6 julia --project Examples/testNHSphere.jl \
mpirun -n 4 julia --project Examples/testNHSphere.jl \
--Problem="HeldSuarezMoistSphere" \
--Device="CPU" \
--GPUType="Metal" \
Expand All @@ -14,13 +14,13 @@ mpirun -n 6 julia --project Examples/testNHSphere.jl \
--Forcing=true \
--Curl=false \
--ModelType="VectorInvariant" \
--VerticalDiffusion=true \
--SurfaceFlux=true \
--VerticalDiffusion=false \
--SurfaceFlux=false \
--Coriolis=true \
--Upwind=true \
--HorLimit=true \
--HorLimit=false \
--Equation=CompressibleMoist \
--Microphysics=true \
--Microphysics=false \
--TypeMicrophysics="SimpleMicrophysics" \
--Buoyancy=true \
--Damping=true \
Expand Down
1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ NLsolve = "2774e3e8-f4cf-5e23-947b-6d7e65073b56"
NearestNeighbors = "b8a86587-4115-5ab1-83bc-aa920d37bbce"
NetCDF = "30363a11-5582-574a-97bb-aa9a979735b9"
PairedLinkedLists = "7a42b37b-ed3b-477a-9848-3661f53bb718"
Polynomials = "f27b6e38-b328-58d1-80ce-0feddd5e7a45"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
ProfileCanvas = "efd6af41-a80b-495e-886c-e51b0c7d77a3"
Revise = "295af30f-e4ad-537b-8983-00126c2a3abe"
Expand Down
15 changes: 15 additions & 0 deletions src/FiniteElements/FiniteElements.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
module FiniteElements

using Polynomials

"""
    FiniteElement

Abstract supertype of the element descriptions declared in this module.
"""
abstract type FiniteElement end

"""
    NodalElement <: FiniteElement

Immutable record bundling the data of a nodal element.

# Fields
- `NumBases::Int`: presumably the number of basis functions (TODO confirm
  against the constructor call sites).
- `NodalBases::Vector{Polynomial}`: one-dimensional array of `Polynomial` objects.
- `NodalPoints::Matrix{Float64}`: two-dimensional `Float64` array; the meaning of
  each axis is not fixed by this declaration (TODO confirm).
- `Diff1Matrix::Matrix{Float64}`: two-dimensional `Float64` array; by its name
  presumably a first-derivative matrix (TODO confirm).
"""
struct NodalElement <: FiniteElement
    NumBases::Int
    NodalBases::Vector{Polynomial}
    NodalPoints::Matrix{Float64}
    Diff1Matrix::Matrix{Float64}
end

end
21 changes: 18 additions & 3 deletions src/GPU/FcnGPU.jl
Original file line number Diff line number Diff line change
Expand Up @@ -104,13 +104,13 @@ end

function FcnGPU!(F,U,FE,Metric,Phys,Cache,Exchange,Global,Param,DiscType)

@show sum(abs.(U))
backend = get_backend(F)
FT = eltype(F)
Glob = FE.Glob
DS = FE.DS
DW = FE.DW
M = FE.M
MW = FE.MW
dXdxI = Metric.dXdxI
X = Metric.X
J = Metric.J
Expand All @@ -128,7 +128,6 @@ function FcnGPU!(F,U,FE,Metric,Phys,Cache,Exchange,Global,Param,DiscType)
lat = Metric.lat
dz = Metric.dz
zP = Metric.zP
@show size(zP)
DoF = FE.DoF
N = size(FE.DS,1)
Nz = size(F,1)
Expand Down Expand Up @@ -157,7 +156,8 @@ function FcnGPU!(F,U,FE,Metric,Phys,Cache,Exchange,Global,Param,DiscType)
@views FRhoTr = F[:,:,5]
# Cache
@views CacheF = Temp1[:,:,1:6]
@views CacheFF = Temp1[:,:,1:6+NumTr]
@views CacheFF = Temp1[:,:,1:6+NumTr+1]
@views Cachew = Temp1[:,:,6 + 1 + NumTr]
@views p = Cache.AuxG[:,:,1]
KV = Cache.KV
TSurf = Cache.TSurf
Expand All @@ -182,6 +182,10 @@ function FcnGPU!(F,U,FE,Metric,Phys,Cache,Exchange,Global,Param,DiscType)
NDoFG = min(div(NumberThreadGPU,Nz),NDoF)
groupG = (Nz, NDoFG)
ndrangeG = (Nz, NDoF)
NzG = min(div(NumberThreadGPU,N*N),Nz-1)
groupw = (N, N, NzG, 1)
ndrangewB = (Nz-1, NBF)
ndrangewI = (Nz-1, NF-NBF)

KRhoGradKinKernel! = RhoGradKinKernel!(backend,group)
KGradKernel! = GradKernel!(backend,group)
Expand All @@ -195,6 +199,8 @@ function FcnGPU!(F,U,FE,Metric,Phys,Cache,Exchange,Global,Param,DiscType)
end
KHyperViscTracerKernel! = HyperViscTracerKernel!(backend, groupTr)
KHyperViscTracerKoeffKernel! = HyperViscTracerKoeffKernel!(backend, groupTr)
KHyperViscWKernel! = HyperViscWKernel!(backend, groupTr)
KHyperViscWKoeffKernel! = HyperViscWKoeffKernel!(backend, groupTr)
KDivRhoTrUpwind3Kernel! = DivRhoTrUpwind3Kernel!(backend, groupTr)
if Global.Model.SurfaceFlux
NFG = min(div(NumberThreadGPU,N*N),NF)
Expand All @@ -218,6 +224,8 @@ function FcnGPU!(F,U,FE,Metric,Phys,Cache,Exchange,Global,Param,DiscType)
@views KHyperViscTracerKernel!(CacheTr,U[:,:,iT+NumV],Rho,DS,DW,dXdxI,J,M,Glob,ndrange=ndrangeB)
KernelAbstractions.synchronize(backend)
end
@views KHyperViscWKernel!(Cachew,U[:,:,4],DS,DW,dXdxI,J,MW,Glob,ndrange=ndrangeB)
KernelAbstractions.synchronize(backend)
Parallels.ExchangeData3DSendGPU(CacheFF,Exchange)

KHyperViscKernel!(CacheF,MRho,U,DS,DW,dXdxI_I,J_I,M,Glob_I,ndrange=ndrangeI)
Expand All @@ -227,6 +235,8 @@ function FcnGPU!(F,U,FE,Metric,Phys,Cache,Exchange,Global,Param,DiscType)
@views KHyperViscTracerKernel!(CacheTr,U[:,:,iT+NumV],Rho,DS,DW,dXdxI_I,J_I,M,Glob_I,ndrange=ndrangeI)
KernelAbstractions.synchronize(backend)
end
@views KHyperViscWKernel!(Cachew,U[:,:,4],DS,DW,dXdxI_I,J_I,MW,Glob_I,ndrange=ndrangeB)
KernelAbstractions.synchronize(backend)

Parallels.ExchangeData3DRecvGPU!(CacheFF,Exchange)
KernelAbstractions.synchronize(backend)
Expand All @@ -244,6 +254,8 @@ function FcnGPU!(F,U,FE,Metric,Phys,Cache,Exchange,Global,Param,DiscType)
KoeffDiv,ndrange=ndrangeB)
KernelAbstractions.synchronize(backend)
end
@views KHyperViscWKoeffKernel!(F[:,:,4],Cachew,DS,DW,dXdxI,J,MW,Glob,KoeffDiv,ndrange=ndrangeB)
KernelAbstractions.synchronize(backend)
KGradKernel!(F,U,p,DS,dXdxI,J,M,MRho,Glob,Phys,ndrange=ndrangeB)
KernelAbstractions.synchronize(backend)
if Global.Model.Coriolis
Expand Down Expand Up @@ -286,6 +298,8 @@ function FcnGPU!(F,U,FE,Metric,Phys,Cache,Exchange,Global,Param,DiscType)
KoeffDiv,ndrange=ndrangeI)
KernelAbstractions.synchronize(backend)
end
@views KHyperViscWKoeffKernel!(F[:,:,4],Cachew,DS,DW,dXdxI_I,J_I,MW,Glob,KoeffDiv,ndrange=ndrangeI)
KernelAbstractions.synchronize(backend)
KGradKernel!(F,U,p,DS,dXdxI_I,J_I,M,MRho,Glob_I,Phys,ndrange=ndrangeI)
KernelAbstractions.synchronize(backend)
if Global.Model.Coriolis
Expand Down Expand Up @@ -351,6 +365,7 @@ function FcnGPUAMD!(F,U,FE,Metric,Phys,Cache,Exchange,Global,Param,DiscType)
DS = FE.DS
DW = FE.DW
M = FE.M
MW = FE.MW
dXdxI = Metric.dXdxI
X = Metric.X
J = Metric.J
Expand Down
106 changes: 106 additions & 0 deletions src/GPU/OperatorKernel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,59 @@ end
end
end

# HyperViscWKernel!: first pass of the hyperdiffusion applied to the field `w`.
#
# For every work item the kernel
#   1. copies w[Iz,ind] into a local-memory tile,
#   2. forms D-weighted sums of the tile along its first and second index and passes
#      them through Grad12 and Contra12 (both defined elsewhere in the project),
#   3. forms DW-weighted sums of the two resulting tiles and atomically accumulates
#      the result, divided by MW[Iz,ind], into Fw[Iz+1,ind].
#
# Arguments (as used below):
#   Fw     - output array, indexed Fw[level, ind]; updated with `+=`, so it is
#            presumably expected to be initialised by the caller (TODO confirm).
#   w      - input field, indexed w[level, ind].
#   D, DW  - N-by-N coefficient matrices used in the two weighted sums.
#   dXdxI  - metric array, sliced as dXdxI[1:2,1:2,:,ID,Iz,IF].
#   JJ     - array sliced as JJ[ID,:,Iz,IF] and handed to Grad12.
#   MW     - divisor applied to the result, indexed MW[level, ind].
#   Glob   - index table mapping (ID, IF) to the column index `ind`.
#
# The kernel reads components 3 and 4 of @ndrange() and components 1 and 3 of
# @groupsize(), and destructures four global indices, so it must be launched with a
# four-dimensional ndrange and workgroup size.
@kernel function HyperViscWKernel!(Fw,@Const(w),@Const(D),@Const(DW),@Const(dXdxI),
@Const(JJ),@Const(MW),@Const(Glob))

# Local indices address the tile; the last two global indices select level and face.
I, J, iz = @index(Local, NTuple)
_,_,Iz,IF = @index(Global, NTuple)

ColumnTilesDim = @uniform @groupsize()[3]
N = @uniform @groupsize()[1]
Nz = @uniform @ndrange()[3]
NF = @uniform @ndrange()[4]

# Linearised position inside the N-by-N tile and the matching column index.
# NOTE(review): Glob[ID,IF] is read under @inbounds before the IF <= NF guard below;
# this is only safe if no work item ever has IF > NF - confirm against the launch
# configuration.
ID = I + (J - 1) * N
@inbounds ind = Glob[ID,IF]

# Three local-memory tiles: the field itself and the two outputs of Contra12.
wCol = @localmem eltype(Fw) (N,N, ColumnTilesDim)
wCxCol = @localmem eltype(Fw) (N,N, ColumnTilesDim)
wCyCol = @localmem eltype(Fw) (N,N, ColumnTilesDim)
if Iz <= Nz && IF <= NF
@inbounds wCol[I,J,iz] = w[Iz,ind]
end
@synchronize

# ID and ind are recomputed after every @synchronize.
# NOTE(review): presumably because plain locals are not guaranteed to survive a
# @synchronize on every backend - do not remove without checking KernelAbstractions.
ID = I + (J - 1) * N
@inbounds ind = Glob[ID,IF]

if Iz <= Nz && IF <= NF
# D-weighted sums of the tile along the first (Dxc) and second (Dyc) index.
@inbounds Dxc = D[I,1] * wCol[1,J,iz]
@inbounds Dyc = D[J,1] * wCol[I,1,iz]
for k = 2 : N
@inbounds Dxc += D[I,k] * wCol[k,J,iz]
@inbounds Dyc += D[J,k] * wCol[I,k,iz]
end
# Grad12 / Contra12 are defined elsewhere; presumably they apply the metric terms
# to obtain the gradient and its contravariant components (TODO confirm).
@views @inbounds (GradDx, GradDy) = Grad12(Dxc,Dyc,dXdxI[1:2,1:2,:,ID,Iz,IF],JJ[ID,:,Iz,IF])
@views @inbounds (tempx, tempy) = Contra12(GradDx,GradDy,dXdxI[1:2,1:2,:,ID,Iz,IF])
@inbounds wCxCol[I,J,iz] = tempx
@inbounds wCyCol[I,J,iz] = tempy
end

@synchronize

ID = I + (J - 1) * N
@inbounds ind = Glob[ID,IF]
if Iz <= Nz && IF <= NF
# DW-weighted sums of the two tiles, combined into one value per node.
@inbounds Divw = DW[I,1] * wCxCol[1,J,iz] + DW[J,1] * wCyCol[I,1,iz]
for k = 2 : N
@inbounds Divw += DW[I,k] * wCxCol[k,J,iz] + DW[J,k] * wCyCol[I,k,iz]
end
# Only levels Iz < Nz contribute; the result is stored one level up (Iz+1).
# The update is atomic, so several work items may target the same `ind`.
# NOTE(review): HyperViscWKoeffKernel! reads and writes at Iz, not Iz+1 - confirm
# that the level offset between the two passes is intended.
if Iz < Nz
@inbounds @atomic Fw[Iz+1,ind] += Divw / MW[Iz,ind]
end
end
end

@kernel function HyperViscKoeffKernel!(F,@Const(U),@Const(Cache),@Const(D),@Const(DW),@Const(dXdxI),
@Const(JJ),@Const(M),@Const(Glob),KoeffCurl,KoeffGrad,KoeffDiv)

Expand Down Expand Up @@ -692,6 +745,59 @@ end
end
end

# HyperViscWKoeffKernel!: second pass of the hyperdiffusion applied to the field `w`.
#
# Performs the same three steps as HyperViscWKernel! (tile load, D-weighted sums fed
# through Grad12/Contra12, DW-weighted sums), but scales the result by -KoeffDivW
# and accumulates it into Fw[Iz,ind] instead of Fw[Iz+1,ind].
#
# Arguments (as used below):
#   Fw        - output array, indexed Fw[level, ind]; updated with `+=`.
#   w         - input field, indexed w[level, ind].
#   D, DW     - N-by-N coefficient matrices used in the two weighted sums.
#   dXdxI     - metric array, sliced as dXdxI[1:2,1:2,:,ID,Iz,IF].
#   JJ        - array sliced as JJ[ID,:,Iz,IF] and handed to Grad12.
#   MW        - divisor applied to the result, indexed MW[level, ind].
#   Glob      - index table mapping (ID, IF) to the column index `ind`.
#   KoeffDivW - scalar coefficient multiplying the result (applied with a minus sign).
#
# The kernel reads components 3 and 4 of @ndrange() and components 1 and 3 of
# @groupsize(), and destructures four global indices, so it must be launched with a
# four-dimensional ndrange and workgroup size.
@kernel function HyperViscWKoeffKernel!(Fw,@Const(w),@Const(D),@Const(DW),@Const(dXdxI),
@Const(JJ),@Const(MW),@Const(Glob),KoeffDivW)

# Local indices address the tile; the last two global indices select level and face.
I, J, iz = @index(Local, NTuple)
_,_,Iz,IF = @index(Global, NTuple)

ColumnTilesDim = @uniform @groupsize()[3]
N = @uniform @groupsize()[1]
Nz = @uniform @ndrange()[3]
NF = @uniform @ndrange()[4]

# Linearised position inside the N-by-N tile and the matching column index.
# NOTE(review): Glob[ID,IF] is read under @inbounds before the IF <= NF guard below;
# this is only safe if no work item ever has IF > NF - confirm against the launch
# configuration.
ID = I + (J - 1) * N
@inbounds ind = Glob[ID,IF]

# Three local-memory tiles: the field itself and the two outputs of Contra12.
wCol = @localmem eltype(Fw) (N,N, ColumnTilesDim)
wCxCol = @localmem eltype(Fw) (N,N, ColumnTilesDim)
wCyCol = @localmem eltype(Fw) (N,N, ColumnTilesDim)
if Iz <= Nz && IF <= NF
@inbounds wCol[I,J,iz] = w[Iz,ind]
end
@synchronize

# ID and ind are recomputed after every @synchronize.
# NOTE(review): presumably because plain locals are not guaranteed to survive a
# @synchronize on every backend - do not remove without checking KernelAbstractions.
ID = I + (J - 1) * N
@inbounds ind = Glob[ID,IF]

if Iz <= Nz && IF <= NF
# D-weighted sums of the tile along the first (Dxc) and second (Dyc) index.
@inbounds Dxc = D[I,1] * wCol[1,J,iz]
@inbounds Dyc = D[J,1] * wCol[I,1,iz]
for k = 2 : N
@inbounds Dxc += D[I,k] * wCol[k,J,iz]
@inbounds Dyc += D[J,k] * wCol[I,k,iz]
end
# Grad12 / Contra12 are defined elsewhere; presumably they apply the metric terms
# to obtain the gradient and its contravariant components (TODO confirm).
@views @inbounds (GradDx, GradDy) = Grad12(Dxc,Dyc,dXdxI[1:2,1:2,:,ID,Iz,IF],JJ[ID,:,Iz,IF])
@views @inbounds (tempx, tempy) = Contra12(GradDx,GradDy,dXdxI[1:2,1:2,:,ID,Iz,IF])
@inbounds wCxCol[I,J,iz] = tempx
@inbounds wCyCol[I,J,iz] = tempy
end

@synchronize

ID = I + (J - 1) * N
@inbounds ind = Glob[ID,IF]
if Iz <= Nz && IF <= NF
# DW-weighted sums of the two tiles, combined into one value per node.
@inbounds Divw = DW[I,1] * wCxCol[1,J,iz] + DW[J,1] * wCyCol[I,1,iz]
for k = 2 : N
@inbounds Divw += DW[I,k] * wCxCol[k,J,iz] + DW[J,k] * wCyCol[I,k,iz]
end
# Only levels Iz < Nz contribute; the scaled result is accumulated atomically at the
# same level Iz that was read.
# NOTE(review): HyperViscWKernel! stores its result at Iz+1 while this pass reads
# w[Iz,ind] and writes Fw[Iz,ind] - confirm that the level offset between the two
# passes is intended.
if Iz < Nz
@inbounds @atomic Fw[Iz,ind] += -KoeffDivW * Divw / MW[Iz,ind]
end
end
end

@kernel function DivRhoTrCentralKernel!(F,@Const(c),@Const(uC),@Const(vC),@Const(w),
@Const(D),@Const(dXdxI),
@Const(JJ),@Const(M),@Const(Glob))
Expand Down

0 comments on commit 741acef

Please sign in to comment.