# Review preamble. Text ahead of the first diff header is skipped by patch tools.
# The line layout of the patch below is kept exactly as received (collapsed);
# original indentation and blank context lines cannot be recovered from it.
#
# What the hunks below show:
#   * BatchScripts/Mac/TestKernels.sh (new): exports JuliaDevice="CPU" and runs
#     TestKernels/testKernels.jl with `julia --project`.
#   * Examples/testFVGrad.jl: renames hGradE to hGradEx, adds the Div, DivEx, uN,
#     uT and uTEx arrays, and replaces the "Test divergence" stub with a
#     tangential test: uN and uTEx are filled per edge, uT is computed with
#     mul!(uT,Tang,uN), and both plus their difference go to vtkSkeleton!.
#   * src/Examples/force.jl and src/Examples/initial.jl: the force closures take
#     an extra first argument F and accumulate into F[2], F[3] and F[5] instead
#     of returning a 5-tuple; initial.jl adds fast_powGPU(x, y) = exp(y * log(x)).
#   * src/GPU/OperatorKernel.jl: the call site passes view(F,Iz,IC,:) and
#     view(U,Iz,IC,:) to Force; the former accumulation lines are commented out.
#
# Fixes made to added ("+") lines in this revision:
#   * "VelSphere[1] 3*lon^4 + 4*lat^3" lacked "=", so it could not parse.
#   * "VelCa[1]".."VelCa[3]" named a variable that is never assigned in the
#     visible code; the vector assigned on the preceding line is VelCart.
#
# Left untouched because they sit on context lines, which must keep matching
# the pre-image file:
#   * NOTE(review): the "for iE" loop reads Grid.Edges[iF].Mid.x/y/z; this
#     looks like it should index with iE. Confirm and fix in a follow-up.
#   * NOTE(review): no "+" is visible between "hGrady * Grid.Edges[iE].n.y" and
#     "hGradz * Grid.Edges[iE].n.z". If the real file matches, the z term is
#     not added to hGradEx. Verify against the un-collapsed source.
#   * NOTE(review): the bare "stop" after the vtk output is presumably a
#     deliberate abort of the script; confirm.
diff --git a/BatchScripts/Mac/TestKernels.sh b/BatchScripts/Mac/TestKernels.sh new file mode 100755 index 0000000..3249103 --- /dev/null +++ b/BatchScripts/Mac/TestKernels.sh @@ -0,0 +1,2 @@ +export JuliaDevice="CPU" +julia --project TestKernels/testKernels.jl diff --git a/Examples/testFVGrad.jl b/Examples/testFVGrad.jl index 88daea2..a51b3c2 100644 --- a/Examples/testFVGrad.jl +++ b/Examples/testFVGrad.jl @@ -213,10 +213,16 @@ UNew = similar(U) @views UNewu = UNew[uPosS:uPosE] h = zeros(FTB,Grid.NumFaces) +Div = zeros(FTB,Grid.NumFaces) +DivEx = zeros(FTB,Grid.NumFaces) +uN = zeros(FTB,Grid.NumEdges) +uT = zeros(FTB,Grid.NumEdges) +uTEx = zeros(FTB,Grid.NumEdges) hGrad = zeros(FTB,Grid.NumEdges) -hGradE = zeros(FTB,Grid.NumEdges) +hGradEx = zeros(FTB,Grid.NumEdges) VelSp = zeros(Grid.NumFaces,2) VelSpE = zeros(Grid.NumFaces,2) + # Test gradient for iF = 1 : Grid.NumFaces x = Grid.Faces[iF].Mid.x @@ -232,28 +238,50 @@ for iE = 1 : Grid.NumEdges hGradx = 12 * x^3 hGrady = 12 * y^2 hGradz = 10 * z - hGradE[iE] = hGradx * Grid.Edges[iE].n.x + + hGradEx[iE] = hGradx * Grid.Edges[iE].n.x + hGrady * Grid.Edges[iE].n.y hGradz * Grid.Edges[iE].n.z end mul!(hGrad,Grad,h) FiniteVolumes.ConvertVelocitySp!(backend,FTB,VelSp,hGrad,Grid) Outputs.vtkSkeleton!(vtkSkeletonMesh, GridType*"FVGrad", Proc, ProcNumber, [h VelSp], 0) -FiniteVolumes.ConvertVelocitySp!(backend,FTB,VelSpE,hGradE,Grid) +FiniteVolumes.ConvertVelocitySp!(backend,FTB,VelSpE,hGradEx,Grid) Outputs.vtkSkeleton!(vtkSkeletonMesh, GridType*"FVGrad", Proc, ProcNumber, [h VelSpE], 1) Outputs.vtkSkeleton!(vtkSkeletonMesh, GridType*"FVGrad", Proc, ProcNumber, [h VelSp-VelSpE], 2) -# Test divergence +# Test tangential +VelCart=zeros(3) +VelSphere=zeros(3) for iE = 1 : Grid.NumEdges x = Grid.Edges[iF].Mid.x y = Grid.Edges[iF].Mid.y z = Grid.Edges[iF].Mid.z (lon,lat,r)= Grids.cart2sphere(x,y,z) - uS = 3*lon^4 + 4*lat^3 - vS = 4*lon^3 + 5 * lat^2 + VelSphere[1] = 3*lon^4 + 4*lat^3 + VelSphere[2] = 4*lon^3 + 5 * lat^2 +# 
dulondlon = 12*lon^3 +# dulatdlat = 10*lat + VelCart = VelSphere2Cart(VelSphere,lon,lat) + n1 = Grid.Edges[iE].n.x + n2 = Grid.Edges[iE].n.y + n3 = Grid.Edges[iE].n.z + uN[iE] = n1 * VelCart[1] + n2 * VelCart[2] + n3 * VelCart[3] + t1 = Grid.Edges[iE].t.x + t2 = Grid.Edges[iE].t.y + t3 = Grid.Edges[iE].t.z + uTEx[iE] = t1 * VelCart[1] + t2 * VelCart[2] + t3 * VelCart[3] end +mul!(uT,Tang,uN) +FiniteVolumes.ConvertVelocityTSp!(backend,FTB,VelSpE,uTEx,Grid) +FiniteVolumes.ConvertVelocityTSp!(backend,FTB,VelSp,uT,Grid) +Outputs.vtkSkeleton!(vtkSkeletonMesh, GridType*"FVGrad", Proc, ProcNumber, [h VelSp], 3) +Outputs.vtkSkeleton!(vtkSkeletonMesh, GridType*"FVGrad", Proc, ProcNumber, [h VelSpE], 4) +Outputs.vtkSkeleton!(vtkSkeletonMesh, GridType*"FVGrad", Proc, ProcNumber, [h VelSp-VelSpE], 5) stop +# theta lat +# phi lon +# Div = 1/sin(theta)*d/dtheta(sin(theta)*u_theta) + 1/sin(theta)*d/dphi(u_phi) diff --git a/src/Examples/force.jl b/src/Examples/force.jl index d78738f..81123ef 100644 --- a/src/Examples/force.jl +++ b/src/Examples/force.jl @@ -3,15 +3,15 @@ abstract type AbstractForcing end Base.@kwdef struct HeldSuarezForcing <: AbstractForcing end function (Force::HeldSuarezForcing)(Param,Phys) - function local_force(U,p,lat) + function local_force(F,U,p,lat) FT = eltype(U) Sigma = p / Phys.p0 SigmaPowKappa = fast_powGPU(Sigma,Phys.kappa) height_factor = max(FT(0), (Sigma - Param.sigma_b) / (FT(1) - Param.sigma_b)) coslat = cos(lat) sinlat = sin(lat) - Fu = -(Param.k_f * height_factor) * U[2] - Fv = -(Param.k_f * height_factor) * U[3] + F[2] += -(Param.k_f * height_factor) * U[2] + F[3] += -(Param.k_f * height_factor) * U[3] if Sigma < FT(0.7) kT = Param.k_a + (Param.k_s - Param.k_a) * height_factor * coslat * coslat * coslat * coslat else @@ -21,8 +21,7 @@ function (Force::HeldSuarezForcing)(Param,Phys) Param.DeltaTh_z * log(Sigma) * coslat * coslat) * SigmaPowKappa Teq = max(Param.T_min, Teq) DeltaT = kT * (Phys.p0 * Sigma / (U[1] * Phys.Rd) - Teq) - FRhoTh = -U[1] * DeltaT 
/ SigmaPowKappa - return FT(0),Fu,Fv,FT(0),FRhoTh + F[5] += -U[1] * DeltaT / SigmaPowKappa end return local_force end diff --git a/src/Examples/initial.jl b/src/Examples/initial.jl index 35e0bbe..d66e8ec 100755 --- a/src/Examples/initial.jl +++ b/src/Examples/initial.jl @@ -456,23 +456,25 @@ function (::HeldSuarezDryExample)(Param,Phys) w = FT(0) return (Rho,uS,vS,w,Th,qv) end - function Force(U,p,lat) + function Force(F,U,p,lat) FT = eltype(U) Sigma = p / Phys.p0 + SigmaPowKappa = fast_powGPU(Sigma,Phys.kappa) height_factor = max(FT(0), (Sigma - Param.sigma_b) / (FT(1) - Param.sigma_b)) - Fu = -(Param.k_f * height_factor) * U[2] - Fv = -(Param.k_f * height_factor) * U[3] + coslat = cos(lat) + sinlat = sin(lat) + F[2] += -(Param.k_f * height_factor) * U[2] + F[3] += -(Param.k_f * height_factor) * U[3] if Sigma < FT(0.7) - kT = Param.k_a + (Param.k_s - Param.k_a) * height_factor * cos(lat) * cos(lat) * cos(lat) * cos(lat) + kT = Param.k_a + (Param.k_s - Param.k_a) * height_factor * coslat * coslat * coslat * coslat else kT = FT(0) end - Teq = (Param.T_equator - Param.DeltaT_y * sin(lat) * sin(lat) - - Param.DeltaTh_z * log(Sigma) * cos(lat) * cos(lat)) * Sigma^Phys.kappa + Teq = (Param.T_equator - Param.DeltaT_y * sinlat * sinlat - + Param.DeltaTh_z * log(Sigma) * coslat * coslat) * SigmaPowKappa Teq = max(Param.T_min, Teq) DeltaT = kT * (Phys.p0 * Sigma / (U[1] * Phys.Rd) - Teq) - FRhoTh = -U[1] * DeltaT / Sigma^Phys.kappa - return FT(0),Fu,Fv,FT(0),FRhoTh + F[5] += -U[1] * DeltaT / SigmaPowKappa end return profile,Force end @@ -641,3 +643,7 @@ function z2Eta(x,y,z,Param,Phys) return Eta end + +# we may be hitting a slow path: +# https://stackoverflow.com/questions/14687665/very-slow-stdpow-for-bases-very-close-to-1 +fast_powGPU(x::FT, y::FT) where {FT <: AbstractFloat} = exp(y * log(x)) diff --git a/src/GPU/OperatorKernel.jl b/src/GPU/OperatorKernel.jl index 240d754..47ceaf2 100644 --- a/src/GPU/OperatorKernel.jl +++ b/src/GPU/OperatorKernel.jl @@ -755,12 +755,12 
@@ end NumG = @uniform @ndrange()[2] if IC <= NumG - FRho,Fu,Fv,Fw,FRhoTh = Force(view(U,Iz,IC,1:5),p[Iz,IC],xS[2,IC]) - F[Iz,IC,1] += FRho - F[Iz,IC,2] += Fu - F[Iz,IC,3] += Fv - F[Iz,IC,4] += Fw - F[Iz,IC,5] += FRhoTh + Force(view(F,Iz,IC,:),view(U,Iz,IC,:),p[Iz,IC],xS[2,IC]) +# F[Iz,IC,1] += FRho +# F[Iz,IC,2] += Fu +# F[Iz,IC,3] += Fv +# F[Iz,IC,4] += Fw +# F[Iz,IC,5] += FRhoTh end end