diff --git a/AA b/AA
new file mode 100644
index 0000000..c42a2c3
--- /dev/null
+++ b/AA
@@ -0,0 +1,70 @@
+# Horizontal hyperviscosity operator for one tracer (KernelAbstractions kernel).
+#
+# For every node (I,J) of the tile belonging to level Iz and element IF it
+#   1. stores Tr / Rho in local memory,
+#   2. applies D along both tile directions and passes the result through
+#      Grad12 and Contra12,
+#   3. applies DW to both components and atomically adds the sum, divided by M,
+#      to FTr[Iz,ind] with ind = Glob[ID,IF].
+#
+# FTr is accumulated into and never zeroed here. The local-memory tiles are
+# sized (N,N,ColumnTilesDim) from the first and third workgroup extents, so the
+# launch is assumed to use an N x N node tile (NOTE(review): confirm at the
+# call site, which is not part of this file).
+@kernel function HyperViscTracerKernel!(FTr,@Const(Tr),@Const(Rho),@Const(D),@Const(DW),@Const(dXdxI),
+  @Const(JJ),@Const(M),@Const(Glob))
+
+  I, J, iz   = @index(Local, NTuple)
+  _,_,Iz,IF = @index(Global, NTuple)
+
+  ColumnTilesDim = @uniform @groupsize()[3]
+  N = @uniform @groupsize()[1]
+  Nz = @uniform @ndrange()[3]
+  NF = @uniform @ndrange()[4]
+
+  # ID: linear node index inside the tile; ind: entry of Glob for that node.
+  ID = I + (J - 1) * N
+  @inbounds ind = Glob[ID,IF]
+
+  TrCol = @localmem eltype(FTr) (N,N, ColumnTilesDim)
+  TrCxCol = @localmem eltype(FTr) (N,N, ColumnTilesDim)
+  TrCyCol = @localmem eltype(FTr) (N,N, ColumnTilesDim)
+  # Stage 1: cache Tr / Rho for the whole tile.
+  if Iz <= Nz && IF <= NF
+    @inbounds TrCol[I,J,iz] = Tr[Iz,ind] / Rho[Iz,ind]
+  end
+  @synchronize
+
+  # ID is recomputed after the barrier, exactly as the original code did
+  # (NOTE(review): presumably plain locals are not relied upon across
+  # @synchronize - confirm). The Glob lookup that used to be repeated here was
+  # never read in this stage and has been removed.
+  ID = I + (J - 1) * N
+
+  # Stage 2: derivatives along both directions, then Grad12 and Contra12.
+  if Iz <= Nz && IF <= NF
+    @inbounds Dxc = D[I,1] * TrCol[1,J,iz]
+    @inbounds Dyc = D[J,1] * TrCol[I,1,iz]
+    for k = 2 : N
+      @inbounds Dxc += D[I,k] * TrCol[k,J,iz]
+      @inbounds Dyc += D[J,k] * TrCol[I,k,iz]
+    end
+    @views @inbounds (GradDx, GradDy) = Grad12(Dxc,Dyc,dXdxI[1:2,1:2,:,ID,Iz,IF],JJ[ID,:,Iz,IF])
+    @views @inbounds (tempx, tempy) = Contra12(GradDx,GradDy,dXdxI[1:2,1:2,:,ID,Iz,IF])
+    @inbounds TrCxCol[I,J,iz] = tempx
+    @inbounds TrCyCol[I,J,iz] = tempy
+  end
+
+  @synchronize
+
+  # Stage 3: apply DW to both components and scatter into FTr.
+  ID = I + (J - 1) * N
+  @inbounds ind = Glob[ID,IF]
+  if Iz <= Nz && IF <= NF
+    @inbounds DivTr = DW[I,1] * TrCxCol[1,J,iz] + DW[J,1] * TrCyCol[I,1,iz]
+    for k = 2 : N
+      @inbounds DivTr += DW[I,k] * TrCxCol[k,J,iz] + DW[J,k] * TrCyCol[I,k,iz]
+    end
+    @inbounds @atomic FTr[Iz,ind] += DivTr / M[Iz,ind]
+  end
+end
diff --git a/Examples/testPolynom.jl b/Examples/testPolynom.jl
new file mode 100644
index 0000000..8a79b6a
--- /dev/null
+++ b/Examples/testPolynom.jl
@@ -0,0 +1,8 @@
+import CGDycore:
+  Examples, Parallels, Models, Grids, Outputs, Integration,  GPU, DyCore, FiniteElements
+using MPI
+
+# Build and return the W1 finite element of the given Order.
+function main(Order)
+  return FiniteElements.W1(Order)
+end
diff --git 
a/GlobInd b/GlobInd index 0078859..88e2e5f 100644 Binary files a/GlobInd and b/GlobInd differ diff --git a/Jobs/NHSphere/JobNHBaroWaveDrySphereOro b/Jobs/NHSphere/JobNHBaroWaveDrySphereOro index 8a0521f..410e018 100755 --- a/Jobs/NHSphere/JobNHBaroWaveDrySphereOro +++ b/Jobs/NHSphere/JobNHBaroWaveDrySphereOro @@ -1,38 +1,44 @@ mpirun -n 6 julia --project Examples/testNHSphere.jl \ - --Problem="BaroWaveDrySphereOro" \ + --Problem="BaroWaveDrySphere" \ + --Device="CPU" \ + --GPUType="Metal" \ + --FloatTypeBackend="Float32" \ + --NumberThreadGPU=512 \ --NumV=5 \ --NumTr=0 \ - --ProfRho="BaroWaveDrySphere" \ - --ProfTheta="BaroWaveDrySphere" \ - --ProfVel="BaroWaveDrySphere" \ + --ProfpBGrd="" \ + --ProfRhoBGrd="" \ --Source=false \ - --ProfpBGrd="IsoThermal" \ - --ProfRhoBGrd="IsoThermal" \ - --RefProfile=false \ - --Coriolis=true \ - --SurfaceFluxMom=false \ - --VerticalDiffusionMom=false \ - --Curl=true\ + --Forcing=false \ + --Curl=false \ --ModelType="VectorInvariant" \ - --StrideDamp=15000 \ - --Relax=1.0e-2 \ + --Coriolis=true \ + --VerticalDiffusion=false \ --Upwind=true \ --HorLimit=false \ + --Buoyancy=true \ --Decomp="EqualArea" \ - --SimDays=20 \ - --PrintDays=1 \ - --dtau=300.0 \ + --SimDays=10 \ + --SimSeconds=0 \ + --PrintSeconds=150 \ + --PrintMinutes=0 \ + --PrintHours=0 \ + --PrintDays=0 \ + --StartAverageDays=100 \ + --Flat=true \ + --dtau=75 \ --IntMethod="Rosenbrock" \ --Table="SSP-Knoth" \ --TopoS="EarthOrography" \ --Stretch=true \ --StretchType="Exp" \ --GridType="CubedSphere" \ - --nz=45 \ - --nPanel=16 \ + --nz=64 \ + --nPanel=30 \ --H=30000.0 \ --OrdPoly=3 \ --HyperVisc=true \ - --HyperDCurl=1.e16 \ - --HyperDGrad=1.e16 \ - --HyperDDiv=1.e16 + --HyperDCurl=1.e15 \ + --HyperDGrad=1.e15 \ + --HyperDDiv=1.e15 \ + --HyperDDivW=1.e15 diff --git a/Project.toml b/Project.toml index d20129a..254c653 100644 --- a/Project.toml +++ b/Project.toml @@ -29,6 +29,7 @@ ProfileCanvas = "efd6af41-a80b-495e-886c-e51b0c7d77a3" Revise = 
"295af30f-e4ad-537b-8983-00126c2a3abe" RootSolvers = "7181ea78-2dcb-4de3-ab41-2b8ab5a31e74" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +SpecialPolynomials = "a25cea48-d430-424a-8ee7-0d3ad3742e9e" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StrideArraysCore = "7792a7ef-975c-4747-a70f-980b88e8d1da" diff --git a/src/CGDycore.jl b/src/CGDycore.jl index fcc2570..5677a7b 100644 --- a/src/CGDycore.jl +++ b/src/CGDycore.jl @@ -28,6 +28,7 @@ include("Outputs/Outputs.jl") include("Integration/Integration.jl") include("DyCore/DyCore.jl") include("GPU/GPU.jl") +include("FiniteElements/FiniteElements.jl") OOP = 5 diff --git a/src/DyCore/DyCore.jl b/src/DyCore/DyCore.jl index 1a65991..438f8bd 100644 --- a/src/DyCore/DyCore.jl +++ b/src/DyCore/DyCore.jl @@ -41,7 +41,6 @@ include("Source.jl") include("simpson.jl") include("Diffusion.jl") include("ThreadCache.jl") -include("TopographySmoothing.jl") include("parse_commandline.jl") include("GlobalVariables.jl") include("InitDriver.jl") diff --git a/src/DyCore/GlobalVariables.jl b/src/DyCore/GlobalVariables.jl index 8807243..b744d7c 100644 --- a/src/DyCore/GlobalVariables.jl +++ b/src/DyCore/GlobalVariables.jl @@ -70,6 +70,7 @@ mutable struct OutputStruct RadPrint::Float64 H::Float64 OrdPrint::Int + dTol::Float64 end function OutputStruct() vtk=0 @@ -90,6 +91,7 @@ function OutputStruct() RadPrint=1000.0 H=1000.0 OrdPrint=1 + dTol=1.e-8 return OutputStruct( vtk, vtkFileName, @@ -109,6 +111,7 @@ function OutputStruct() RadPrint, H, OrdPrint, + dTol, ) end diff --git a/src/DyCore/InitDriver.jl b/src/DyCore/InitDriver.jl index dad688a..cf6b948 100644 --- a/src/DyCore/InitDriver.jl +++ b/src/DyCore/InitDriver.jl @@ -65,24 +65,9 @@ function InitSphere(backend,FT,OrdPoly,OrdPolyZ,nz,nPanel,H,GridType,Topography, Global = GlobalStruct{FT}(backend,SubGrid,Model,TimeStepper,ParallelCom,Output,DoF,nz, Model.NumV,Model.NumTr,()) CG = 
CGQuad{FT}(backend,OrdPoly,OrdPolyZ,Global.Grid) - (CG,Metric) = DiscretizationCG(backend,FT,Grids.JacobiSphere3,CG,Exchange,Global) - - # Output partition - nzTemp = Global.Grid.nz - Global.Grid.nz = 1 - vtkCachePart = Outputs.vtkStruct{FT}(backend,1,Grids.TransSphereX!,CG,Metric,Global) - Outputs.unstructured_vtkPartition(vtkCachePart,Global.Grid.NumFaces,Proc,ProcNumber) - Global.Grid.nz = nzTemp if Topography.TopoS == "EarthOrography" - zS = Grids.Orography(CG,Global) - Output.RadPrint = H - Output.Flat=false - nzTemp = Global.Grid.nz - Global.Grid.nz = 1 - vtkCacheOrography = Outputs.vtkStruct(OrdPoly,Grids.TransSphereX,CG,Global) - Outputs.unstructured_vtkOrography(zS,vtkCacheOrography,Global.Grid.NumFaces,CG,Proc,ProcNumber) - Global.Grid.nz = nzTemp + zS = Grids.Orography(backend,FT,CG,Exchange,Global) end if Topography.TopoS == "EarthOrography" @@ -90,6 +75,22 @@ function InitSphere(backend,FT,OrdPoly,OrdPolyZ,nz,nPanel,H,GridType,Topography, else (CG,Metric) = DiscretizationCG(backend,FT,Grids.JacobiSphere3,CG,Exchange,Global) end + Global.Output.dTol = 2*pi / nPanel + # Output Orography + if Topography.TopoS == "EarthOrography" + Output.Flat=true + nzTemp = Global.Grid.nz + Global.Grid.nz = 1 + vtkCacheOrography = Outputs.vtkInit2D(CG.OrdPoly,Grids.TransSphereX!,CG,Metric,Global) + Outputs.unstructured_vtkOrography(zS,vtkCacheOrography,Global.Grid.NumFaces,CG,Proc,ProcNumber) + Global.Grid.nz = nzTemp + end + # Output partition + nzTemp = Global.Grid.nz + Global.Grid.nz = 1 + vtkCachePart = Outputs.vtkStruct{FT}(backend,1,Grids.TransSphereX!,CG,Metric,Global) + Outputs.unstructured_vtkPartition(vtkCachePart,Global.Grid.NumFaces,Proc,ProcNumber) + Global.Grid.nz = nzTemp return CG,Metric,Exchange,Global end diff --git a/src/DyCore/TopographySmoothing.jl b/src/DyCore/TopographySmoothing.jl deleted file mode 100644 index 3795adc..0000000 --- a/src/DyCore/TopographySmoothing.jl +++ /dev/null @@ -1,156 +0,0 @@ -function 
TopographySmoothing2!(hFCG,hCG,CG,Global,HyperDDiv) - - OP=CG.OrdPoly+1; - NF=Global.Grid.NumFaces; - Div = zeros(CG.NumG) - hF = zeros(CG.NumG) - DivCG= zeros(OP,OP) - @. Div = 0.0 - @. hF = 0.0 - D1cCG = zeros(OP,OP) - D2cCG = zeros(OP,OP) - grad1CG = zeros(OP,OP) - grad2CG = zeros(OP,OP) - - D1gradCG = D1cCG - D2gradCG = D2cCG - - vC1 = grad1CG - vC2 = grad2CG - - # Hyperdiffusion - @inbounds for iF = 1:NF - @views J = Global.Metric.J[:,:,:,1,iF]; - @views dXdxI = Global.Metric.dXdxI[:,:,:,1,:,:,iF] - - @views mul!(D1cCG[:,:],CG.DS,hCG[:,:,iF]) - @views mul!(D2cCG[:,:],hCG[:,:,iF],CG.DST) - - @views @. grad1CG[:,:] = (dXdxI[:,:,1,1,1] + dXdxI[:,:,2,1,1]) * D1cCG[:,:] + - (dXdxI[:,:,1,2,1] + dXdxI[:,:,2,2,1]) * D2cCG[:,:] - @views @. grad2CG[:,:] = (dXdxI[:,:,1,1,2] + dXdxI[:,:,2,1,2]) * D1cCG[:,:] + - (dXdxI[:,:,1,2,2] + dXdxI[:,:,2,2,2]) * D2cCG[:,:] - - @views @. D1gradCG[:,:] = (dXdxI[:,:,1,1,1] + dXdxI[:,:,2,1,1]) * grad1CG[:,:] + - (dXdxI[:,:,1,1,2] + dXdxI[:,:,2,1,2]) * grad2CG[:,:] - @views @. D2gradCG[:,:] = (dXdxI[:,:,1,1,2,1] + dXdxI[:,:,2,1,2,1]) * grad1CG[:,:] + - (dXdxI[:,:,1,2,2] + dXdxI[:,:,2,2,2]) * grad2CG[:,:] - - @views mul!(vC1[:,:],CG.DW,D1gradCG[:,:]) - @views mul!(vC2[:,:],D2gradCG[:,:],CG.DWT) - @views @. DivCG[:,:] = (vC1[:,:] + vC2[:,:]) / (J[:,:,1] + J[:,:,2]) - @inbounds for jP=1:OP - @inbounds for iP=1:OP - ind = CG.Glob[iP,jP,iF] - Div[ind] += DivCG[iP,jP] / CG.M[1,ind] - end - end - end - - # Hyperdiffusion - @inbounds for iF = 1:NF - @views J = Global.Metric.J[:,:,:,1,iF]; - @views dXdxI = Global.Metric.dXdxI[:,:,:,:,:,:,iF] - @inbounds for jP=1:OP - @inbounds for iP=1:OP - ind = CG.Glob[iP,jP,iF] - DivCG[iP,jP] = Div[ind] - end - end - @views mul!(D1cCG[:,:],CG.DS,DivCG[:,:]) - @views mul!(D2cCG[:,:],DivCG[:,:],CG.DST) - - @views @. grad1CG[:,:] = (dXdxI[:,:,1,1,1] + dXdxI[:,:,2,1,1]) * D1cCG[:,:] + - (dXdxI[:,:,1,2,1] + dXdxI[:,:,2,2,1]) * D2cCG[:,:] - @views @. 
grad2CG[:,:] = (dXdxI[:,:,1,1,2] + dXdxI[:,:,2,1,2]) * D1cCG[:,:] + - (dXdxI[:,:,1,2,2] + dXdxI[:,:,2,2,2]) * D2cCG[:,:] - - @views @. D1gradCG[:,:] = (dXdxI[:,:,1,1,1] + dXdxI[:,:,2,1,1]) * grad1CG[:,:] + - (dXdxI[:,:,1,1,2] + dXdxI[:,:,2,1,2]) * grad2CG[:,:] - @views @. D2gradCG[:,:] = (dXdxI[:,:,1,1,2,1] + dXdxI[:,:,2,1,2,1]) * grad1CG[:,:] + - (dXdxI[:,:,1,2,2] + dXdxI[:,:,2,2,2]) * grad2CG[:,:] - - @views mul!(vC1[:,:],CG.DW,D1gradCG[:,:]) - @views mul!(vC2[:,:],D2gradCG[:,:],CG.DWT) - @views @. DivCG[:,:] -= HyperDDiv*(vC1[:,:] + vC2[:,:]) / (J[:,:,1] + J[:,:,2]) - - @inbounds for jP=1:OP - @inbounds for iP=1:OP - ind = CG.Glob[iP,jP,iF] - hF[ind] += DivCG[iP,jP] / CG.M[1,ind] - end - end - end - ExchangeData!(hF,Global.Exchange) - - @inbounds for iF = 1:NF - @inbounds for jP=1:OP - @inbounds for iP=1:OP - ind = CG.Glob[iP,jP,iF] - hFCG[iP,jP,iF] = hF[ind] - end - end - end -end - -function TopographySmoothing1!(hFCG,hCG,CG,Global,HyperDDiv) - - OP=CG.OrdPoly+1; - NF=Global.Grid.NumFaces; - J = Global.Metric.J - Div = zeros(CG.NumG) - hF = zeros(CG.NumG) - DivCG= zeros(OP,OP) - @. Div = 0.0 - @. hF = 0.0 - D1cCG = zeros(OP,OP) - D2cCG = zeros(OP,OP) - grad1CG = zeros(OP,OP) - grad2CG = zeros(OP,OP) - - D1gradCG = D1cCG - D2gradCG = D2cCG - - vC1 = grad1CG - vC2 = grad2CG - - # Diffusion - @inbounds for iF = 1:NF - @views J = Global.Metric.J[:,:,:,1,iF]; - @views dXdxI = Global.Metric.dXdxI[:,:,:,1,:,:,iF] - - @views mul!(D1cCG[:,:],CG.DS,hCG[:,:,iF]) - @views mul!(D2cCG[:,:],hCG[:,:,iF],CG.DST) - - - @views @. grad1CG[:,:] = (dXdxI[:,:,1,1,1] + dXdxI[:,:,2,1,1]) * D1cCG[:,:] + - (dXdxI[:,:,1,2,1] + dXdxI[:,:,2,2,1]) * D2cCG[:,:] - @views @. grad2CG[:,:] = (dXdxI[:,:,1,1,2] + dXdxI[:,:,2,1,2]) * D1cCG[:,:] + - (dXdxI[:,:,1,2,2] + dXdxI[:,:,2,2,2]) * D2cCG[:,:] - - @views @. D1gradCG[:,:] = (dXdxI[:,:,1,1,1] + dXdxI[:,:,2,1,1]) * grad1CG[:,:] + - (dXdxI[:,:,1,1,2] + dXdxI[:,:,2,1,2]) * grad2CG[:,:] - @views @. 
D2gradCG[:,:] = (dXdxI[:,:,1,2,1] + dXdxI[:,:,2,2,1]) * grad1CG[:,:] +
-                            (dXdxI[:,:,1,2,2] + dXdxI[:,:,2,2,2]) * grad2CG[:,:]
-
-    @views mul!(vC1[:,:],CG.DW,D1gradCG[:,:])
-    @views mul!(vC2[:,:],D2gradCG[:,:],CG.DWT)
-    @views @. DivCG[:,:] = HyperDDiv*(vC1[:,:] + vC2[:,:]) / (J[:,:,1] + J[:,:,2])
-    @inbounds for jP=1:OP
-      @inbounds for iP=1:OP
-        ind = CG.Glob[iP,jP,iF]
-        hF[ind] += DivCG[iP,jP] / CG.M[1,ind]
-      end
-    end
-  end
-  ExchangeData!(hF,Global.Exchange)
-
-  @inbounds for iF = 1:NF
-    @inbounds for jP=1:OP
-      @inbounds for iP=1:OP
-        ind = CG.Glob[iP,jP,iF]
-        hFCG[iP,jP,iF] = hF[ind]
-      end
-    end
-  end
-end
diff --git a/src/FiniteElements/FiniteElements.jl b/src/FiniteElements/FiniteElements.jl
index 1f3f637..6b57914 100644
--- a/src/FiniteElements/FiniteElements.jl
+++ b/src/FiniteElements/FiniteElements.jl
@@ -1,6 +1,7 @@
 module FiniteElements
 
 using Polynomials
+using SpecialPolynomials
 
 abstract type FiniteElement end
 
@@ -11,5 +12,44 @@ struct NodalElement <: FiniteElement
   Diff1Matrix::Array{Float64,2}
 end
 
+# Finite element described by two arrays of polynomials, phiv and phie
+# (NOTE(review): the names suggest vertex and edge functions - confirm).
+struct W1 <: FiniteElement
+  Order::Int
+  phiv::Array{Polynomial,3}
+  phie::Array{Polynomial,3}
+end
+
+# Build a W1 element of the given Order.
+# phiv has size (1,2,3); all six entries are set to a linear polynomial or to
+# the constant 1.
+# phie has size (Order-2,2,3) when Order > 3 and is empty, (0,2,3), otherwise.
+# NOTE(review): only phie[m,1,1] is assigned and the same product is stored for
+# every m; the other entries stay #undef and reading them raises
+# UndefRefError. This looks unfinished - confirm the intended functions.
+function W1(Order)
+  phiv = Array{Polynomial,3}(undef,1,2,3)
+  phiv[1,1,1] = Polynomial([0.5,-0.5])
+  phiv[1,1,2] = Polynomial([0.5,0.5])
+  phiv[1,1,3] = Polynomial([1])
+  phiv[1,2,1] = Polynomial([0.5,-0.5])
+  phiv[1,2,2] = Polynomial([0.5,-0.5])
+  phiv[1,2,3] = Polynomial([0.5,0.5])
+  if Order > 3
+    phie = Array{Polynomial,3}(undef,Order-2,2,3)
+    for m = 1 : Order - 2
+      phie[m,1,1] = Polynomial([0.5,0.5]) *
+        Polynomial([0.5,-0.5])
+    end
+  else
+    phie = Array{Polynomial,3}(undef,0,2,3)
+  end
+
+  return W1(
+    Order,
+    phiv,
+    phie,
+  )
+end
 
 end
diff --git a/src/GPU/surface.jl b/src/GPU/surface.jl
index eb23743..622fdd3 100644
--- a/src/GPU/surface.jl
+++ b/src/GPU/surface.jl
@@ -33,3 +33,19 @@ end
     (v2 - nS[2] * nU) * (v2 - nS[2] * nU) +
     (wC - nS[3] * nU) * (wC - nS[3] * nU))
 end
+
+# Callable marker type: calling an MOSurface instance returns the closure
+# SurfaceData(U,p,dXdxI,nS), which yields the tuple (uStar, CT, CH).
+Base.@kwdef struct MOSurface <: SurfaceValues end
+function (::MOSurface)(Phys,Param,uPos,vPos,wPos)
+  # Phys and the closure argument p are not used by this method.
+  function SurfaceData(U,p,dXdxI,nS)
+    FT = eltype(U)
+    uStar = uStarCoefficientGPU(U[uPos],U[vPos],U[wPos],dXdxI,nS)
+    # NOTE(review): CT is filled from Param.CE - confirm that CE is the intended field.
+    CT = FT(Param.CE)
+    CH = FT(Param.CH)
+    return uStar, CT, CH
+  end
+  return SurfaceData
+end
diff --git a/src/Grids/Grids.jl b/src/Grids/Grids.jl
index 816604e..8f6cdb1 100644
--- a/src/Grids/Grids.jl
+++ b/src/Grids/Grids.jl
@@ -53,5 +53,6 @@ include("InputGrid.jl")
 include("polygon.jl")
 include("intersect.jl")
 include("interpolate.jl")
+include("TopographySmoothing.jl")
 
 end
diff --git a/src/Grids/JacobiSphere3GPU.jl b/src/Grids/JacobiSphere3GPU.jl
index f883c38..97f2271 100644
--- a/src/Grids/JacobiSphere3GPU.jl
+++ b/src/Grids/JacobiSphere3GPU.jl
@@ -4,6 +4,28 @@
 #  FT = eltype(X)
 #
 #end
+# Fill X, dXdxI and J for every face by launching JacobiSphere2Kernel!.
+# The number of faces is size(X,3); the nodes per direction are size(FE.xw,1).
+# NumberThreadGPU bounds the work-items of one workgroup: a group holds
+# NumberThreadGPU ÷ N^2 faces, but at least one. The default 512 is the value
+# that used to be hard-coded, so existing positional calls behave as before.
+# NOTE(review): the launch is not followed by a synchronize in this function.
+function JacobiSphere2GPU!(X,dXdxI,J,FE,F,Rad;NumberThreadGPU=512)
+
+  backend = get_backend(X)
+
+  NF = size(X,3)
+  N = size(FE.xw,1)
+
+  NFG = max(min(div(NumberThreadGPU,N*N),NF),1)
+  group = (N, N, NFG)
+  ndrange = (N, N, NF)
+
+  KJacobiSphere2Kernel! = JacobiSphere2Kernel!(backend,group)
+
+  KJacobiSphere2Kernel!(X,dXdxI,J,FE.xw,FE.DS,F,Rad,ndrange=ndrange)
+end
+
 function JacobiSphere3GPU!(X,dXdxI,J,FE,F,z,zs,Rad)
 
   backend = get_backend(X)
@@ -22,6 +44,32 @@ function JacobiSphere3GPU!(X,dXdxI,J,FE,F,z,zs,Rad)
   KJacobiSphere3Kernel!(X,dXdxI,J,FE.xw,FE.xwZ,FE.DS,F,z,Rad,zs,ndrange=ndrange)
 end
 
+# Per-node metric terms of the two-dimensional sphere mapping.
+# For node (I,J) of face IF: JacobiSphere2Loc! writes X[ID,:,IF] and the 2x2
+# matrix dXdx, JJ[ID,IF] receives Det2(dXdx) and Adjunct2! fills
+# dXdxI[:,:,ID,IF] from dXdx. The node array ksi is used in both directions.
+# The argument D is not used.
+@kernel function JacobiSphere2Kernel!(X,dXdxI,JJ,@Const(ksi),@Const(D),@Const(F),Rad)
+
+  I, J, iF   = @index(Local, NTuple)
+  _,_,IF = @index(Global, NTuple)
+
+  FaceTilesDim = @uniform @groupsize()[3]
+  N = @uniform @groupsize()[1]
+  NF = @uniform @ndrange()[3]
+
+  # Local-memory scratch: one 2x2 matrix per work-item of the group.
+  dXdx = @localmem eltype(X) (N,N,2,2,FaceTilesDim)
+
+  eta = ksi
+  if IF <= NF
+    ID = I + (J - 1) * N
+    @views @inbounds JacobiSphere2Loc!(X[ID,:,IF],dXdx[I,J,:,:,iF],ksi[I],eta[J],F[:,:,IF],Rad)
+    @views @inbounds JJ[ID,IF] = Det2(dXdx[I,J,:,:,iF])
+    @views @inbounds Adjunct2!(dXdxI[:,:,ID,IF],dXdx[I,J,:,:,iF])
+  end
+end
+
 @kernel function JacobiSphere3Kernel!(X,dXdxI,JJ,@Const(ksi),@Const(zeta),@Const(D),
   @Const(F),@Const(z),Rad,@Const(zs))
 
@@ -67,6 +115,91 @@ end
   end
 end
 
+# Map the reference point (ksi1,ksi2) of the face with corner rows F[1:4,:]
+# onto the sphere of radius Rad: the bilinear interpolant of the corners is
+# normalised, X receives that point scaled by Rad and dXdx the 2x2 matrix
+# quarter * (Rad / r) * DD * A * B * C assembled below.
+@inline function JacobiSphere2Loc!(X,dXdx,ksi1,ksi2,F,Rad)
+  zero = eltype(X)(0)
+  one = eltype(X)(1)
+  quarter = eltype(X)(1/4)
+  X1 = quarter * (F[1,1] * (one-ksi1)*(one-ksi2) +
+    F[2,1] * (one+ksi1)*(one-ksi2) +
+    F[3,1] * (one+ksi1)*(one+ksi2) +
+    F[4,1] * (one-ksi1)*(one+ksi2))
+  X2 = quarter * (F[1,2] * (one-ksi1)*(one-ksi2) +
+    F[2,2] * (one+ksi1)*(one-ksi2) +
+    F[3,2] * (one+ksi1)*(one+ksi2) +
+    F[4,2] * (one-ksi1)*(one+ksi2))
+  X3 = quarter * (F[1,3] * (one-ksi1)*(one-ksi2) +
+    F[2,3] * (one+ksi1)*(one-ksi2) +
+    F[3,3] * (one+ksi1)*(one+ksi2) +
+    F[4,3] * (one-ksi1)*(one+ksi2))
+
+  r = sqrt(X1 * X1 + X2 * X2 + X3 * X3)
+  f = Rad / r
+  X1 = X1 / r
+  X2 = X2 / r
+  X3 = X3 / r
+  (lam,theta)=cart2sphere(X1,X2,X3)
+
+  DD=@SArray([-sin(lam) cos(lam) zero;
+    zero zero one])
+
+  sinlam = sin(lam)
+  coslam = cos(lam)
+  sinth = sin(theta)
+  costh = cos(theta)
+  a11 = sinlam * sinlam * costh * costh + sinth * sinth
+  a12 = -sinlam * coslam * costh * costh
+  a13 = -coslam * sinth * costh
+  a21 = a12
+  a22 = coslam * coslam * costh * costh + sinth * sinth
+  a23 = -sinlam * sinth * costh
+  a31 = -coslam * sinth
+  a32 = -sinlam * sinth
+  a33 = costh
+  A = @SArray([a11 a12 a13;
+    a21 a22 a23;
+    a31 a32 a33])
+
+  B = @SArray([F[1,1] F[2,1] F[3,1] F[4,1];
+    F[1,2] F[2,2] F[3,2] F[4,2];
+    F[1,3] F[2,3] F[3,3] F[4,3]])
+
+  C = @SArray([-one+ksi2 -one+ksi1;
+    one-ksi2 -one-ksi1;
+    one+ksi2 one+ksi1;
+    -one-ksi2 one-ksi1])
+  D = quarter * f * DD * A * B * C
+  dXdx[1,1] = D[1,1]
+  dXdx[1,2] = D[1,2]
+  dXdx[2,1] = D[2,1]
+  dXdx[2,2] = D[2,2]
+  X[1] = X1 * Rad
+  X[2] = X2 * Rad
+  X[3] = X3 * Rad
+
+end
+
+# Determinant of the 2x2 matrix A.
+@inline function Det2(A)
+  A[1,1] * A[2,2] - A[1,2] * A[2,1]
+end
+
+# Write the adjugate of the 2x2 matrix A into Ad:
+#   Ad = [ A[2,2]  -A[1,2];
+#         -A[2,1]   A[1,1]]
+@inline function Adjunct2!(Ad,A)
+
+  Ad[1,1] = A[2,2]
+  Ad[2,1] = -A[2,1]
+
+  Ad[1,2] = -A[1,2]
+  Ad[2,2] = A[1,1]
+
+end
+
 @inline function JacobiSphere3Loc!(X,dXdx,hR,ksi1,ksi2,ksi3,F,z1,z2,Rad,H,zs)
   zero = eltype(X)(0)
   one = eltype(X)(1)
@@ -85,8 +218,9 @@ end
     F[3,3] * (one+ksi1)*(one+ksi2) +
     F[4,3] * (one-ksi1)*(one+ksi2))
   zLoc = half * ((one-ksi3) * z1 + (one+ksi3) * z2)
-  hR = zLoc + (H - zLoc) * zs / H
-  D33 = one - zs / H;
+#  hR = zLoc + (H - zLoc) * zs / H
+#  D33 = one - zs / H;
+  hR, D33 = GalChen(zLoc,H,zs)
   D33 = half * D33*(z2-z1)
 
   r = sqrt(X1 * X1 + X2 * X2 + X3 * X3)
@@ -164,3 +298,27 @@ end
   Ad[3,3] = A[1,1] * A[2,2] - A[1,2] * A[2,1]
 
 end
+# Height transformation named GalChen: returns
+# z = zRef + (H - zRef) * zs / H and dz/dzRef = 1 - zs / H.
+@inline function GalChen(zRef,H,zs)
+  z = zRef + (H - zRef) * zs / H
+  DzDzRef = eltype(zRef)(1) - zs / H
+  return z, DzDzRef
+end
+
+# Height transformation named Sleve: with eta = zRef / H, the terrain height zs
+# enters through sinh((etaH - eta) / s / etaH) / sinh(1 / s) for eta <= etaH
+# (etaH = 0.7, s = 0.8) and not at all above. Returns z and dz/dzRef.
+@inline function Sleve(zRef,H,zs)
+  etaH = eltype(zRef)(.7)
+  s = eltype(zRef)(8/10)
+  eta = zRef / H
+  if eta <= etaH
+    z = eta * H + zs * sinh((etaH - eta) / s / etaH) / sinh(1 / s)
+    DzDzRef = eltype(zRef)(1) - zs / H / s / etaH * cosh((etaH - eta) / s / etaH) / sinh(1 / s)
+  else
+    z = eta * H
+    DzDzRef = eltype(zRef)(1)
+  end
+  return z, DzDzRef
+end
diff --git a/src/Grids/TopoNeu.jl b/src/Grids/TopoNeu.jl
index 36b6954..89b4589 100644
--- a/src/Grids/TopoNeu.jl
+++ b/src/Grids/TopoNeu.jl
@@ -75,8 +75,10 @@ end
 function TopoDataETOPO(MinLonL,MaxLonL,MinLonR,MaxLonR,MinLat,MaxLat)
   # Load ETOPO1 ice-sheet surface data
   # Ocean values are considered 0
-  ds = NCDataset("ETOPO1_Ice_g_gdal.grd")
+#  ds = NCDataset("ETOPO1_Ice_g_gdal.grd")
+  ds = NCDataset("ETOPO_2022_v1_60s_N90W180_surface.nc")
   # Unpack information
+  #=
   x_range = ds["x_range"][:]
   y_range = ds["y_range"][:]
   z_range = ds["z_range"][:]
@@ -87,6 +89,13 @@ function TopoDataETOPO(MinLonL,MaxLonL,MinLonR,MaxLonR,MinLat,MaxLat)
   lat = collect(y_range[1]:spacing[2]:y_range[2])
   nlon = dimension[1]
   nlat = dimension[2]
+  =#
+  lon = ds["lon"][:]
+  @show minimum(lon),maximum(lon)
+  lat = ds["lat"][:]
+  elevation = ds["z"][:]
+  nlon = size(lon,1)
+  nlat = size(lat,1)
   dLon = 360.0 / nlon
   dLat = 180.0 / nlat
   ilonLS
= max(floor(Int,(MinLonL+180.)/dLon),1) @@ -103,7 +112,8 @@ function TopoDataETOPO(MinLonL,MaxLonL,MinLonR,MaxLonR,MinLat,MaxLat) temp = max.(reshape(elevation, (nlon, nlat)), 0.0) zlevels = zeros(Float64,nlon,nlat) @inbounds for i = 1 : nlat - @. zlevels[:,i] = temp[:,nlat+1-i] +# @. zlevels[:,i] = temp[:,nlat+1-i] + @. zlevels[:,i] = temp[:,i] end return (lon[ilonLS:ilonLE], lon[ilonRS:ilonRE], lat[ilatS:ilatE], zlevels[ilonLS:ilonLE,ilatS:ilatE], zlevels[ilonRS:ilonRE,ilatS:ilatE]) @@ -204,7 +214,7 @@ function TopoDataGLOBE() end end -function Orography(backend,FT,CG,Global,TopoProfile) +function Orography(backend,FT,CG,Exchange,Global,TopoProfile) Grid = Global.Grid Faces = Grid.Faces Proc = Global.ParallelCom.Proc @@ -246,39 +256,23 @@ function Orography(backend,FT,CG,Global,TopoProfile) return HeightCG end -function Orography(CG,Global) +function Orography(backend,FT,CG,Exchange,Global) Grid = Global.Grid Proc = Global.ParallelCom.Proc OrdPoly = CG.OrdPoly + Glob = CG.Glob + NumG = CG.NumG (MinLonL,MaxLonL,MinLonR,MaxLonR,MinLat,MaxLat) = BoundingBox(Grid) RadEarth = Grid.Rad NF = Grid.NumFaces OP = OrdPoly + 1 HeightCG = zeros(Float64,OP,OP,NF) (lonL, lonR, lat, zLevelL, zLevelR) = TopoDataETOPO(MinLonL,MaxLonL,MinLonR,MaxLonR,MinLat,MaxLat) -# (lon, lat, zLevel) = CGDycore.TopoData() start_Face = 1 - (Glob,NumG) = NumberingFemCG(Grid,OrdPoly); Height = zeros(Float64,NumG) + HeightGPU = KernelAbstractions.zeros(backend,FT,NumG) NumHeight = zeros(Float64,NumG) -# (w,xw) = GaussLobattoQuad(OrdPoly) - xe = zeros(OrdPoly+1) - xe[1] = -1.0 - @inbounds for i = 2 : OrdPoly - xe[i] = CG.xe[i-1] + 2.0/OrdPoly - end - xe[OrdPoly+1] = 1.0 - -# LenLat = length(lat) -# LenLon = length(lon) -# dLon = 360.0 / LenLon -# dLat = 180.0 / LenLat -# ilonLS = max(floor(Int,(MinLonL+180.)/dLon),1) -# ilonLE = min(ceil(Int,(MaxLonL+180.)/dLon),LenLon) -# ilonRS = max(floor(Int,(MinLonR+180.)/dLon),1) -# ilonRE = min(ceil(Int,(MaxLonR+180.)/dLon),LenLon) -# ilatS = 
max(floor(Int,(MinLat+90.)/dLat),1) -# ilatE = min(ceil(Int,(MaxLat+90.)/dLat),LenLat) + xe = CG.xe @inbounds for ilat = 1 : length(lat) @inbounds for ilon = 1 : length(lonL) P = Point(sphereDeg2cart(lonL[ilon],lat[ilat],RadEarth)) @@ -286,8 +280,9 @@ function Orography(CG,Global) start_Face = Face_id Inside = InsideFace(P,Grid.Faces[start_Face],Grid) if Inside - iG = Glob[iPosFace_id,jPosFace_id,Face_id] - Height[iG] += zLevelL[ilon,ilat] + iD = iPosFace_id + (jPosFace_id - 1) * (OrdPoly + 1) + iG = Glob[iD,Face_id] + Height[iG] += max(zLevelL[ilon,ilat],0) NumHeight[iG] += 1 end end @@ -297,19 +292,21 @@ function Orography(CG,Global) start_Face = Face_id Inside = InsideFace(P,Grid.Faces[start_Face],Grid) if Inside - iG = Glob[iPosFace_id,jPosFace_id,Face_id] - Height[iG] += zLevelR[ilon,ilat] + iD = iPosFace_id + (jPosFace_id - 1) * (OrdPoly + 1) + iG = Glob[iD,Face_id] + Height[iG] += max(zLevelR[ilon,ilat],0) NumHeight[iG] += 1 end end end - ExchangeData!(Height,Global.Exchange) - ExchangeData!(NumHeight,Global.Exchange) + Parallels.ExchangeData!(Height,Exchange) + Parallels.ExchangeData!(NumHeight,Exchange) @. Height /= (NumHeight + 1.e-14) @inbounds for iF = 1:NF @inbounds for jP=1:OP @inbounds for iP=1:OP - ind = Glob[iP,jP,iF] + iD = iP + (jP - 1) * OP + ind = Glob[iD,iF] HeightCG[iP,jP,iF] = Height[ind] end end @@ -317,13 +314,31 @@ function Orography(CG,Global) @inbounds for iF = 1:NF @views ChangeBasisHeight!(HeightCG[:,:,iF],HeightCG[:,:,iF],CG) end - SmoothFac=1.e9 -# SmoothFac=1.e15 - FHeightCG = similar(HeightCG) - @inbounds for i=1:30 - TopographySmoothing1!(FHeightCG,HeightCG,CG,Global,SmoothFac) - @. HeightCG += FHeightCG - @. 
HeightCG = max(HeightCG,0.0)
+  @inbounds for iF = 1:NF
+    @inbounds for jP=1:OP
+      @inbounds for iP=1:OP
+        iD = iP + (jP - 1) * OP
+        ind = Glob[iD,iF]
+        Height[ind] = HeightCG[iP,jP,iF]
+      end
+    end
+  end
+
+  copyto!(HeightGPU,Height)
+  @show maximum(HeightGPU)
+  @show minimum(HeightGPU)
+  TopographySmoothing!(HeightGPU,CG,Exchange,Global)
+  @show maximum(HeightGPU)
+  @show minimum(HeightGPU)
+  copyto!(Height,HeightGPU)
+  @inbounds for iF = 1:NF
+    @inbounds for jP=1:OP
+      @inbounds for iP=1:OP
+        iD = iP + (jP - 1) * OP
+        ind = Glob[iD,iF]
+        HeightCG[iP,jP,iF] = Height[ind]
+      end
+    end
   end
   @show maximum(HeightCG)
   @show minimum(HeightCG)
diff --git a/src/Grids/TopographySmoothing.jl b/src/Grids/TopographySmoothing.jl
new file mode 100644
index 0000000..2e64b90
--- /dev/null
+++ b/src/Grids/TopographySmoothing.jl
@@ -0,0 +1,187 @@
+# Smooth the nodal surface height in place.
+#
+# Height is a vector over the global nodes addressed through CG.Glob. The
+# routine builds the two-dimensional sphere metric of every face, assembles
+# the mass vector M and then runs 10 passes. With L denoting one evaluation of
+# HyperViscHeightKernel! followed by Parallels.ExchangeData!, a pass is
+#   "Diff":  Height += 0.5*SmoothFac*L(Height); Height += SmoothFac*L(Height)
+#   "Hyper": Height -= 0.5*SmoothFac*L(L(Height)); Height -= SmoothFac*L(L(Height))
+# and ends by clipping negative heights to zero. SmoothType is fixed to
+# "Hyper" below. Returns nothing.
+function TopographySmoothing!(Height,CG,Exchange,Global)
+
+  backend = get_backend(Height)
+  FT = eltype(Height)
+
+  Grid = Global.Grid
+  DoF = CG.DoF
+  N = CG.OrdPoly + 1
+  NF = Global.Grid.NumFaces
+  X = KernelAbstractions.zeros(backend,FT,DoF,3,NF)
+  dXdxI = KernelAbstractions.zeros(backend,FT,2,2,DoF,NF)
+  J = KernelAbstractions.zeros(backend,FT,DoF,NF)
+  Rad = Grid.Rad
+  # Corner coordinates of every face: filled on the host, then copied to the
+  # backend array that is handed to the kernel.
+  F = zeros(FT,4,3,NF)
+  FGPU = KernelAbstractions.zeros(backend,FT,4,3,NF)
+  for iF = 1 : NF
+    for iP = 1 : 4
+      P = Grid.Faces[iF].P[iP]
+      F[iP,1,iF] = P.x
+      F[iP,2,iF] = P.y
+      F[iP,3,iF] = P.z
+    end
+  end
+  copyto!(FGPU,F)
+  # FGPU, not the host array F, has to reach the kernel.
+  JacobiSphere2GPU!(X,dXdxI,J,CG,FGPU,Rad)
+  M = MassCGGPU2(CG,J,Exchange,Global)
+
+  # Workgroup budget taken from the run configuration, as in MassCGGPU2.
+  NumberThreadGPU = Global.ParallelCom.NumberThreadGPU
+  NFG = min(div(NumberThreadGPU,N*N),NF)
+  group = (N, N, NFG)
+  ndrange = (N, N, NF)
+  KHyperViscHeightKernel! = HyperViscHeightKernel!(backend,group)
+
+  # Out = L(In): clear Out, accumulate the element contributions, wait for the
+  # launch and exchange the result.
+  function ApplyOperator!(Out,In)
+    @. Out = 0
+    KHyperViscHeightKernel!(Out,In,CG.DS,CG.DW,dXdxI,J,M,CG.Glob,ndrange=ndrange)
+    KernelAbstractions.synchronize(backend)
+    Parallels.ExchangeData!(Out,Exchange)
+  end
+
+  SmoothType = "Hyper"
+  # Scalars are converted to FT so that the fused updates stay in the element
+  # type of Height.
+  Half = FT(0.5)
+  FHeight = similar(Height)
+  if SmoothType == "Diff"
+    SmoothFac = FT(1.0e9)
+    for i=1:10
+      ApplyOperator!(FHeight,Height)
+      @. Height += Half * SmoothFac * FHeight
+      ApplyOperator!(FHeight,Height)
+      @. Height += SmoothFac * FHeight
+      @. Height = max(Height,0)
+    end
+  elseif SmoothType == "Hyper"
+    SmoothFac = FT(1.e18)
+    FHeight1 = similar(Height)
+    for i=1:10
+      ApplyOperator!(FHeight1,Height)
+      ApplyOperator!(FHeight,FHeight1)
+      @. Height -= Half * SmoothFac * FHeight
+
+      ApplyOperator!(FHeight1,Height)
+      ApplyOperator!(FHeight,FHeight1)
+      @. Height -= SmoothFac * FHeight
+      @. Height = max(Height,0)
+    end
+  end
+  return nothing
+end
+
+
+# One application of the horizontal operator used for smoothing.
+#
+# For node (I,J) of face IF the kernel loads Height[ind], ind = Glob[ID,IF],
+# applies D in both directions, scales with dXdxI and 1/JJ, applies dXdxI once
+# more, applies DW and atomically adds the result divided by M[ind] to
+# FHeight[ind]. FHeight is only accumulated into and is not cleared here.
+@kernel function HyperViscHeightKernel!(FHeight,@Const(Height),@Const(D),@Const(DW),@Const(dXdxI),
+  @Const(JJ),@Const(M),@Const(Glob))
+
+  I, J, iF   = @index(Local, NTuple)
+  _,_,IF = @index(Global, NTuple)
+
+  FaceTilesDim = @uniform @groupsize()[3]
+  N = @uniform @groupsize()[1]
+  NF = @uniform @ndrange()[3]
+
+  ID = I + (J - 1) * N
+  @inbounds ind = Glob[ID,IF]
+
+  HeightCol = @localmem eltype(FHeight) (N,N, FaceTilesDim)
+  HeightCxCol = @localmem eltype(FHeight) (N,N, FaceTilesDim)
+  HeightCyCol = @localmem eltype(FHeight) (N,N, FaceTilesDim)
+  # Stage 1: cache the nodal heights of the face.
+  if IF <= NF
+    @inbounds HeightCol[I,J,iF] = Height[ind]
+  end
+  @synchronize
+
+  # ID is rebuilt after the barrier as in the original; the repeated Glob
+  # lookup was unused in this stage and has been removed.
+  ID = I + (J - 1) * N
+
+  # Stage 2: derivatives along both directions, metric scaling.
+  if IF <= NF
+    @inbounds Dxc = D[I,1] * HeightCol[1,J,iF]
+    @inbounds Dyc = D[J,1] * HeightCol[I,1,iF]
+    for k = 2 : N
+      @inbounds Dxc += D[I,k] * HeightCol[k,J,iF]
+      @inbounds Dyc += D[J,k] * HeightCol[I,k,iF]
+    end
+    @inbounds GradDx = (dXdxI[1,1,ID,IF] * Dxc +
+      dXdxI[2,1,ID,IF] * Dyc) / JJ[ID,IF]
+    @inbounds GradDy = (dXdxI[1,2,ID,IF] * Dxc +
+      dXdxI[2,2,ID,IF] * Dyc) / JJ[ID,IF]
+    @inbounds tempx = dXdxI[1,1,ID,IF] * GradDx + dXdxI[1,2,ID,IF] * GradDy
+    @inbounds tempy = dXdxI[2,1,ID,IF] * GradDx + dXdxI[2,2,ID,IF] * GradDy
+    @inbounds HeightCxCol[I,J,iF] = tempx
+    @inbounds HeightCyCol[I,J,iF] = tempy
+  end
+
+  @synchronize
+
+  # Stage 3: apply DW to both components and scatter into FHeight.
+  ID = I + (J - 1) * N
+  @inbounds ind = Glob[ID,IF]
+  if IF <= NF
+    @inbounds DivHeight = DW[I,1] * HeightCxCol[1,J,iF] + DW[J,1] * HeightCyCol[I,1,iF]
+    for k = 2 : N
+      @inbounds DivHeight += DW[I,k] * HeightCxCol[k,J,iF] + DW[J,k] * HeightCyCol[I,k,iF]
+    end
+    @inbounds @atomic FHeight[ind] += DivHeight / M[ind]
+  end
+end
+
+# Assemble the global mass vector: M[Glob[ID,IF]] accumulates J[ID,IF] over all
+# faces, followed by Parallels.ExchangeData!. Returns M (length CG.NumG).
+function MassCGGPU2(CG,J,Exchange,Global)
+  backend = get_backend(J)
+  FT = eltype(J)
+  N = CG.OrdPoly + 1
+  NF = size(CG.Glob,2)
+
+  NumberThreadGPU = Global.ParallelCom.NumberThreadGPU
+
+  NFG = min(div(NumberThreadGPU,N*N),NF)
+  group = (N, N, NFG)
+  ndrange = (N, N, NF)
+
+  KMassCG2Kernel! = MassCG2Kernel!(backend,group)
+  M = KernelAbstractions.zeros(backend,FT,CG.NumG)
+  KMassCG2Kernel!(M,J,CG.Glob,ndrange=ndrange)
+  KernelAbstractions.synchronize(backend)
+  Parallels.ExchangeData!(M,Exchange)
+  return M
+end
+
+# Add JJ[ID,IF] to M[Glob[ID,IF]] for every node of every face (atomic update).
+@kernel function MassCG2Kernel!(M,@Const(JJ),@Const(Glob))
+  I,J,IF = @index(Global, NTuple)
+
+  N = @uniform @groupsize()[1]
+  NF = @uniform @ndrange()[3]
+
+  if IF <= NF
+    ID = I + (J - 1) * N
+    @inbounds ind = Glob[ID,IF]
+    @inbounds @atomic M[ind] += JJ[ID,IF]
+  end
+end
diff --git a/src/Outputs/vtkSphere.jl b/src/Outputs/vtkSphere.jl
index 038f4bf..5fba6c8 100644
--- a/src/Outputs/vtkSphere.jl
+++ b/src/Outputs/vtkSphere.jl
@@ -51,10 +51,7 @@ function vtkStruct{FT}(backend,OrdPrint::Int,Trans,CG,Metric,Global) where FT<:A
   lam=zeros(8,1)
   theta=zeros(8,1)
   z=zeros(8,1)
-  if Global.Grid.Form == "Sphere" && Global.Output.Flat
-    dTol=2*pi/max(Global.Output.nPanel-1,1)
-  end
-
+  dTol = Global.Output.dTol
   FTX = eltype(Metric.X)
   X = zeros(FTX,OrdPoly+1,OrdPoly+1,2,3)
 
@@ -93,7 +90,7 @@ function vtkStruct{FT}(backend,OrdPrint::Int,Trans,CG,Metric,Global) where FT<:A
           end
         end
         for i = 1 : 8
-          pts[:,ipts] = [lam[i],theta[i],max(z[i]-Global.Output.RadPrint,0.0)/Global.Output.H*3]
+          pts[:,ipts] = [lam[i],theta[i],max(z[i]-Global.Grid.Rad,0.0)/Global.Grid.H*3]
           ipts = ipts + 1
         end
       else
@@ -146,21 +143,22 @@ function vtkStruct{FT}(backend,OrdPrint::Int,Trans,CG,Metric,Global) where FT<:A
   )
 end
 
-function vtkInit2D(OrdPrint::Int,Trans,CG,Global)
+function vtkInit2D(OrdPrint::Int,Trans,CG,Metric,Global)
   OrdPoly = CG.OrdPoly
   NF = Global.Grid.NumFaces
   Npts = 4 * NF * OrdPrint * OrdPrint
   pts = Array{Float64,2}(undef,3,Npts)
   ipts = 1
-  X = zeros(4,3)
+  x = zeros(4,3)
   lam=zeros(4,1)
   theta=zeros(4,1)
   z=zeros(4,1)
-  if Global.Grid.Form == "Sphere" && Global.Output.Flat
-    dTol=2*pi/max(Global.Output.nPanel,1)/4
-  end
-
+  dTol = Global.Output.dTol
+  FT = eltype(Metric.X)
+  backend = get_backend(Metric.X)
+  X = zeros(FT,OrdPoly+1,OrdPoly+1,2,3)
   for iF
= 1 : NF + @views copyto!(X,reshape(Metric.X[:,:,:,1,iF],OrdPoly+1,OrdPoly+1,2,3)) dd = 2 / OrdPrint eta0 = -1 for jRef = 1 : OrdPrint @@ -168,41 +166,33 @@ function vtkInit2D(OrdPrint::Int,Trans,CG,Global) eta1 = eta0 + dd for iRef = 1 : OrdPrint ksi1 = ksi0 + dd - X[1,:] = Trans(ksi0,eta0, -1.0,Metric.X[:,:,:,:,1,iF],CG,Global) - X[2,:] = Trans(ksi1,eta0, -1.0,Metric.X[:,:,:,:,1,iF],CG,Global) - X[3,:] = Trans(ksi1,eta1, -1.0,Metric.X[:,:,:,:,1,iF],CG,Global) - X[4,:] = Trans(ksi0,eta1, -1.0,Metric.X[:,:,:,:,1,iF],CG,Global) + @views Trans(x[1,:],ksi0,eta0, -1.0,X,CG,Global) + @views Trans(x[2,:],ksi1,eta0, -1.0,X,CG,Global) + @views Trans(x[3,:],ksi1,eta1, -1.0,X,CG,Global) + @views Trans(x[4,:],ksi0,eta1, -1.0,X,CG,Global) if Global.Grid.Form == "Sphere" && Global.Output.Flat for i=1:4 - (lam[i],theta[i],z[i]) = Grids.cart2sphere(X[i,1],X[i,2],X[i,3]) + (lam[i],theta[i],z[i]) = Grids.cart2sphere(x[i,1],x[i,2],x[i,3]) end -#= lammin = minimum(lam) lammax = maximum(lam) - if lammin < 0.0 || lammax > 2*pi -# @show lammin,lammax -# stop - end if abs(lammin - lammax) > 2*pi-dTol -# @show "vor",lam for i = 1 : 4 - if lam[i] < pi - lam[i] = lam[i] + 2*pi + if lam[i] > pi + lam[i] = lam[i] - 2*pi if lam[i] > 3*pi lam[i] = lam[i] - 2*pi end end end -# @show "nac",lam end - =# for i = 1 : 4 pts[:,ipts] = [lam[i],theta[i],max(z[i]-Global.Output.RadPrint,0.0)/Global.Output.H/5.0] ipts = ipts + 1 end else for i=1:4 - pts[:,ipts] = [X[i,1],X[i,2],X[i,3]] + pts[:,ipts] = [x[i,1],x[i,2],x[i,3]] ipts = ipts + 1 end end @@ -239,8 +229,11 @@ function vtkInit2D(OrdPrint::Int,Trans,CG,Global) end eta0 = eta1 end - return vtkStruct( - vtkInter, + dvtkInter = KernelAbstractions.zeros(backend,FT,size(vtkInter)) + copyto!(dvtkInter,vtkInter) + return vtkStruct{FT, + typeof(dvtkInter)}( + dvtkInter, cells, pts, ) diff --git a/src/Parallels/Exchange.jl b/src/Parallels/Exchange.jl index 45c8a80..a8ca466 100644 --- a/src/Parallels/Exchange.jl +++ b/src/Parallels/Exchange.jl @@ -1075,6 
+1075,7 @@ end function ExchangeData!(U::AbstractArray{FT,1},Exchange) where FT<:AbstractFloat + backend = get_backend(U) IndSendBuffer = Exchange.IndSendBuffer IndRecvBuffer = Exchange.IndRecvBuffer NeiProc = Exchange.NeiProc @@ -1088,7 +1089,7 @@ function ExchangeData!(U::AbstractArray{FT,1},Exchange) where FT<:AbstractFloat RecvBuffer = Dict() rreq = MPI.Request[MPI.REQUEST_NULL for _ in (NeiProc .- 1)] @inbounds for iP in eachindex(NeiProc) - RecvBuffer[NeiProc[iP]] = zeros(length(IndRecvBuffer[NeiProc[iP]])) + RecvBuffer[NeiProc[iP]] = KernelAbstractions.zeros(backend,FT,length(IndRecvBuffer[NeiProc[iP]])) tag = Proc + ProcNumber*NeiProc[iP] rreq[iP] = MPI.Irecv!(RecvBuffer[NeiProc[iP]], NeiProc[iP] - 1, tag, MPI.COMM_WORLD) end