Skip to content

Commit

Permalink
Optimized advec_kappa scheme
Browse files Browse the repository at this point in the history
- rlim function inlined and rewritten without division and eps1
- both branches of if uvw0 > 0 calculated, then selected
  in order to enable vectorization.
by Jisk Attema and Fredrik Jansson
  • Loading branch information
fjansson committed Oct 6, 2020
1 parent d8e6c68 commit 20b27f8
Showing 1 changed file with 152 additions and 2 deletions.
154 changes: 152 additions & 2 deletions src/advec_kappa.f90
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,18 @@
!! \endlatexonly
!! This makes the scheme monotone, but also rather dissipative.
!!
!! The limiter is phi(r) = max(0, min(2*r, 2, K(r))), see Eq. (20) in Hundsdorfer 1995,
!! with K(r) = 1./3. + 2./3.*r here, i.e. kappa = 1/3, which gives the third-order upwind-biased scheme.
!!
!! Changes 2020 by Jisk Attema and Fredrik Jansson:
!! - support for non-uniform vertical grid by replacing dzi by 1/dzf(k).
!! neither the gradient nor the limiter has been modified.
!! - vectorization
!! - rlim function inlined and rewritten without division and eps1
!! - both branches of if uvw0 > 0 calculated, then selected
!! in order to enable vectorization
!! - merge k-loops of the x,y,z advection steps for better cache efficiency
!!
! This file is part of DALES.
!
! DALES is free software; you can redistribute it and/or modify
Expand All @@ -34,7 +46,145 @@
! Copyright 1993-2009 Delft University of Technology, Wageningen University, Utrecht University, KNMI
!

subroutine advecc_kappa(putin,putout)
!> Adds the limited (kappa-scheme) advective tendency of a cell-centred field
!! to putout.
!!
!! For every cell face a flux is built as
!!   flux = upwind value + limited correction,
!! where the correction has magnitude min(|d1|, |d2|, |d1/6 + d2/3|) and the
!! factor sign(0.5,d1) + sign(0.5,d2) makes it vanish whenever d1 and d2 have
!! opposite signs. d2 is the difference across the face, d1 the difference one
!! cell further upwind; which side counts as upwind is chosen from the sign of
!! the advecting velocity (u0, v0 or w0) at that face.
!!
!! Both candidate stencils are evaluated and the result is picked with merge,
!! so the inner loops contain no branches.
!!
!! The x, y and z contributions for level k are applied inside one k-loop.
!! The face between levels 1 and 2 is treated separately before that loop
!! because there is no level below 1 to take the far-upwind difference from.
!!
!! \param putin  field to be advected (read only)
!! \param putout tendency array; the flux divergence is accumulated into it
subroutine advecc_kappa(putin,putout)
  use modglobal, only : i1,i2,ih,j1,j2,jh,k1,kmax,dxi,dyi,dzf
  use modfields, only : u0, v0, w0, rhobf
  implicit none
  real, dimension(2-ih:i1+ih,2-jh:j1+jh,k1), intent(in)    :: putin
  real, dimension(2-ih:i1+ih,2-jh:j1+jh,k1), intent(inout) :: putout

  real    :: diff_far, diff_face, upwind            ! selected stencil values
  real    :: diff_far_pos, diff_far_neg             ! far difference for velocity > 0 / <= 0
  real    :: diff_face_pos                          ! face difference for velocity > 0 (negated otherwise)
  real    :: upwind_pos, upwind_neg                 ! upwind cell value for velocity > 0 / <= 0
  real    :: flux                                   ! face flux
  real    :: wgt_below, wgt_here                    ! 1/(rhobf*dzf) of the two cells sharing a z-face
  logical :: flow_positive
  integer :: i, j, k

  ! ---- vertical face between levels 1 and 2 --------------------------------
  ! With upward flow there is no cell below level 1, so the far difference is
  ! set to zero; the min() below then removes the correction entirely.
  wgt_below = 1./(rhobf(1)*dzf(1))
  wgt_here  = 1./(rhobf(2)*dzf(2))
  do j = 2, j1
    do i = 2, i1
      flow_positive = w0(i,j,2) > 0

      diff_far_pos  = 0
      diff_face_pos = rhobf(2) * putin(i,j,2) - rhobf(1) * putin(i,j,1)
      upwind_pos    = rhobf(1) * putin(i,j,1)
      diff_far_neg  = rhobf(2) * putin(i,j,2) - rhobf(3) * putin(i,j,3)
      upwind_neg    = rhobf(2) * putin(i,j,2)

      diff_far  = merge(diff_far_pos,  diff_far_neg,   flow_positive)
      diff_face = merge(diff_face_pos, -diff_face_pos, flow_positive)
      upwind    = merge(upwind_pos,    upwind_neg,     flow_positive)

      flux = upwind + &
             min(abs(diff_far), abs(diff_face), abs((diff_far/6.0) + (diff_face/3.0))) * &
             (sign(0.5, diff_far) + sign(0.5, diff_face))
      flux = flux * w0(i,j,2)

      ! what leaves the lower cell enters the upper one
      putout(i,j,1) = putout(i,j,1) - wgt_below*flux
      putout(i,j,2) = putout(i,j,2) + wgt_here*flux
    end do
  end do

  do k = 1, kmax

    ! ---- x direction: face between cells i-1 and i -------------------------
    do j = 2, j1
      do i = 2, i2
        flow_positive = u0(i,j,k) > 0

        diff_face_pos = putin(i  ,j,k) - putin(i-1,j,k)
        diff_far_pos  = putin(i-1,j,k) - putin(i-2,j,k)
        diff_far_neg  = putin(i  ,j,k) - putin(i+1,j,k)
        upwind_pos    = putin(i-1,j,k)
        upwind_neg    = putin(i  ,j,k)

        diff_far  = merge(diff_far_pos,  diff_far_neg,   flow_positive)
        diff_face = merge(diff_face_pos, -diff_face_pos, flow_positive)
        upwind    = merge(upwind_pos,    upwind_neg,     flow_positive)

        flux = upwind + &
               min(abs(diff_far), abs(diff_face), abs((diff_far/6.0) + (diff_face/3.0))) * &
               (sign(0.5, diff_far) + sign(0.5, diff_face))
        flux = flux * u0(i,j,k) * dxi

        putout(i-1,j,k) = putout(i-1,j,k) - flux
        putout(i  ,j,k) = putout(i  ,j,k) + flux
      end do
    end do

    ! ---- y direction: face between cells j-1 and j -------------------------
    do j = 2, j2
      do i = 2, i1
        flow_positive = v0(i,j,k) > 0

        diff_far_pos  = putin(i,j-1,k) - putin(i,j-2,k)
        diff_far_neg  = putin(i,j  ,k) - putin(i,j+1,k)
        diff_face_pos = putin(i,j  ,k) - putin(i,j-1,k)
        upwind_pos    = putin(i,j-1,k)
        upwind_neg    = putin(i,j  ,k)

        diff_far  = merge(diff_far_pos,  diff_far_neg,   flow_positive)
        diff_face = merge(diff_face_pos, -diff_face_pos, flow_positive)
        upwind    = merge(upwind_pos,    upwind_neg,     flow_positive)

        flux = upwind + &
               min(abs(diff_far), abs(diff_face), abs((diff_far/6.0) + (diff_face/3.0))) * &
               (sign(0.5, diff_far) + sign(0.5, diff_face))
        flux = flux * v0(i,j,k) * dyi

        putout(i,j-1,k) = putout(i,j-1,k) - flux
        putout(i,j  ,k) = putout(i,j  ,k) + flux
      end do
    end do

    ! ---- z direction: face between levels k-1 and k ------------------------
    ! Levels 1 and 2 have no full stencil below them; the 1|2 face was handled
    ! before this loop, so only k >= 3 is treated here.
    if (k >= 3) then
      wgt_below = 1./(rhobf(k-1)*dzf(k-1))
      wgt_here  = 1./(rhobf(k)  *dzf(k)  )
      do j = 2, j1
        do i = 2, i1
          flow_positive = w0(i,j,k) > 0

          diff_far_pos  = rhobf(k-1) * putin(i,j,k-1) - rhobf(k-2) * putin(i,j,k-2)
          diff_face_pos = rhobf(k)   * putin(i,j,k  ) - rhobf(k-1) * putin(i,j,k-1)
          diff_far_neg  = rhobf(k)   * putin(i,j,k  ) - rhobf(k+1) * putin(i,j,k+1)
          upwind_pos    = rhobf(k-1) * putin(i,j,k-1)
          upwind_neg    = rhobf(k)   * putin(i,j,k  )

          diff_far  = merge(diff_far_pos,  diff_far_neg,   flow_positive)
          diff_face = merge(diff_face_pos, -diff_face_pos, flow_positive)
          upwind    = merge(upwind_pos,    upwind_neg,     flow_positive)

          flux = upwind + &
                 min(abs(diff_far), abs(diff_face), abs((diff_far/6.0) + (diff_face/3.0))) * &
                 (sign(0.5, diff_far) + sign(0.5, diff_face))
          flux = flux * w0(i,j,k)

          putout(i,j,k-1) = putout(i,j,k-1) - wgt_below*flux
          putout(i,j,k  ) = putout(i,j,k  ) + wgt_here*flux
        end do
      end do
    end if

  end do
end subroutine advecc_kappa


subroutine advecc_kappa_old(putin,putout)

use modglobal, only : i1,i2,ih,j1,j2,jh,k1,kmax,dxi,dyi,dzf
use modfields, only : u0, v0, w0, rhobf
Expand Down Expand Up @@ -121,7 +271,7 @@ subroutine advecc_kappa(putin,putout)
end do
end do

end subroutine advecc_kappa
end subroutine advecc_kappa_old

subroutine halflev_kappa(putin,putout)

Expand Down

0 comments on commit 20b27f8

Please sign in to comment.