From 20b27f822ad2c1bd0c0499b1d44988b2dbde3134 Mon Sep 17 00:00:00 2001
From: Fredrik Jansson
Date: Tue, 6 Oct 2020 09:45:00 +0200
Subject: [PATCH] Optimized advec_kappa scheme

- rlim function inlined and rewritten without division and eps1
- both branches of if uvw0 > 0 calculated, then selected
  in order to enable vectorization.

by Jisk Attema and Fredrik Jansson
---
 src/advec_kappa.f90 | 154 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 152 insertions(+), 2 deletions(-)

diff --git a/src/advec_kappa.f90 b/src/advec_kappa.f90
index 8d44c29c..6257b528 100644
--- a/src/advec_kappa.f90
+++ b/src/advec_kappa.f90
@@ -16,6 +16,18 @@
 !! \endlatexonly
 !! This makes the scheme monotone, but also rather dissipative.
 !!
+!! Limiter: phi(r) = max(0, min(2*r, 2, K(r))), eq. (20) in Hundsdorfer 1995,
+!! with K(r) = 1./3.+2./3.*r, i.e. kappa = 1/3 -> third-order upwind-biased scheme.
+!!
+!! Changes 2020 by Jisk Attema and Fredrik Jansson:
+!! - support for non-uniform vertical grid by replacing dzi by 1/dzf(k).
+!!   Neither the gradient nor the limiter has been modified.
+!! - vectorization:
+!!   - rlim function inlined and rewritten without division and eps1
+!!   - both branches of if uvw0 > 0 calculated, then selected
+!!     in order to enable vectorization
+!! - merge k-loops of the x,y,z advection steps for better cache efficiency
+!!
 ! This file is part of DALES.
 !
 ! DALES is free software; you can redistribute it and/or modify
@@ -34,7 +46,145 @@
 ! Copyright 1993-2009 Delft University of Technology, Wageningen University, Utrecht University, KNMI
 !
 
-  subroutine advecc_kappa(putin,putout)
+subroutine advecc_kappa(putin,putout)
+  use modglobal, only : i1,i2,ih,j1,j2,jh,k1,kmax,dxi,dyi,dzf
+  use modfields, only : u0, v0, w0, rhobf
+  implicit none
+  real, dimension(2-ih:i1+ih,2-jh:j1+jh,k1), intent(in)    :: putin  ! input field, only read
+  real, dimension(2-ih:i1+ih,2-jh:j1+jh,k1), intent(inout) :: putout ! accumulated in place, never reset here
+  ! Scalar work variables; each one is assigned before it is used in every loop iteration.
+  real      d1,d2,cf                     ! differences and cell value selected by the sign of the velocity
+  real :: d1m, d2m, d1p, cfm, cfp, work ! m: used when the velocity is > 0, p: otherwise; work: interface value, then flux
+  integer   i,j,k
+
+  ! Vertical exchange between layers 1 and 2 (k=2), special case: d1m = 0 because there is no layer below layer 1.
+  do j=2,j1
+    do i=2,i1 ! YES  (NOTE(review): "YES" presumably marks a loop that was checked to vectorize -- confirm)
+      d1m = 0
+      d2m = rhobf(2) * putin(i,j,2) - rhobf(1) * putin(i,j,1)
+      cfm = rhobf(1) * putin(i,j,1)
+      d1p = rhobf(2) * putin(i,j,2) - rhobf(3) * putin(i,j,3)
+      ! d2p = rhobf(1) * putin(i,j,1) - rhobf(2) * putin(i,j,2) would equal -d2m, so -d2m is used instead
+      cfp = rhobf(2) * putin(i,j,2)
+      ! both candidate sets are computed above and only selected here, to enable vectorization
+      if (w0(i,j,2) > 0) then
+        d1 = d1m
+        d2 = d2m
+        cf = cfm
+      else
+        d1 = d1p
+        d2 = -d2m
+        cf = cfp
+      end if
+      ! inlined limiter: 0 if d1 and d2 differ in sign, otherwise their sign times min(|d1|, |d2|, |d1/6 + d2/3|)
+      work = cf + &
+             min(abs(d1), abs(d2), abs((d1/6.0) + (d2/3.0))) * &
+             (sign(0.5, d1) + sign(0.5, d2))
+      ! flux is subtracted from layer 1 and added to layer 2, each scaled by 1/(rhobf*dzf) of its own layer
+      work = work * w0(i,j,2)
+      putout(i,j,1) = putout(i,j,1) - (1./(rhobf(1)*dzf(1)))*work
+      putout(i,j,2) = putout(i,j,2) + (1./(rhobf(2)*dzf(2)))*work
+    end do
+  end do
+  ! x-, y- and the remaining z-exchanges share a single k-loop (merged for better cache efficiency)
+  do k=1,kmax
+    do j=2,j1
+      do i=2,i2 ! YES; exchange between cells i-1 and i (reads i-2 .. i+1)
+        d2m =  putin(i  ,j,k) -putin(i-1,j,k)
+        ! d2p = -putin(i  ,j,k) +putin(i-1,j,k) would equal -d2m, so -d2m is used instead
+        d1m = putin(i-1,j,k)-putin(i-2,j,k)
+        d1p = putin(i  ,j,k)-putin(i+1,j,k)
+        cfm = putin(i-1,j,k)
+        cfp = putin(i  ,j,k)
+
+        if (u0(i,j,k) > 0) then
+          d1 = d1m
+          d2 = d2m
+          cf = cfm
+        else
+          d1 = d1p
+          d2 = -d2m
+          cf = cfp
+        end if
+
+        work = cf + &
+               min(abs(d1), abs(d2), abs((d1/6.0) + (d2/3.0))) * &
+               (sign(0.5, d1) + sign(0.5, d2))
+
+        work = work * u0(i,j,k) * dxi
+        putout(i-1,j,k) = putout(i-1,j,k) - work
+        putout(i,j,k)   = putout(i,j,k)   + work
+      end do
+    end do
+    ! end do      <- former end of the x-direction k-loop (loops merged)
+
+    ! do k=1,kmax <- former start of the y-direction k-loop (loops merged)
+    do j=2,j2
+      do i=2,i1 ! YES; exchange between cells j-1 and j (reads j-2 .. j+1)
+        d1m = putin(i,j-1,k)-putin(i,j-2,k)
+        d1p = putin(i,j  ,k)-putin(i,j+1,k)
+        d2m = putin(i,j  ,k)-putin(i,j-1,k)
+        ! d2p = putin(i,j-1,k)-putin(i,j  ,k) would equal -d2m, so -d2m is used instead
+        cfm = putin(i,j-1,k)
+        cfp = putin(i,j  ,k)
+
+        if (v0(i,j,k) > 0) then
+          d1 = d1m
+          d2 = d2m
+          cf = cfm
+        else
+          d1 = d1p
+          d2 = -d2m
+          cf = cfp
+        end if
+
+        work = cf + &
+               min(abs(d1), abs(d2), abs((d1/6.0) + (d2/3.0))) * &
+               (sign(0.5, d1) + sign(0.5, d2))
+
+        work = work * v0(i,j,k) * dyi
+        putout(i,j-1,k) = putout(i,j-1,k) - work
+        putout(i,j,k)   = putout(i,j,k)   + work
+      end do
+    end do
+    ! end do      <- former end of the y-direction k-loop (loops merged)
+
+    ! do k=3,kmax <- former z-direction k-loop, replaced by the test below; k=2 is handled before the k-loop
+    if (k >= 3) then
+      do j=2,j1
+        do i=2,i1 ! YES; exchange between layers k-1 and k (reads k-2 .. k+1), values weighted by rhobf
+          d1m = rhobf(k-1) * putin(i,j,k-1) - rhobf(k-2) * putin(i,j,k-2)
+          d2m = rhobf(k)   * putin(i,j,k  ) - rhobf(k-1) * putin(i,j,k-1)
+          d1p = rhobf(k)   * putin(i,j,k  ) - rhobf(k+1) * putin(i,j,k+1)
+          ! d2p = rhobf(k-1) * putin(i,j,k-1) - rhobf(k) * putin(i,j,k) would equal -d2m, so -d2m is used instead
+          cfm = rhobf(k-1) * putin(i,j,k-1)
+          cfp = rhobf(k)   * putin(i,j,k  )
+
+          if (w0(i,j,k) > 0) then
+            d1 = d1m
+            d2 = d2m
+            cf = cfm
+          else
+            d1 = d1p
+            d2 = -d2m
+            cf = cfp
+          end if
+
+          work = cf + &
+                 min(abs(d1), abs(d2), abs((d1/6.0) + (d2/3.0))) * &
+                 (sign(0.5, d1) + sign(0.5, d2))
+
+          work = work * w0(i,j,k)
+          putout(i,j,k-1) = putout(i,j,k-1) - (1./(rhobf(k-1)*dzf(k-1)))*work
+          putout(i,j,k)   = putout(i,j,k)   + (1./(rhobf(k)  *dzf(k)  ))*work
+        end do
+      end do
+    end if
+  end do
+end subroutine advecc_kappa
+
+! Previous implementation of advecc_kappa, kept under a new name by this patch.
+subroutine advecc_kappa_old(putin,putout)
 
   use modglobal, only : i1,i2,ih,j1,j2,jh,k1,kmax,dxi,dyi,dzf
   use modfields, only : u0, v0, w0, rhobf
@@ -121,7 +271,7 @@ subroutine advecc_kappa(putin,putout)
     end do
   end do
 
-  end subroutine advecc_kappa
+  end subroutine advecc_kappa_old
 
 
   subroutine halflev_kappa(putin,putout)