Skip to content

Commit

Permalink
fix memory allocation bug: WABBIT now is more certain to allocate exa…
Browse files Browse the repository at this point in the history
…ctly what --memory tells it to
  • Loading branch information
tommy-engels committed Jun 5, 2021
1 parent 10aacfa commit 86c5e48
Show file tree
Hide file tree
Showing 8 changed files with 160 additions and 192 deletions.
280 changes: 104 additions & 176 deletions LIB/MESH/allocate_grid.f90

Large diffs are not rendered by default.

5 changes: 4 additions & 1 deletion LIB/MODULE/module_globals.f90
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@ module module_globals
! this parameter is a hack. in most parts of the code, a block has n_eqn component entries.
! universality dictates that we can also use a different number of components, for example
! when sync'ing the mask function (which in many cases has six entries).
integer, public :: N_MAX_COMPONENTS = 6
! New in 06/2021: the hack continues. We now set this parameter in different places
! to save memory. It can be params%n_eqn (the default in simulations) or 6 (if the mask is synced).
! The new default, used for postprocessing, is 3.
integer, public :: N_MAX_COMPONENTS = 3

!subroutines of this module
interface abort
Expand Down
20 changes: 7 additions & 13 deletions LIB/MPI/module_mpi.f90
Original file line number Diff line number Diff line change
Expand Up @@ -198,8 +198,9 @@ subroutine init_ghost_nodes( params )
allocate( tmp_block( Bs(1)+2*g, Bs(2)+2*g, 1, Neqn) )
end if

! size of ghost nodes buffer. Note this contains only the ghost nodes layer
! for all my blocks. previous versions allocated one of those per "friend"
! size of ghost nodes buffer. The worst case is that we have to send ALL my ghost node
! points to another rank, but this happens only if ALL my blocks have ONLY neighbors
! on other MPIRANKS.
if ( params%dim==3 ) then
buffer_N = number_blocks * Neqn * ( (Bs(1)+2*g)*(Bs(2)+2*g)*(Bs(3)+2*g) - (Bs(1)*Bs(2)*Bs(3)) )
else
Expand All @@ -213,20 +214,14 @@ subroutine init_ghost_nodes( params )
! allocate synch buffer
if (rank==0) then
write(*,'("GHOSTS-INIT: Attempting to allocate the ghost-sync-buffer.")')

write(*,'("GHOSTS-INIT: buffer_N_int=",i12," buffer_N=",i12," Nstages=",i1)') &
buffer_N_int, buffer_N, Nstages

write(*,'("GHOSTS-INIT: On each MPIRANK, Int buffer:", f9.4, "GB")') &
2.0*dble(buffer_N_int)*dble(Nstages)*8e-9

write(*,'("GHOSTS-INIT: On each MPIRANK, Real buffer:", f9.4, "GB")') &
2.0*dble(buffer_N)*dble(Nstages)*8e-9
write(*,'("GHOSTS-INIT: buffer_N_int=",i12," buffer_N=",i12," Nstages=",i1)') buffer_N_int, buffer_N, Nstages
write(*,'("GHOSTS-INIT: Int buffer:", f9.4, " GB per rank")') 2.0*dble(buffer_N_int)*dble(Nstages)*8e-9
write(*,'("GHOSTS-INIT: Real buffer:", f9.4, " GB per rank")') 2.0*dble(buffer_N)*dble(Nstages)*8e-9
write(*,'("---------------- allocating now ----------------")')
endif

! wait now so that if allocation fails, we get at least the above info
call MPI_barrier( WABBIT_COMM, status(1))
call MPI_barrier( WABBIT_COMM, status(1) )

allocate( int_send_buffer( 1:buffer_N_int, 1:Nstages), stat=status(1) )
allocate( int_recv_buffer( 1:buffer_N_int, 1:Nstages), stat=status(2) )
Expand All @@ -236,7 +231,6 @@ subroutine init_ghost_nodes( params )
if (maxval(status) /= 0) call abort(999999, "Buffer allocation failed. Not enough memory?")

if (rank==0) then

write(*,'("GHOSTS-INIT: on each mpirank, Allocated ",A25," SHAPE=",7(i9,1x))') &
"new_send_buffer", shape(new_send_buffer)

Expand Down
23 changes: 21 additions & 2 deletions LIB/MPI/synchronize_ghosts_generic.f90
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ subroutine synchronize_ghosts_generic_sequence( params, lgt_block, hvy_block, hv

integer(kind=ik) :: ijk(2,3)
integer(kind=ik) :: bounds_type, istage, istage_buffer(1:4), rounds(1:4), inverse
logical, save :: informed = .false.


if (.not. ghost_nodes_module_ready) then
Expand All @@ -42,8 +43,23 @@ subroutine synchronize_ghosts_generic_sequence( params, lgt_block, hvy_block, hv
! if this mpirank has no active blocks, it has nothing to do here.
if (hvy_n == 0) return

if (size(hvy_block,4)>N_max_components) then
call abort(160720191,"You try to ghost-sync a vector with too many components.")
if (size(hvy_block,4)>N_MAX_COMPONENTS .and. .not. informed) then
if (params%rank ==0) then
write(*,*) "-------------------------------------------------------------------------"
write(*,*) "---warning---warning---warning---warning---warning---warning---warning---"
write(*,*) "---warning---warning---warning---warning---warning---warning---warning---"
write(*,*) "-------------------------------------------------------------------------"
write(*,*) " A warning from the ghost nodes module: we have allocated a buffer with an estimation for"
write(*,*) " neqn=", N_MAX_COMPONENTS, " components of a vector, but you try to sync"
write(*,*) " neqn=", size(hvy_block,4), " This may work just fine: but in some (rare) cases, "
write(*,*) " we will see a buffer overflow. The code will then abort with an error, and you have"
write(*,*) " to restart this simulation with more memory."
write(*,*) "-------------------------------------------------------------------------"
write(*,*) "---warning---warning---warning---warning---warning---warning---warning---"
write(*,*) "---warning---warning---warning---warning---warning---warning---warning---"
write(*,*) "-------------------------------------------------------------------------"
endif
informed = .true.
endif

Bs = params%Bs
Expand Down Expand Up @@ -1053,6 +1069,9 @@ subroutine AppendLineToBuffer( int_send_buffer, new_send_buffer, buffer_size, ne

! real data
if (buffer_size>0) then
    ! Copy the real-valued data of this line into the send buffer of the current stage.
    ! The last element written is new_send_buffer(i0+buffer_size-1, istage). That index is
    ! still valid when it equals size(new_send_buffer,1), so we abort only if it would
    ! exceed the first dimension: a ">=" test would reject a buffer that is filled exactly.
    ! NOTE(review): assumes the first dimension of new_send_buffer starts at index 1 - confirm.
    if (i0+buffer_size-1 > size(new_send_buffer,1)) then
        call abort(202106049, "Internal bug: we ran out of space for the ghost nodes. Restart simulation with more memory.")
    endif
    new_send_buffer( i0:i0+buffer_size-1, istage ) = line_buffer(1:buffer_size)
endif

Expand Down
13 changes: 13 additions & 0 deletions LIB/PARAMS/ini_file_to_params.f90
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,15 @@ subroutine ini_file_to_params( params, filename )
call read_param_mpi(FILE, 'VPM', 'mask_time_independent_part', params%mask_time_independent_part, .true.)
call read_param_mpi(FILE, 'VPM', 'dont_use_pruned_tree_mask', params%dont_use_pruned_tree_mask, .false.)

if (params%physics_type == "ACM-new") then
if (params%penalization) then
if ((.not.params%dont_use_pruned_tree_mask).and.(params%mask_time_independent_part)) then
! we sync the mask array in this case, which has 6 components
N_MAX_COMPONENTS = max(6, params%n_eqn)
endif
endif
endif

! decide if we use Harten's point-value multiresolution transform, which uses a coarsening operator
! that just takes every 2nd grid point, or biorthogonal wavelets, which apply a smoothing filter (lowpass)
! prior to downsampling.
Expand Down Expand Up @@ -270,6 +279,10 @@ subroutine ini_blocks(params, FILE )
call abort(170619,"Error: Max treelevel cannot be larger 18 (64bit long integer problem) ")
end if

! the default case is that we synchronize (ghosts) with n_eqn components in the vector.
! This may be overwritten (by six) if the pruned tree mask is used.
N_MAX_COMPONENTS = params%n_eqn

! read switch to turn on|off mesh refinement
call read_param_mpi(FILE, 'Blocks', 'adapt_mesh', params%adapt_mesh, .true. )
call read_param_mpi(FILE, 'Blocks', 'adapt_inicond', params%adapt_inicond, params%adapt_mesh )
Expand Down
6 changes: 6 additions & 0 deletions LIB/POSTPROCESSING/module_MOR.f90
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,8 @@ subroutine post_POD(params)
call get_cmd_arg( "--start_from_eigenbasis", eigenbasis_files)
call get_cmd_arg( "--components", n_components, default=1_ik)

N_MAX_COMPONENTS = n_components ! used for ghost node sync'ing (buffer allocation)

!-------------------------------
! Set some wabbit specific params
!-------------------------------
Expand Down Expand Up @@ -751,6 +753,7 @@ subroutine post_PODerror(params)
call get_cmd_arg( "--components", n_components, default=1_ik)
call get_cmd_arg( "--iteration", iteration, default=1_ik)

N_MAX_COMPONENTS = n_components ! used for ghost node sync'ing (buffer allocation)

if ( iteration>0 ) then
if ( params%rank == 0 ) write(*,*) "Iteration reconstructed: " ,iteration
Expand Down Expand Up @@ -1320,6 +1323,7 @@ subroutine post_reconstruct(params)
call get_cmd_arg( "--iteration", iteration, default=-1_ik)
call get_cmd_arg( "--nmodes", N_modes_used, default=1_ik)

N_MAX_COMPONENTS = n_components ! used for ghost node sync'ing (buffer allocation)

if ( iteration>0 ) then
save_all = .False.
Expand Down Expand Up @@ -1729,6 +1733,8 @@ subroutine post_timecoef_POD(params)
call get_cmd_arg( "--components", n_components, default=1_ik)
call get_cmd_arg( "--iteration", iteration, default=1)

N_MAX_COMPONENTS = n_components ! used for ghost node sync'ing (buffer allocation)

if ( iteration>0 ) then
if ( params%rank == 0 ) write(*,*) "Iteration reconstructed: " ,iteration
endif
Expand Down
2 changes: 2 additions & 0 deletions LIB/POSTPROCESSING/post_generate_forest.f90
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ subroutine post_generate_forest(params)
params%block_distribution = "sfc_hilbert"
params%time_step_method = 'none'

N_MAX_COMPONENTS = params%n_eqn ! used for ghost node sync'ing (buffer allocation)


! we have to allocate grid if this routine is called for the first time
call allocate_forest(params, lgt_block, hvy_block, hvy_neighbor, lgt_active, &
Expand Down
3 changes: 3 additions & 0 deletions TESTING/runtests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,10 @@ do
echo "Writing output to: " ${logfile}

# run the actual test
# Time the test run: T2 first holds the start time (seconds since epoch),
# afterwards the elapsed wall-clock seconds.
T2="$(date +%s)"
./${ts} > $logfile
# Save the exit status of the test immediately: the assignment and the echo
# below both overwrite $?, which would make every test look like a pass.
test_status=$?
T2="$(($(date +%s)-T2))"
echo "Time used in test: ${T2} seconds"
# Restore the test's exit status so the pass/fail check on $? that follows
# evaluates the test itself and not the echo above.
(exit ${test_status})

if [ $? == 0 ]; then
printf "%s \n" "${pass_color} pass ${end_color}"
Expand Down

0 comments on commit 86c5e48

Please sign in to comment.