Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update build system and regression testing on Acorn #809

Closed
wants to merge 13 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cmake/configure_wcoss2.cmake
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
set(INLINE_POST ON CACHE BOOL "Enable inline post" FORCE)
set(PARALLEL_NETCDF ON CACHE BOOL "Enable parallel NetCDF" FORCE)
set(DEBUG_LINKMPI OFF CACHE BOOL "Enable linkmpi option when DEBUG mode is on" FORCE)
set(AVX2 OFF CACHE BOOL "Enable AVX2 instruction set" FORCE)
16 changes: 9 additions & 7 deletions modulefiles/ufs_wcoss2
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,18 @@ proc ModulesHelp {} {

module-whatis "loads UFS Model prerequisites on Venus and Mars"

module unload cpe-cray cce
module load cpe-intel intel
module load PrgEnv-intel/8.1.0
module load intel/19.1.3.304
module load craype/2.7.8
module load cray-mpich/8.1.7

module load cmake/3.17.3
module load cmake/3.20.2

module use /lfs/h1/emc/nceplibs/noscrub/hpc-stack/test/noaa/modulefiles/stack
module use /lfs/h1/emc/nceplibs/noscrub/hpc-stack/libs/hpc-stack/modulefiles/stack

module load hpc/1.0.0-beta1
module load hpc-intel/19.1.1.217
module load hpc-cray-mpich/8.0.15
module load hpc/1.2.0
module load hpc-intel/19.1.3.304
module load hpc-cray-mpich/8.1.7

module load ufs_common

Expand Down
16 changes: 9 additions & 7 deletions modulefiles/ufs_wcoss2_debug
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,18 @@ proc ModulesHelp {} {

module-whatis "loads UFS Model prerequisites on Venus and Mars"

module unload cpe-cray cce
module load cpe-intel intel
module load PrgEnv-intel/8.1.0
module load intel/19.1.3.304
module load craype/2.7.8
module load cray-mpich/8.1.7

module load cmake/3.17.3
module load cmake/3.20.2

module use /lfs/h1/emc/nceplibs/noscrub/hpc-stack/test/noaa/modulefiles/stack
module use /lfs/h1/emc/nceplibs/noscrub/hpc-stack/libs/hpc-stack/modulefiles/stack

module load hpc/1.0.0-beta1
module load hpc-intel/19.1.1.217
module load hpc-cray-mpich/8.0.15
module load hpc/1.2.0
module load hpc-intel/19.1.3.304
module load hpc-cray-mpich/8.1.7

module load ufs_common_debug

Expand Down
2,720 changes: 2,720 additions & 0 deletions tests/RegressionTests_wcoss2.log

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions tests/abort_dep_tasks.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python
#!/usr/bin/env python3
from __future__ import print_function
import ecflow
import ecflow as ecflow
import re

# this script will work ONLY for standalone nmmb regression test ecflow workflow
Expand Down
3 changes: 0 additions & 3 deletions tests/compile.sh
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,6 @@ set +x
if [[ $MACHINE_ID == macosx.* ]] || [[ $MACHINE_ID == linux.* ]]; then
source $PATHTR/modulefiles/ufs_${MACHINE_ID}
else
if [[ $MACHINE_ID == wcoss2 ]]; then
source /apps/prod/lmodules/startLmod
fi
# Activate lua environment for gaea
if [[ $MACHINE_ID == gaea.* ]] ; then
source /lustre/f2/pdata/esrl/gsd/contrib/lua-5.1.4.9/init/init_lmod.sh
Expand Down
54 changes: 53 additions & 1 deletion tests/default_vars.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ if [[ $MACHINE_ID = wcoss_cray ]]; then
TASKS_stretch=48 ; TPN_stretch=24 ; INPES_stretch=2 ; JNPES_stretch=4
TASKS_strnest=96 ; TPN_strnest=24 ; INPES_strnest=2 ; JNPES_strnest=4

elif [[ $MACHINE_ID = wcoss_dell_p3 || $MACHINE_ID = wcoss2 ]]; then
elif [[ $MACHINE_ID = wcoss_dell_p3 ]]; then

TASKS_dflt=150 ; TPN_dflt=28 ; INPES_dflt=3 ; JNPES_dflt=8
TASKS_thrd=78 ; TPN_thrd=14 ; INPES_thrd=3 ; JNPES_thrd=4
Expand Down Expand Up @@ -67,6 +67,58 @@ elif [[ $MACHINE_ID = wcoss_dell_p3 || $MACHINE_ID = wcoss2 ]]; then
MPB_cdeps_025="0 39"; APB_cdeps_025="0 39"
OPB_cdeps_025="40 159"; IPB_cdeps_025="160 207"

elif [[ $MACHINE_ID = wcoss2 ]]; then

TASKS_dflt=150 ; TPN_dflt=128 ; INPES_dflt=3 ; JNPES_dflt=8
TASKS_thrd=78 ; TPN_thrd=64 ; INPES_thrd=3 ; JNPES_thrd=4
TASKS_c384=336 ; TPN_c384=128 ; INPES_c384=8 ; JNPES_c384=6
TASKS_stretch=48 ; TPN_stretch=128 ; INPES_stretch=2 ; JNPES_stretch=4
TASKS_strnest=96 ; TPN_strnest=128 ; INPES_strnest=2 ; JNPES_strnest=4

TASKS_cpl_dflt=192; TPN_cpl_dflt=128; INPES_cpl_dflt=3; JNPES_cpl_dflt=8
THRD_cpl_dflt=1; WPG_cpl_dflt=6; MPB_cpl_dflt="0 143"; APB_cpl_dflt="0 149"
OPB_cpl_dflt="150 179"; IPB_cpl_dflt="180 191"

TASKS_cpl_dflt_wwav=392; TPN_cpl_dflt_wwav=128; INPES_cpl_dflt_wwav=3; JNPES_cpl_dflt_wwav=8
THRD_cpl_dflt_wwav=1; WPG_cpl_dflt_wwav=6; MPB_cpl_dflt_wwav="0 143"; APB_cpl_dflt_wwav="0 149"
OPB_cpl_dflt_wwav="150 179"; IPB_cpl_dflt_wwav="180 191"; WPB_cpl_dflt_wwav="192 391"

TASKS_cpl_thrd=120; TPN_cpl_thrd=64; INPES_cpl_thrd=3; JNPES_cpl_thrd=4
THRD_cpl_thrd=2; WPG_cpl_thrd=6; MPB_cpl_thrd="0 77"; APB_cpl_thrd="0 77"
OPB_cpl_thrd="78 107"; IPB_cpl_thrd="108 119"

TASKS_cpl_bmrk=480; TPN_cpl_bmrk=128; INPES_cpl_bmrk=6; JNPES_cpl_bmrk=8
THRD_cpl_bmrk=1; WPG_cpl_bmrk=24; MPB_cpl_bmrk="0 287"; APB_cpl_bmrk="0 311"
OPB_cpl_bmrk="312 431"; IPB_cpl_bmrk="432 479"

TASKS_cpl_wwav=560; TPN_cpl_wwav=128; INPES_cpl_wwav=6; JNPES_cpl_wwav=8
THRD_cpl_wwav=1; WPG_cpl_wwav=24; MPB_cpl_wwav="0 287"; APB_cpl_wwav="0 311"
OPB_cpl_wwav="312 431"; IPB_cpl_wwav="432 479"; WPB_cpl_wwav="480 559"

TASKS_cpl_c192=288; TPN_cpl_c192=128; INPES_cpl_c192=4; JNPES_cpl_c192=8
THRD_cpl_c192=1; WPG_cpl_c192=12; MPB_cpl_c192="0 191"; APB_cpl_c192="0 203"
OPB_cpl_c192="204 263"; IPB_cpl_c192="264 287"

TASKS_cpl_c384=318; TPN_cpl_c384=128; INPES_cpl_c384=3; JNPES_cpl_c384=8
THRD_cpl_c384=1; WPG_cpl_c384=6; MPB_cpl_c384="0 143"; APB_cpl_c384="0 149"
OPB_cpl_c384="150 269"; IPB_cpl_c384="270 317"

TASKS_datm_100=120; TPN_datm_100=128
MPB_datm_100="16 77"; APB_datm_100="0 15"
OPB_datm_100="78 107"; IPB_datm_100="108 119"

TASKS_datm_025=208; TPN_datm_025=128
MPB_datm_025="0 39"; APB_datm_025="0 39"
OPB_datm_025="40 159"; IPB_datm_025="160 207"

TASKS_cdeps_100=40; TPN_cdeps_100=128
MPB_cdeps_100="0 11"; APB_cdeps_100="0 11"
OPB_cdeps_100="12 27"; IPB_cdeps_100="28 39"

TASKS_cdeps_025=208; TPN_cdeps_025=128
MPB_cdeps_025="0 39"; APB_cdeps_025="0 39"
OPB_cdeps_025="40 159"; IPB_cdeps_025="160 207"

elif [[ $MACHINE_ID = orion.* ]]; then

TASKS_dflt=150 ; TPN_dflt=40 ; INPES_dflt=3 ; JNPES_dflt=8
Expand Down
6 changes: 4 additions & 2 deletions tests/detect_machine.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,10 @@ case $(hostname -f) in
m72a2.ncep.noaa.gov) MACHINE_ID=wcoss_dell_p3 ;; ### mars
m72a3.ncep.noaa.gov) MACHINE_ID=wcoss_dell_p3 ;; ### mars

alogin01) MACHINE_ID=wcoss2 ;; ### acorn
alogin02) MACHINE_ID=wcoss2 ;; ### acorn
alogin01.acorn.wcoss2.ncep.noaa.gov) MACHINE_ID=wcoss2 ;; ### acorn
alogin02.acorn.wcoss2.ncep.noaa.gov) MACHINE_ID=wcoss2 ;; ### acorn
adecflow01.acorn.wcoss2.ncep.noaa.gov) MACHINE_ID=wcoss2 ;; ### acorn
adecflow02.acorn.wcoss2.ncep.noaa.gov) MACHINE_ID=wcoss2 ;; ### acorn

gaea9) MACHINE_ID=gaea ;; ### gaea9
gaea10) MACHINE_ID=gaea ;; ### gaea10
Expand Down
3 changes: 1 addition & 2 deletions tests/fv3_conf/compile_qsub.IN_wcoss2
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,14 @@
#PBS -o out
#PBS -e err
#PBS -N @[JBNME]
# #PBS -A @[ACCNR]
#PBS -A @[ACCNR]
#PBS -q @[QUEUE]
#PBS -l select=1:ncpus=8:mpiprocs=1
#PBS -l walltime=00:30:00

set -eux

cd $PBS_O_WORKDIR
export CRAY_CONFIG_DIR=$HOME

echo -n " $( date +%s )," > job_timestamp.txt
echo "Compile started: " `date`
Expand Down
9 changes: 4 additions & 5 deletions tests/fv3_conf/fv3_qsub.IN_wcoss2
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#PBS -o out
#PBS -e err
#PBS -N @[JBNME]
# #PBS -A @[ACCNR]
#PBS -A @[ACCNR]
#PBS -q @[QUEUE]
#PBS -l place=vscatter,select=@[NODES]:ncpus=@[TPN]:mpiprocs=@[TPN]
#PBS -l walltime=00:@[WLCLK]:00
Expand All @@ -12,21 +12,20 @@ set -eux
echo -n " $( date +%s )," > job_timestamp.txt

cd $PBS_O_WORKDIR
export CRAY_CONFIG_DIR=$HOME

source /apps/prod/lmodules/startLmod
module use $( pwd -P )
module load modules.fv3
module load cray-pals
module list

echo "Model started: " `date`

export MPI_TYPE_DEPTH=20
export OMP_STACKSIZE=512M
export OMP_NUM_THREADS=@[THRD]
export OMP_PLACES=cores
export ESMF_RUNTIME_COMPLIANCECHECK=OFF:depth=4

cray aprun -n @[TASKS] ./fv3.exe
mpiexec -n @[TASKS] -ppn @[TPN] -depth @[THRD] ./fv3.exe

echo "Model ended: " `date`
echo -n " $( date +%s )," >> job_timestamp.txt
36 changes: 18 additions & 18 deletions tests/rt.conf
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ COMPILE | -DAPP=S2S -DCCPP_SUITES=FV3_GFS_2017_coupled,FV3_GFS_v16_coupled,FV3_G
RUN | cpld_control | - wcoss_cray | fv3 |
RUN | cpld_restart | - wcoss_cray | | cpld_control
RUN | cpld_2threads | - wcoss_cray | |
RUN | cpld_decomp | - wcoss_cray jet.intel | |
RUN | cpld_decomp | - wcoss_cray wcoss2 jet.intel | |
RUN | cpld_ca | - wcoss_cray | fv3 |

#12h/36h/48h restart tests
Expand All @@ -22,10 +22,10 @@ RUN | cpld_bmark_v16
RUN | cpld_restart_bmark_v16 | - wcoss_cray | | cpld_bmark_v16
RUN | cpld_bmark_v16_nsst | - wcoss_cray | fv3 |

COMPILE | -DAPP=S2SW -DCCPP_SUITES=FV3_GFS_2017_coupled,FV3_GFS_v16_coupled,FV3_GFS_v16_coupled_nsstNoahmpUGWPv1 | - wcoss_cray | fv3 |
RUN | cpld_bmark_wave_v16 | - wcoss_cray | fv3 |
RUN | cpld_bmark_wave_v16_p7b | - wcoss_cray cheyenne.intel | fv3 |
RUN | cpld_control_wave | - wcoss_cray | fv3 |
COMPILE | -DAPP=S2SW -DCCPP_SUITES=FV3_GFS_2017_coupled,FV3_GFS_v16_coupled,FV3_GFS_v16_coupled_nsstNoahmpUGWPv1 | - wcoss_cray wcoss2 | fv3 |
RUN | cpld_bmark_wave_v16 | - wcoss_cray wcoss2 | fv3 |
RUN | cpld_bmark_wave_v16_p7b | - wcoss_cray wcoss2 cheyenne.intel | fv3 |
RUN | cpld_control_wave | - wcoss_cray wcoss2 | fv3 |

COMPILE | -DAPP=S2S -DDEBUG=ON -DCCPP_SUITES=FV3_GFS_2017_coupled | - wcoss_cray | fv3 |
RUN | cpld_debug | - wcoss_cray | fv3 |
Expand All @@ -40,9 +40,9 @@ RUN | control
RUN | control_decomp | | |
RUN | control_2threads | - wcoss_cray | |
RUN | control_restart | | | control
RUN | control_fhzero | | |
RUN | control_fhzero | - wcoss2 | |
RUN | control_CubedSphereGrid | | fv3 |
RUN | control_wrtGauss_netcdf_parallel | | fv3 |
RUN | control_wrtGauss_netcdf_parallel | - wcoss2 | fv3 |
RUN | control_c48 | | fv3 |
RUN | control_c192 | | fv3 |
RUN | control_c384 | - wcoss_cray | fv3 |
Expand All @@ -60,7 +60,7 @@ RUN | regional_restart
RUN | regional_quilt | | fv3 |
RUN | regional_quilt_2threads | - wcoss_cray jet.intel | |
RUN | regional_quilt_hafs | | fv3 |
RUN | regional_quilt_netcdf_parallel | | fv3 |
RUN | regional_quilt_netcdf_parallel | - wcoss2 | fv3 |
RUN | regional_quilt_RRTMGP | | fv3 |

RUN | fv3_gsd | | fv3 |
Expand Down Expand Up @@ -144,14 +144,14 @@ RUN | fv3_esg_HAFS_v0_hwrf_thompson_debug
# HAFS tests #
###################################################################################################################################################################################

COMPILE | -DAPP=HAFS -DCCPP_SUITES=FV3_HAFS_v0_gfdlmp_tedmf,FV3_HAFS_v0_gfdlmp_tedmf_nonsst -D32BIT=ON | | fv3 |
RUN | hafs_regional_atm | | fv3 |
RUN | hafs_regional_atm_ocn | | fv3 |
COMPILE | -DAPP=HAFS -DCCPP_SUITES=FV3_HAFS_v0_gfdlmp_tedmf,FV3_HAFS_v0_gfdlmp_tedmf_nonsst -D32BIT=ON | - wcoss2 | fv3 |
RUN | hafs_regional_atm | - wcoss2 | fv3 |
RUN | hafs_regional_atm_ocn | - wcoss2 | fv3 |

COMPILE | -DAPP=HAFS-ALL -DCCPP_SUITES=FV3_HAFS_v0_gfdlmp_tedmf,FV3_HAFS_v0_gfdlmp_tedmf_nonsst -D32BIT=ON | - wcoss_cray | fv3 |
RUN | hafs_regional_docn | - wcoss_cray | fv3 |
RUN | hafs_regional_docn_oisst | - wcoss_cray | fv3 |
RUN | hafs_regional_datm_cdeps | - wcoss_cray | fv3 |
COMPILE | -DAPP=HAFS-ALL -DCCPP_SUITES=FV3_HAFS_v0_gfdlmp_tedmf,FV3_HAFS_v0_gfdlmp_tedmf_nonsst -D32BIT=ON | - wcoss_cray wcoss2 | fv3 |
RUN | hafs_regional_docn | - wcoss_cray wcoss2 | fv3 |
RUN | hafs_regional_docn_oisst | - wcoss_cray wcoss2 | fv3 |
RUN | hafs_regional_datm_cdeps | - wcoss_cray wcoss2 | fv3 |

###################################################################################################################################################################################
# NEMS Data Atmosphere tests #
Expand Down Expand Up @@ -196,9 +196,9 @@ RUN | datm_cdeps_debug_cfsr
# ATM-WAV tests #
###################################################################################################################################################################################

COMPILE | -DAPP=ATMW -DCCPP_SUITES=FV3_GFS_v16 -D32BIT=ON | - wcoss_cray | fv3 |
RUN | control_atmwav | - wcoss_cray | fv3 |
RUN | control_c384gdas_wav | - wcoss_cray jet.intel cheyenne.intel | fv3 |
COMPILE | -DAPP=ATMW -DCCPP_SUITES=FV3_GFS_v16 -D32BIT=ON | - wcoss_cray wcoss2 | fv3 |
RUN | control_atmwav | - wcoss_cray wcoss2 | fv3 |
RUN | control_c384gdas_wav | - wcoss_cray jet.intel cheyenne.intel wcoss2 | fv3 |

###################################################################################################################################################################################
# ATM-GOCART tests #
Expand Down
Loading