Skip to content

Commit

Permalink
Adjusting gitlab yml for internal trigger of ci (#200)
Browse files Browse the repository at this point in the history
* Creating gitlab ci for internal trigger darwin.
  • Loading branch information
Joshua S Brown authored Nov 3, 2020
1 parent af0c2a3 commit 577bde3
Show file tree
Hide file tree
Showing 10 changed files with 375 additions and 2 deletions.
95 changes: 95 additions & 0 deletions .gitlab-ci-darwin.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# Pipeline-wide settings. Jobs forward these values to the scripts under
# scripts/darwin/ as positional arguments, because those scripts are started
# with `env -i` and therefore do not inherit them from the environment.
variables:
  # NOTE(review): presumably read by the runner when it submits the job to the
  # batch scheduler - confirm against the runner configuration.
  SCHEDULER_PARAMETERS: '--nodes=1 --partition=power9 --export=NONE'
  GIT_SUBMODULE_STRATEGY: recursive
  # Environment modules loaded by the build scripts.
  MODULE_CMAKE: "cmake/3.12.4"
  MODULE_CLANG: "clang/8.0.1"
  MODULE_COMPILER: "gcc/7.4.0"
  MODULE_CUDA: "cuda/10.1"
  MODULE_MPI: "openmpi/p9/4.0.2-gcc_7.4.0"
  # Test-time resources. Quoted so the values are unambiguously strings, which
  # is what CI variable values are expected to be.
  NUM_GPU_DEVICES_PER_NODE: "2"
  NUM_MPI_PROC_TESTING: "2"
  OMP_NUM_THREADS: "1"
  # CMake cache options handed to the configure step.
  CMAKE_BUILD_TYPE: "Release"
  Kokkos_ARCH_POWER9: "ON"
  Kokkos_ARCH_VOLTA70: "ON"
  Kokkos_ENABLE_CUDA: "ON"
  Kokkos_ENABLE_CUDA_UVM: "OFF"
  Kokkos_ENABLE_OPENMP: "ON"
  PARTHENON_DISABLE_HDF5: "OFF"

# This pipeline has a single stage; both jobs defined below belong to it.
stages:
  - performance-regression

# Executed ahead of each job's own `script` section.
# `env -i` starts the helper scripts with an empty environment, so the module
# names are expanded by the calling shell and handed over as arguments.
before_script:
  # Step 1: machine setup for the selected compiler and MPI modules.
  - >
    env -i bash --norc --noprofile ./scripts/darwin/setup.sh
    ${MODULE_COMPILER}
    ${MODULE_MPI}
  # Step 2: install the spack packages (parallel HDF5, h5py, matplotlib).
  - >
    env -i bash --norc --noprofile ./scripts/darwin/build_hdf5_parallel.sh
    ${MODULE_COMPILER}
    ${MODULE_MPI}
# Hidden job template (the leading dot keeps it from running on its own).
# The concrete jobs below pull it in through `extends`.
.gcc-mpi-cuda-performance-regression:
  variables:
    BUILD_DIR: "build_power9_perf_regression_gcc_mpi"
    CMAKE_CXX_COMPILER: $CI_PROJECT_DIR/external/Kokkos/bin/nvcc_wrapper
  script:
    # Write the script name and every argument to build_cmd.txt, one per line,
    # in the same order they are passed to build.sh further down.
    - echo "./scripts/darwin/build.sh" > build_cmd.txt
    - echo "BUILD_DIR ${BUILD_DIR} " >> build_cmd.txt
    - echo "CMAKE_BUILD_TYPE ${CMAKE_BUILD_TYPE} " >> build_cmd.txt
    - echo "CMAKE_CXX_COMPILER ${CMAKE_CXX_COMPILER} " >> build_cmd.txt
    - echo "Kokkos_ARCH_POWER9 ${Kokkos_ARCH_POWER9} " >> build_cmd.txt
    - echo "Kokkos_ARCH_VOLTA70 ${Kokkos_ARCH_VOLTA70} " >> build_cmd.txt
    - echo "Kokkos_ENABLE_CUDA ${Kokkos_ENABLE_CUDA} " >> build_cmd.txt
    - echo "Kokkos_ENABLE_CUDA_UVM ${Kokkos_ENABLE_CUDA_UVM} " >> build_cmd.txt
    - echo "Kokkos_ENABLE_OPENMP ${Kokkos_ENABLE_OPENMP} " >> build_cmd.txt
    - echo "NUM_GPU_DEVICES_PER_NODE ${NUM_GPU_DEVICES_PER_NODE}" >> build_cmd.txt
    - echo "NUM_MPI_PROC_TESTING ${NUM_MPI_PROC_TESTING} " >> build_cmd.txt
    - echo "OMP_NUM_THREADS ${OMP_NUM_THREADS} " >> build_cmd.txt
    - echo "PARTHENON_DISABLE_HDF5 ${PARTHENON_DISABLE_HDF5} " >> build_cmd.txt
    - echo "MODULE_CMAKE ${MODULE_CMAKE} " >> build_cmd.txt
    - echo "MODULE_CLANG ${MODULE_CLANG} " >> build_cmd.txt
    - echo "MODULE_COMPILER ${MODULE_COMPILER} " >> build_cmd.txt
    - echo "MODULE_CUDA ${MODULE_CUDA} " >> build_cmd.txt
    - echo "MODULE_MPI ${MODULE_MPI} " >> build_cmd.txt
    # build.sh reads its inputs strictly by position ($1 .. $17). Each value is
    # quoted so that an empty or space-containing variable still occupies its
    # own slot instead of shifting every argument after it.
    - >
      env -i bash --norc --noprofile ./scripts/darwin/build.sh
      "${BUILD_DIR}"
      "${CMAKE_BUILD_TYPE}"
      "${CMAKE_CXX_COMPILER}"
      "${Kokkos_ARCH_POWER9}"
      "${Kokkos_ARCH_VOLTA70}"
      "${Kokkos_ENABLE_CUDA}"
      "${Kokkos_ENABLE_CUDA_UVM}"
      "${Kokkos_ENABLE_OPENMP}"
      "${NUM_GPU_DEVICES_PER_NODE}"
      "${NUM_MPI_PROC_TESTING}"
      "${OMP_NUM_THREADS}"
      "${PARTHENON_DISABLE_HDF5}"
      "${MODULE_CMAKE}"
      "${MODULE_CLANG}"
      "${MODULE_COMPILER}"
      "${MODULE_CUDA}"
      "${MODULE_MPI}"
  # Keep the performance plots written by the regression tests.
  artifacts:
    expire_in: 3 days
    paths:
      - ${CI_PROJECT_DIR}/${BUILD_DIR}/tst/regression/outputs/advection_performance/performance.png
      - ${CI_PROJECT_DIR}/${BUILD_DIR}/tst/regression/outputs/advection_performance_mpi/performance.png

# On-demand variant of the performance regression job: it has to be started by
# hand and is left out of scheduled pipelines.
parthenon-power9-gcc-mpi-cuda-perf-manual:
  stage: performance-regression
  extends: .gcc-mpi-cuda-performance-regression
  when: manual
  except:
    - schedules

# Automatic variant of the performance regression job.
# NOTE(review): entries under `only` are alternatives, so besides scheduled
# pipelines this presumably also runs for pipelines on the master branch -
# confirm that this is intended.
parthenon-power9-gcc-mpi-cuda-perf-schedule:
  stage: performance-regression
  extends: .gcc-mpi-cuda-performance-regression
  only:
    - schedules
    - master

1 change: 0 additions & 1 deletion .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ cache:

variables:
GIT_SUBMODULE_STRATEGY: recursive

stages:
- short
- performance_and_regression
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

### Infrastructure (changes irrelevant to downstream codes)
- [[PR 335]](https://github.com/lanl/parthenon/pull/335) New machine configuration file for LANL's Darwin cluster
- [[PR 200]](https://github.com/lanl/parthenon/pull/200) Adds support for running CI on POWER9 nodes.

### Removed (removing behavior/API/variables/...)

Expand Down
15 changes: 14 additions & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ updated API) are discovered early.

In order to keep the main repository in order, everyone is encouraged to create feature
branches starting with their username, followed by a "/", and ending with a brief
description, e.g., "username/add_feature_xyz".
description, e.g., "username/add\_feature\_xyz".
Working on branches in private forks is also fine but not recommended (as the automated
testing infrastructure will then first work upon opening a pull request).

Expand Down Expand Up @@ -147,6 +147,19 @@ follow the instructions [below](#integrating-the-regression-test-with-cmake) *an
`perf-reg` label to the test (see bottom of the regression
[CMakeLists.txt](tst/regression/CMakeLists.txt)).

A third pipeline runs on LANL internal systems. It is started manually once
approved, and it is also scheduled to run on a daily basis on the development
branch. The internal machines use the newest IBM PowerPC processors (POWER9
architecture) and NVIDIA V100 (Volta) GPUs. Tests run on these systems are
primarily aimed at measuring performance on this specific architecture.
Compilation and testing details can be found by looking in the
[.gitlab-ci-darwin.yml](.gitlab-ci-darwin.yml) file *and* the /scripts/darwin
folder. In summary, the CI build uses release mode, with OpenMP, MPI, HDF5, and
CUDA enabled. All tests are run on a single node with access to two Volta
GPUs. In addition, the regression tests are run in parallel with two MPI
processes, each of which has access to its own Volta GPU. The following
tests are run with this CI: unit, regression, performance.

### Adding Tests

Five categories of tests have been identified in parthenon, and they are
Expand Down
124 changes: 124 additions & 0 deletions scripts/darwin/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
#!/bin/bash
#
# Configure, build and test Parthenon on a Darwin power9 node.
#
# The script is meant to be started from the root of the Parthenon checkout.
# All inputs are positional:
#
#    $1  build directory (wiped and recreated)
#    $2  CMAKE_BUILD_TYPE
#    $3  CMAKE_CXX_COMPILER
#    $4  Kokkos_ARCH_POWER9
#    $5  Kokkos_ARCH_VOLTA70
#    $6  Kokkos_ENABLE_CUDA
#    $7  Kokkos_ENABLE_CUDA_UVM
#    $8  Kokkos_ENABLE_OPENMP
#    $9  NUM_GPU_DEVICES_PER_NODE
#   $10  NUM_MPI_PROC_TESTING
#   $11  OMP_NUM_THREADS
#   $12  PARTHENON_DISABLE_HDF5
#   $13  cmake module
#   $14  clang module
#   $15  compiler module
#   $16  cuda module
#   $17  MPI module
#
# Exits non-zero if the arguments are unusable or if the configure, build or
# test step fails.

# Refuse to run with missing arguments. An empty build directory is rejected
# explicitly because it is later passed to `rm -rf`.
if [ "$#" -lt 17 ]; then
  echo "ERROR: build.sh expects 17 positional arguments but received $#" >&2
  exit 1
fi
if [ -z "$1" ]; then
  echo "ERROR: the build directory (first argument) must not be empty" >&2
  exit 1
fi

build_dir=$1
COMPILER_MODULE=${15}
MPI_MODULE=${17}

# Load system env only
source /etc/bashrc
source /etc/profile

# Point HOME at the directory that contains the checkout (the parent of the
# current directory); the spack installation is looked up relative to it.
export PARTHENON=$(pwd)
cd ../
export HOME=$(pwd)
cd "${PARTHENON}"

# Calculate number of available cores
export J=$(( $(nproc --all) )) && echo "Using ${J} cores during build"

export TMPDIR=${HOME}/tmp

# Temp directory apparently needed by spack because of OSError: [Errno 18] Invalid cross-device link
if [ -d "${TMPDIR}" ]
then
  echo "Removing ${TMPDIR}"
  rm -rf "${TMPDIR}"
fi
echo "Creating tmp directory ${TMPDIR}"
mkdir "${TMPDIR}"

# Translate the module names into the package/version pieces used to build
# the spack specs below.
compiler_version=$(bash "${PARTHENON}/scripts/darwin/get_version.sh" "${COMPILER_MODULE}")
compiler_package=$(bash "${PARTHENON}/scripts/darwin/get_package.sh" "${COMPILER_MODULE}")
mpi_version=$(bash "${PARTHENON}/scripts/darwin/get_version.sh" "${MPI_MODULE}")
mpi_package=$(bash "${PARTHENON}/scripts/darwin/get_package.sh" "${MPI_MODULE}")
wrapper_compiler=$(bash "${PARTHENON}/scripts/darwin/get_cpp.sh" "${compiler_package}")

# The helper scripts print "No matching package" for input they do not
# recognise; stop here rather than feed that text into spack or nvcc_wrapper.
for resolved in "${compiler_version}" "${compiler_package}" \
                "${mpi_version}" "${mpi_package}" "${wrapper_compiler}"
do
  if [ -z "${resolved}" ] || [ "${resolved}" = "No matching package" ]
  then
    echo "ERROR: could not interpret modules '${COMPILER_MODULE}' / '${MPI_MODULE}'" >&2
    exit 1
  fi
done

export NVCC_WRAPPER_DEFAULT_COMPILER=${wrapper_compiler}

# Load system modules
module purge
module load "${13}" # cmake
module load "${14}" # clang for formatting
module load "${COMPILER_MODULE}" # gcc
module load "${MPI_MODULE}" # mpi
module load "${16}" # cuda

# Initialize spack env
. "${HOME}/spack/share/spack/setup-env.sh"

spack env activate ci

# Find compilers
spack compiler find

# Load Spack Modules
spack load "hdf5@1.10.6%${compiler_package}@${compiler_version}" \
  "^${mpi_package}@${mpi_version}%${compiler_package}@${compiler_version}"

spack load py-h5py@2.10.0 "^hdf5@1.10.6%${compiler_package}@${compiler_version}" \
  "^${mpi_package}@${mpi_version}%${compiler_package}@${compiler_version}"

spack load py-mpi4py
spack load py-matplotlib
spack load py-numpy

# Setup build env
export OMP_PROC_BIND=close
export CTEST_OUTPUT_ON_FAILURE=1

# From here on any failing command aborts the script with that command's exit
# status, so no explicit status checks are needed after cmake, make and ctest.
set -e

# Start from an empty build directory
if [ -d "${build_dir}" ]
then
  echo "Removing ${build_dir}"
  rm -rf -- "${build_dir}"
fi
echo "Creating build folder ${build_dir}"
mkdir "${build_dir}"
cd "${build_dir}"

# Collect the configure options once so that the command that is displayed
# and the command that is run cannot drift apart.
cmake_args=(
  "-DCMAKE_BUILD_TYPE=$2"
  "-DCMAKE_CXX_COMPILER=$3"
  "-DKokkos_ARCH_POWER9=$4"
  "-DKokkos_ARCH_VOLTA70=$5"
  "-DKokkos_ENABLE_CUDA=$6"
  "-DKokkos_ENABLE_CUDA_UVM=$7"
  "-DKokkos_ENABLE_OPENMP=$8"
  "-DNUM_GPU_DEVICES_PER_NODE=${9}"
  "-DNUM_MPI_PROC_TESTING=${10}"
  "-DOMP_NUM_THREADS=${11}"
  "-DPARTHENON_DISABLE_HDF5=${12}"
)

# Display build command
echo "cmake ${cmake_args[*]} ../"

# Configure
cmake "${cmake_args[@]}" ../

# Build
make -j "${J}" VERBOSE=1

# Run the test suite (ctest is not given -j, so tests run one at a time)
ctest --output-on-failure

exit 0
47 changes: 47 additions & 0 deletions scripts/darwin/build_hdf5_parallel.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/bin/bash
#
# Install the spack packages used by the Darwin CI build: py-h5py on top of
# HDF5 built against the requested compiler and MPI, plus py-matplotlib.
#
# The script is meant to be started from the root of the Parthenon checkout.
#
#   $1  compiler module, e.g. gcc/7.4.0
#   $2  MPI module, e.g. openmpi/p9/4.0.2-gcc_7.4.0
#
# NOTE(review): failures of the spack commands themselves are not turned into
# a non-zero exit status here; the script ends with the status of the final
# `spack gc`. Confirm whether install failures should abort the pipeline.

if [ "$#" -lt 2 ] || [ -z "$1" ] || [ -z "$2" ]; then
  echo "ERROR: usage: build_hdf5_parallel.sh <compiler module> <mpi module>" >&2
  exit 1
fi

COMPILER_MODULE=$1
MPI_MODULE=$2

# Load system env only
source /etc/bashrc
source /etc/profile

# Point HOME at the directory that contains the checkout (the parent of the
# current directory); the spack installation is looked up relative to it.
export PARTHENON=$(pwd)
cd ../
export HOME=$(pwd)
cd "${PARTHENON}"

# Calculate number of available cores
export J=$(( $(nproc --all) )) && echo "Using ${J} cores during build"

# Translate the module names into the package/version pieces used to build
# the spack spec below.
compiler_version=$(bash "${PARTHENON}/scripts/darwin/get_version.sh" "${COMPILER_MODULE}")
compiler_package=$(bash "${PARTHENON}/scripts/darwin/get_package.sh" "${COMPILER_MODULE}")
mpi_version=$(bash "${PARTHENON}/scripts/darwin/get_version.sh" "${MPI_MODULE}")
mpi_package=$(bash "${PARTHENON}/scripts/darwin/get_package.sh" "${MPI_MODULE}")

# The helper scripts print "No matching package" for input they do not
# recognise; stop here rather than feed that text into a spack spec.
for resolved in "${compiler_version}" "${compiler_package}" \
                "${mpi_version}" "${mpi_package}"
do
  if [ -z "${resolved}" ] || [ "${resolved}" = "No matching package" ]
  then
    echo "ERROR: could not interpret modules '${COMPILER_MODULE}' / '${MPI_MODULE}'" >&2
    exit 1
  fi
done

# Load system modules
module purge
module load "${COMPILER_MODULE}" # gcc
module load "${MPI_MODULE}" # mpi

# Initialize spack env. HOME is the parent of the checkout, so this is the
# same location as ../spack relative to the checkout root.
. "${HOME}/spack/share/spack/setup-env.sh"

spack env activate ci

# Find compilers
spack compiler find

# Install hdf5, will install numpy mpi4py and hdf5
spack install -y --overwrite -j "${J}" py-h5py@2.10.0 \
  "^hdf5@1.10.6%${compiler_package}@${compiler_version}" \
  "^${mpi_package}@${mpi_version}%${compiler_package}@${compiler_version}"

# Run garbage collection
spack gc -y

spack install -y --dont-restage -j "${J}" py-matplotlib

# Run garbage collection
spack gc -y

17 changes: 17 additions & 0 deletions scripts/darwin/get_cpp.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/bash

# Print the C++ compiler command that belongs to a compiler package.
#
#   gcc      -> g++
#   clang    -> clang++
#   openmpi  -> mpic++
#
#   $1  package name; must match one of the names above exactly
#
# For any other input "No matching package" is printed. The message stays on
# stdout, as before, for callers that capture the output, but the script now
# also exits with status 1 so the failure can be detected.
COMPILER="$1"
case "${COMPILER}" in
  gcc)
    echo "g++"
    ;;
  clang)
    echo "clang++"
    ;;
  openmpi)
    echo "mpic++"
    ;;
  *)
    echo "No matching package"
    exit 1
    ;;
esac
17 changes: 17 additions & 0 deletions scripts/darwin/get_package.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/bash

# Print the package name for a module string of the form
#
#   openmpi/2.1.5-pgi_18.3
#
# The package name is the part in front of the first "/". Only gcc, clang and
# openmpi are recognised.
#
#   $1  module string
#
# For any other input "No matching package" is printed. The message stays on
# stdout, as before, for callers that capture the output, but the script now
# also exits with status 1 so the failure can be detected.
MODULE="$1"
case "${MODULE}" in
  gcc/*)
    echo "gcc"
    ;;
  clang/*)
    echo "clang"
    ;;
  openmpi/*)
    echo "openmpi"
    ;;
  *)
    echo "No matching package"
    exit 1
    ;;
esac
10 changes: 10 additions & 0 deletions scripts/darwin/get_version.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/bash

# Print the version contained in a module string such as
#
#   openmpi/2.1.5-pgi_18.3
#
# Two trims are applied to $1: first everything up to and including the last
# "/" is dropped, then everything from the first "-" onwards. For the example
# above the result is 2.1.5.
module_name=$1
after_last_slash=$(printf "%s\n" "${module_name##*/}")
echo "${after_last_slash%%-*}"
Loading

0 comments on commit 577bde3

Please sign in to comment.