Skip to content

Commit

Permalink
Merge branch 'NOAA-EMC:develop' into feature/gfsv17_issue_2125
Browse files Browse the repository at this point in the history
  • Loading branch information
HenryRWinterbottom authored Feb 13, 2024
2 parents 2d08d01 + 3f99f70 commit 929b903
Show file tree
Hide file tree
Showing 29 changed files with 677 additions and 84 deletions.
189 changes: 189 additions & 0 deletions Jenkinsfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
// Pipeline-wide state shared between stages; every value starts as the placeholder 'none'.
// Machine: `machine` with its first letter upper-cased; used to build the CI-<Machine>-* PR label names.
def Machine = 'none'
// machine: lower-case machine name chosen from the PR labels; used to build the "<machine>-emc" agent label.
def machine = 'none'
// HOME: set to "<WORKSPACE>/TESTDIR" in the 'Get Common Workspace' stage; parent of the per-system builds and RUNTESTS.
def HOME = 'none'
// localworkspace: env.WORKSPACE as seen by the 'Get Machine' stage on the built-in node.
def localworkspace = 'none'
// commonworkspace: WORKSPACE as seen by the 'Get Common Workspace' stage on the machine's agent.
def commonworkspace = 'none'

pipeline {
agent { label 'built-in' }

options {
skipDefaultCheckout()
buildDiscarder(logRotator(numToKeepStr: '2'))
}

stages { // This initial stage is used to get the Machine name from the GitHub labels on the PR
// which is used to designate the Nodes in the Jenkins Controller by the agent label.
// Each Jenkins Node is connected to said machine via a Java agent over an ssh tunnel.

stage('Get Machine') {
    agent { label 'built-in' }
    steps {
        script {
            // Lookup table: "ready" PR label -> lower-case machine name it designates.
            def machineForLabel = [
                'CI-Hera-Ready'    : 'hera',
                'CI-Orion-Ready'   : 'orion',
                'CI-Hercules-Ready': 'hercules'
            ]

            localworkspace = env.WORKSPACE
            machine = 'none'

            // Every label is echoed; when several ready-labels are present the last one wins.
            for (label in pullRequest.labels) {
                echo "Label: ${label}"
                def selected = machineForLabel[label]
                if (selected != null) {
                    machine = selected
                }
            }

            // A second variable holds the machine name with its first letter capitalized,
            // because that is how the machine name is spelled inside the GitHub labels.
            Machine = machine[0].toUpperCase() + machine.substring(1)
        }
    }
}

stage('Get Common Workspace') {
    agent { label "${machine}-emc" }
    steps {
        script {
            // Register the node parameter for this job.
            // NOTE(review): 'Hercules-EMC' is not in allowedSlaves although the
            // 'Get Machine' stage can select hercules - confirm whether that is intended.
            properties([
                parameters([
                    [
                        $class: 'NodeParameterDefinition',
                        allowedSlaves: ['built-in', 'Hera-EMC', 'Orion-EMC'],
                        defaultSlaves: ['built-in'],
                        name: '',
                        nodeEligibility: [$class: 'AllNodeEligibility'],
                        triggerIfResult: 'allCases'
                    ]
                ])
            ])

            // Record the shared directories used by all later stages.
            HOME = "${WORKSPACE}/TESTDIR"
            commonworkspace = "${WORKSPACE}"
            sh(script: "mkdir -p ${HOME}/RUNTESTS", returnStatus: true)

            // Move the PR label from "Ready" to "Building".
            def readyLabel = "CI-${Machine}-Ready"
            pullRequest.addLabel("CI-${Machine}-Building")
            def hasReadyLabel = pullRequest.labels.any { it.matches(readyLabel) }
            if (hasReadyLabel) {
                pullRequest.removeLabel(readyLabel)
            }
        }
    }
}

// Builds each system of the matrix axis `system` (gfs, gefs) in its own directory under the common HOME,
// then moves the PR label from CI-<Machine>-Building to CI-<Machine>-Running.
// NOTE(review): every sh call in this stage passes returnStatus: true and the returned status is discarded,
// so a failing command does not fail the stage and BUILT_semaphor is written even when the build command fails.
stage('Build System') {
matrix {
agent { label "${machine}-emc" }
//options {
// throttle(['global_matrix_build'])
//}
axes {
axis {
name 'system'
values 'gfs', 'gefs'
}
}
stages {
stage('build system') {
steps {
script {
def HOMEgfs = "${HOME}/${system}" // local HOMEgfs is used to build the system on per system basis under the common workspace HOME
sh(script: "mkdir -p ${HOMEgfs}", returnStatus: true)
ws(HOMEgfs) {
env.MACHINE_ID = machine // MACHINE_ID is used in the build scripts to determine the machine and is added to the shell environment
if (fileExists("${HOMEgfs}/sorc/BUILT_semaphor")) { // if the system is already built, skip the build in the case of re-runs
// The semaphore content is read but the returned text is not used.
sh(script: "cat ${HOMEgfs}/sorc/BUILT_semaphor", returnStdout: true).trim() // TODO: add user configurable control to manage build semaphore
ws(commonworkspace) { pullRequest.comment("Cloned PR already built (or build skipped) on ${machine} in directory ${HOMEgfs}") }
} else {
checkout scm
sh(script: 'source workflow/gw_setup.sh;which git;git --version;git submodule update --init --recursive', returnStatus: true)
def builds_file = readYaml file: 'ci/cases/yamls/build.yaml'
def build_args_list = builds_file['builds']
// `builds` is a list of single-key maps, so indexing it by the system name presumably yields one value per
// entry with null for the entries of other systems; the nulls are joined in and then stripped as text.
def build_args = build_args_list[system].join(' ').trim().replaceAll('null', '')
dir("${HOMEgfs}/sorc") {
sh(script: "${build_args}", returnStatus: true)
sh(script: './link_workflow.sh', returnStatus: true)
// Marker file checked at the top of this stage to skip the build on re-runs.
sh(script: "echo ${HOMEgfs} > BUILT_semaphor", returnStatus: true)
}
}
if (pullRequest.labels.any { value -> value.matches("CI-${Machine}-Building") }) {
pullRequest.removeLabel("CI-${Machine}-Building")
}
pullRequest.addLabel("CI-${Machine}-Running")
}
}
}
}
}
}
}

stage('Run Tests') {
matrix {
agent { label "${machine}-emc" }
axes {
axis {
name 'Case'
// TODO: add a dynamic list of cases from env vars (needs additional plugins)
values 'C48_ATM', 'C48_S2SWA_gefs', 'C48_S2SW', 'C96_atm3DVar', 'C48mx500_3DVarAOWCDA', 'C96C48_hybatmDA', 'C96_atmsnowDA'
}
}
stages {
stage('Create Experiment') {
    steps {
        script {
            // Write a copy of the case YAML without any line containing "{...}"
            // (presumably because the template placeholders are not plain YAML)
            // so that readYaml can be used to look up the system of this case.
            sh(script: "sed -n '/{.*}/!p' ${HOME}/gfs/ci/cases/pr/${Case}.yaml > ${HOME}/gfs/ci/cases/pr/${Case}.yaml.tmp", returnStatus: true)
            def yaml_case = readYaml file: "${HOME}/gfs/ci/cases/pr/${Case}.yaml.tmp"

            // `system` must be a local (def): matrix cells run in parallel, and an
            // undeclared variable would live in the script binding shared by all cells,
            // letting a gefs case overwrite the value while a gfs case is still using it.
            def system = yaml_case.experiment.system
            def HOMEgfs = "${HOME}/${system}" // local HOMEgfs is used to populate the XML on per system basis

            env.RUNTESTS = "${HOME}/RUNTESTS"
            sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh create_experiment ${HOMEgfs}/ci/cases/pr/${Case}.yaml", returnStatus: true)
        }
    }
}
stage('Run Experiments') {
    steps {
        script {
            // Locals are declared with def: matrix cells run in parallel and undeclared
            // variables would be shared (and overwritten) across all cells.
            def HOMEgfs = "${HOME}/gfs" // common HOMEgfs is used to launch the scripts that run the experiments
            ws(HOMEgfs) {
                // Name of the experiment directory (case name plus tag) created for this case.
                def pslot = sh(script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh get_pslot ${HOME}/RUNTESTS ${Case}", returnStdout: true).trim()
                pullRequest.comment("**Running** experiment: ${Case} on ${Machine}<br>With the experiment in directory:<br>`${HOME}/RUNTESTS/${pslot}`")
                try {
                    // No returnStatus here: with returnStatus: true the step returns the exit
                    // code instead of throwing, so the catch block below could never run and
                    // SUCCESS would be reported even when the experiment failed.
                    sh(script: "${HOMEgfs}/ci/scripts/run-check_ci.sh ${HOME} ${pslot}")
                } catch (Exception e) {
                    pullRequest.comment("**FAILURE** running experiment: ${Case} on ${Machine}")
                    error("Failed to run experiments ${Case} on ${Machine}")
                }
                pullRequest.comment("**SUCCESS** running experiment: ${Case} on ${Machine}")
            }
        }
    }
}
}
}
}
}

post {
    // Always: remove every PR label that mentions this machine before the final label is added.
    always {
        script {
            if (env.CHANGE_ID) {
                for (label in pullRequest.labels) {
                    if (label.contains("${Machine}")) {
                        pullRequest.removeLabel(label)
                    }
                }
            }
        }
    }
    // Success: label the PR as passed and post a time-stamped comment.
    success {
        script {
            if (env.CHANGE_ID) {
                pullRequest.addLabel("CI-${Machine}-Passed")
                def timestamp = new Date().format('MM dd HH:mm:ss', TimeZone.getTimeZone('America/New_York'))
                pullRequest.comment("**CI SUCCESS** ${Machine} at ${timestamp}\n\nBuilt and ran in directory `${HOME}`")
            }
        }
    }
    // Failure: label the PR as failed, post a comment, and archive the log files listed in ci.log.
    failure {
        script {
            if (env.CHANGE_ID) {
                pullRequest.addLabel("CI-${Machine}-Failed")
                def timestamp = new Date().format('MM dd HH:mm:ss', TimeZone.getTimeZone('America/New_York'))
                pullRequest.comment("**CI FAILED** ${Machine} at ${timestamp}<br>Built and ran in directory `${HOME}`")
            }
            // The path must be a double-quoted string: in single quotes Groovy does not
            // interpolate ${HOME}, so the check looked for a literal "${HOME}/..." path
            // and the archiving below never ran.
            def ciLog = "${HOME}/RUNTESTS/ci.log"
            if (fileExists(ciLog)) {
                def fileContent = readFile(ciLog)
                // Each line of ci.log that mentions a .log file is archived as an artifact.
                fileContent.eachLine { line ->
                    if (line.contains('.log')) {
                        archiveArtifacts artifacts: "${line}", fingerprint: true
                    }
                }
            }
        }
    }
}
}
22 changes: 22 additions & 0 deletions ci/cases/pr/C48mx500_3DVarAOWCDA.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# CI case definition: a cycled experiment for the gfs system.
# The Jenkinsfile 'Create Experiment' stage reads experiment.system to choose the per-system build directory.
experiment:
system: gfs
mode: cycled

# Values written with double-brace placeholders are filled in by templating;
# presumably the getenv ones come from the environment variable of that name - confirm.
arguments:
pslot: {{ 'pslot' | getenv }}
app: S2S
resdetatmos: 48
resdetocean: 5.0
comroot: {{ 'RUNTESTS' | getenv }}/COMROOT
expdir: {{ 'RUNTESTS' | getenv }}/EXPDIR
icsdir: {{ 'ICSDIR_ROOT' | getenv }}/C48mx500
idate: 2021032412
edate: 2021032418
nens: 0
gfs_cyc: 0
start: warm
yaml: {{ HOMEgfs }}/ci/cases/yamls/soca_gfs_defaults_ci.yaml

# NOTE(review): presumably the hosts on which CI does not run this case - confirm against the consumer of this key.
skip_ci_on_hosts:
- orion
- hercules
3 changes: 3 additions & 0 deletions ci/cases/yamls/build.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Build command for each system; the Jenkinsfile 'Build System' stage reads this file,
# picks the entry for the system being built, and runs the command from that system's sorc directory.
builds:
- gefs: './build_all.sh'
- gfs: './build_all.sh -gu'
5 changes: 5 additions & 0 deletions ci/cases/yamls/soca_gfs_defaults_ci.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Configuration referenced by the `yaml:` argument of the C48mx500_3DVarAOWCDA CI case.
# NOTE(review): the INC tag presumably pulls in the stock gfs defaults, which `base` then overrides - confirm.
defaults:
!INC {{ HOMEgfs }}/parm/config/gfs/yaml/defaults.yaml
base:
ACCOUNT: {{ 'SLURM_ACCOUNT' | getenv }}
DO_JEDIOCNVAR: "YES"
4 changes: 3 additions & 1 deletion ci/scripts/run-check_ci.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ pslot=${2:-${pslot:-?}} # Name of the experiment being tested by this scr
# │   └── ${pslot}
# └── EXPDIR
# └── ${pslot}
HOMEgfs="${TEST_DIR}/HOMEgfs"
# Two system build directories are created at build time: gfs and gefs
# TODO: Make this configurable (for now all scripts run from gfs for CI at runtime)
HOMEgfs="${TEST_DIR}/gfs"
RUNTESTS="${TEST_DIR}/RUNTESTS"

# Source modules and setup logging
Expand Down
124 changes: 110 additions & 14 deletions ci/scripts/utils/ci_utils.sh
Original file line number Diff line number Diff line change
@@ -1,24 +1,120 @@
#!/bin/env bash

function cancel_slurm_jobs() {
function determine_scheduler() {
  # Print the name of the batch scheduler whose submit command is available:
  #   "slurm"   when sbatch is found,
  #   "torque"  when sbatch is not found but qsub is,
  #   "unknown" otherwise.
  local detected="unknown"

  if command -v sbatch > /dev/null 2>&1; then
    detected="slurm"
  elif command -v qsub > /dev/null 2>&1; then
    detected="torque"
  fi

  echo "${detected}"
}

# Usage: cancel_slurm_jobs <substring>
# Example: cancel_slurm_jobs "C48_ATM_3c4e7f74"
function cancel_batch_jobs() {
  # Usage: cancel_batch_jobs <substring>
  # Example: cancel_batch_jobs "C48_ATM_3c4e7f74"
  #
  # Cancel all of the current user's batch jobs that have the given substring
  # in their name; in the example, every job with "C48_ATM_3c4e7f74" in its
  # name is canceled.
  #
  # The scheduler is selected with determine_scheduler:
  #   torque -> jobs listed with qstat and removed with qdel
  #   slurm  -> jobs listed with squeue/sacct and removed with scancel
  # Any other scheduler prints a FATAL message and exits with status 1.

  local substring=$1
  local scheduler
  local job_ids
  local job_id
  local job_name

  scheduler=$(determine_scheduler)

  # The original compared the misspelled "${schduler}" here, which is always
  # empty, so the torque branch could never be taken.
  if [[ "${scheduler}" == "torque" ]]; then
    job_ids=$(qstat -u "${USER}" | awk '{print $1}') || true

    for job_id in ${job_ids}; do
      job_name=$(qstat -f "${job_id}" | grep Job_Name | awk '{print $3}') || true
      if [[ "${job_name}" =~ ${substring} ]]; then
        echo "Canceling PBS Job ${job_name} with: qdel ${job_id}"
        qdel "${job_id}"
      fi
    done

  elif [[ "${scheduler}" == "slurm" ]]; then

    job_ids=$(squeue -u "${USER}" -h -o "%i")

    for job_id in ${job_ids}; do
      # The third line of the sacct output holds the job name; strip its padding.
      job_name=$(sacct -j "${job_id}" --format=JobName%100 | head -3 | tail -1 | sed -r 's/\s+//g') || true
      if [[ "${job_name}" =~ ${substring} ]]; then
        echo "Canceling Slurm Job ${job_name} with: scancel ${job_id}"
        scancel "${job_id}"
      fi
    done

  else
    echo "FATAL: Unknown/unsupported job scheduler"
    exit 1
  fi
}


function get_pr_case_list () {

  #############################################################
  # Print the name of every CI case defined in the PR, one per
  # line: the basename (without .yaml) of each yaml file found
  # in ${HOMEgfs}/ci/cases/pr.
  # Prints nothing when the directory holds no yaml files.
  #############################################################
  local yaml_config
  local case_name

  for yaml_config in "${HOMEgfs}/ci/cases/pr/"*.yaml; do
    # An unmatched glob is left as the literal pattern; skip it so that
    # a bogus case named "*" is never reported.
    [[ -e "${yaml_config}" ]] || continue
    case_name=$(basename "${yaml_config}" .yaml) || true
    echo "${case_name}"
  done
}

function get_pslot_list () {

  #############################################################
  # Usage: get_pslot_list <RUNTESTS>
  #
  # Print the name (pslot, including its hash tag) of every
  # entry in <RUNTESTS>/EXPDIR, one per line.
  # Prints nothing when EXPDIR is empty or does not exist.
  #############################################################
  local RUNTESTS="${1}"
  local pslot_dir
  local pslot_name

  for pslot_dir in "${RUNTESTS}/EXPDIR/"*; do
    # An unmatched glob is left as the literal pattern; skip it so that
    # a bogus pslot named "*" is never reported.
    [[ -e "${pslot_dir}" ]] || continue
    pslot_name=$(basename "${pslot_dir}") || true
    echo "${pslot_name}"
  done

}

function get_pslot () {

  #############################################################
  # Usage: get_pslot <RUNTESTS> <case>
  #
  # Walk the entries of <RUNTESTS>/EXPDIR and print the first
  # pslot whose name, with its last "_"-separated field (the
  # tag) removed, equals <case>. Prints nothing if none match.
  #############################################################
  local RUNTESTS="${1}"
  local case="${2}"

  for pslot_dir in "${RUNTESTS}/EXPDIR/"*; do
    pslot=$(basename "${pslot_dir}")
    # Drop the trailing "_<tag>" to recover the case name.
    check_case="${pslot%_*}"
    [[ "${check_case}" == "${case}" ]] || continue
    echo "${pslot}"
    break
  done

}

function create_experiment () {

  #############################################################
  # Usage: create_experiment <yaml_config>
  #
  # Create the experiment described by the given case yaml.
  # The experiment name is exported as pslot and is built from
  # the yaml basename and the short git hash of ${HOMEgfs}.
  # Reads HOMEgfs and MACHINE_ID from the calling environment;
  # exits 1 if ${HOMEgfs} cannot be entered.
  #############################################################
  local yaml_config="${1}"
  cd "${HOMEgfs}" || exit 1
  pr_sha=$(git rev-parse --short HEAD)
  case=$(basename "${yaml_config}" .yaml) || true
  export pslot=${case}_${pr_sha}

  source "${HOMEgfs}/ci/platforms/config.${MACHINE_ID}"
  source "${HOMEgfs}/workflow/gw_setup.sh"

  # HOMEgfs is already the root of the system being tested (gw_setup.sh is
  # sourced from its workflow directory above), so the script is called from
  # that same directory. The original path also contained "${system}", a
  # variable that is never assigned in this file and only worked because it
  # expanded to an empty string.
  "${HOMEgfs}/workflow/create_experiment.py" --overwrite --yaml "${yaml_config}"

}
9 changes: 9 additions & 0 deletions ci/scripts/utils/ci_utils_wrapper.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/usr/bin/env bash

# Usage: ci_utils_wrapper.sh <utility_function> [args...]
#
# Runs one of the functions defined in ci/scripts/utils/ci_utils.sh as a
# stand-alone command, passing the remaining arguments through to it.
# HOMEgfs is set to the directory three levels above this script.

HOMEgfs="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." >/dev/null 2>&1 && pwd )"
source "${HOMEgfs}/ush/detect_machine.sh"

utility_function="${1:-}"

# Without a function name the wrapper used to run nothing and still report success.
if [[ -z "${utility_function}" ]]; then
  echo "Usage: $(basename "${BASH_SOURCE[0]}") <utility_function> [args...]" >&2
  exit 1
fi

source "${HOMEgfs}/ci/scripts/utils/ci_utils.sh"
${utility_function} "${@:2}"
Loading

0 comments on commit 929b903

Please sign in to comment.