Skip to content

Commit

Permalink
Add machine defined limits on maximum processors and batch wall clock…
Browse files Browse the repository at this point in the history
… time (#349)

* add machine MAXPES and MAXRUNLENGTH settings

* update documentation

* update implementation so pes are updated in test case name
  • Loading branch information
apcraig committed Aug 23, 2019
1 parent 0033810 commit 9cb297b
Show file tree
Hide file tree
Showing 6 changed files with 111 additions and 44 deletions.
90 changes: 58 additions & 32 deletions cice.setup
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,7 @@ if (${doabort} == true) then
exit -1
endif
# Create a new sets_base variable to store sets passed to cice.setup
set sets_base = "${sets}"
set bfbcomp_base = "$bfbcomp"
foreach compiler ( $ncompilers )
Expand All @@ -457,13 +458,14 @@ EOF
continue
endif
source ${ICE_SCRIPTS}/machines/env.${machcomp} -nomodules || exit 2
# Obtain the test name, sets, grid, and PE information from .ts file
# ${line} is split on single spaces; the fields are read in this order:
#   1 = test, 2 = grid, 3 = pesx, 4 = sets_tmp, 5 = bfbcomp_tmp
set test = `echo $line | cut -d' ' -f1`
set grid = `echo $line | cut -d' ' -f2`
set pesx = `echo $line | cut -d' ' -f3`
set sets_tmp = `echo $line | cut -d' ' -f4`
set bfbcomp_tmp = `echo $line | cut -d' ' -f5`
# Append sets from .ts file to the $sets variable
# (the result always starts from $sets_base, followed by a comma and $sets_tmp)
set sets = "$sets_base,$sets_tmp"
Expand All @@ -481,6 +483,61 @@ EOF
set fbfbcomp = ${machcomp}_${bfbcomp}
endif
#------------------------------------------------------------
# Parse pesx with strict checking, limit pes for machine
#
# ${pesx} must match exactly one of these forms, where every field is an
# unsigned integer:
#   [m]x[n]x[bx]x[by]x[mb]   -> task, thrd, blckx, blcky, mblck
#   [m]x[n]x[bx]x[by]        -> task, thrd, blckx, blcky   (mblck = 0)
#   [m]x[n]                  -> task, thrd                 (blckx = blcky = mblck = 0)
# Anything else is reported as an error and the script exits.
#
# pesnfld records how many fields were supplied so that pesx can be rebuilt
# in the same form after the machine limit has been applied.

set pesnfld = 0
set chck = `echo ${pesx} | sed 's/^[0-9][0-9]*x[0-9][0-9]*x[0-9][0-9]*x[0-9][0-9]*x[0-9][0-9]*$/OK/'`
if (${chck} == OK) then
  set pesnfld = 5
else
  set chck = `echo ${pesx} | sed 's/^[0-9][0-9]*x[0-9][0-9]*x[0-9][0-9]*x[0-9][0-9]*$/OK/'`
  if (${chck} == OK) then
    set pesnfld = 4
  else
    set chck = `echo ${pesx} | sed 's/^[0-9][0-9]*x[0-9][0-9]*$/OK/'`
    if (${chck} == OK) then
      set pesnfld = 2
    else
      echo "${0}: ERROR in -p argument, ${pesx}, must be [m]x[n], [m]x[n]x[bx]x[by], or [m]x[n]x[bx]x[by]x[mb] "
      exit -1
    endif
  endif
endif

# Split the validated string; fields that were not supplied stay at 0.
set task = `echo ${pesx} | cut -d x -f 1`
set thrd = `echo ${pesx} | cut -d x -f 2`
set blckx = 0
set blcky = 0
set mblck = 0
if (${pesnfld} >= 4) then
  set blckx = `echo ${pesx} | cut -d x -f 3`
  set blcky = `echo ${pesx} | cut -d x -f 4`
endif
if (${pesnfld} == 5) then
  set mblck = `echo ${pesx} | cut -d x -f 5`
endif

# Apply the optional machine limit on total pes (tasks * threads).
if ($?ICE_MACHINE_MAXPES) then
  @ pesreq = ${task} * ${thrd}
  if (${pesreq} > ${ICE_MACHINE_MAXPES}) then
    # Integer division: reduce the task count so tasks * threads fits the limit.
    @ task = ${ICE_MACHINE_MAXPES} / ${thrd}
    # The division truncates to 0 when thrd alone exceeds the limit; a case
    # with zero tasks cannot run, so keep at least one task.
    if (${task} < 1) set task = 1
    # Fewer tasks means more blocks per task, so scale up an explicit mblck.
    # When mblck is 0 (not supplied) this leaves it at 0.
    @ mblck = ${mblck} * ((${pesreq} / ${ICE_MACHINE_MAXPES}) + 1)
  endif
endif

# Rebuild pesx in the form it was given so the (possibly reduced) pe counts
# are carried forward by everything that uses ${pesx} after this point.
if (${pesnfld} == 5) then
  set pesx = ${task}x${thrd}x${blckx}x${blcky}x${mblck}
else if (${pesnfld} == 4) then
  set pesx = ${task}x${thrd}x${blckx}x${blcky}
else
  set pesx = ${task}x${thrd}
endif
set testname_noid = ${spval}
# create case for test cases
if (${docase} == 0) then
Expand Down Expand Up @@ -559,7 +616,6 @@ EOF
end
cd ${casedir}
source ./env.${machcomp} -nomodules || exit 2
set quietmode = false
if ($?ICE_MACHINE_QUIETMODE) then
Expand All @@ -585,36 +641,6 @@ EOF
#------------------------------------------------------------
# Compute a default blocksize
#
# Split ${pesx} on "x" into task, thrd, blckx, blcky and mblck.
# The three anchored sed patterns accept exactly 5, 4 or 2 unsigned-integer
# fields, tried in that order; fields missing from the shorter forms are set
# to 0.  Any other form prints an error and exits with status -1.
set chck = `echo ${pesx} | sed 's/^[0-9][0-9]*x[0-9][0-9]*x[0-9][0-9]*x[0-9][0-9]*x[0-9][0-9]*$/OK/'`
if (${chck} == OK) then
# 5-field form: [m]x[n]x[bx]x[by]x[mb]
set task = `echo ${pesx} | cut -d x -f 1`
set thrd = `echo ${pesx} | cut -d x -f 2`
set blckx = `echo ${pesx} | cut -d x -f 3`
set blcky = `echo ${pesx} | cut -d x -f 4`
set mblck = `echo ${pesx} | cut -d x -f 5`
else
set chck = `echo ${pesx} | sed 's/^[0-9][0-9]*x[0-9][0-9]*x[0-9][0-9]*x[0-9][0-9]*$/OK/'`
if (${chck} == OK) then
# 4-field form: [m]x[n]x[bx]x[by]
set task = `echo ${pesx} | cut -d x -f 1`
set thrd = `echo ${pesx} | cut -d x -f 2`
set blckx = `echo ${pesx} | cut -d x -f 3`
set blcky = `echo ${pesx} | cut -d x -f 4`
set mblck = 0
else
set chck = `echo ${pesx} | sed 's/^[0-9][0-9]*x[0-9][0-9]*$/OK/'`
if (${chck} == OK) then
# 2-field form: [m]x[n]
set task = `echo ${pesx} | cut -d x -f 1`
set thrd = `echo ${pesx} | cut -d x -f 2`
set blckx = 0
set blcky = 0
set mblck = 0
else
echo "${0}: ERROR in -p argument, ${pesx}, must be [m]x[n], [m]x[n]x[bx]x[by], or [m]x[n]x[bx]x[by]x[mb] "
exit -1
endif
endif
endif
# Export the grid name and the parsed task and thread counts as
# ICE_DECOMP_* environment variables.
setenv ICE_DECOMP_GRID ${grid}
setenv ICE_DECOMP_NTASK ${task}
setenv ICE_DECOMP_NTHRD ${thrd}
Expand Down
25 changes: 16 additions & 9 deletions configuration/scripts/cice.batch.csh
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,24 @@ if (${taskpernodelimit} > ${ntasks}) set taskpernodelimit = ${ntasks}
set ptile = $taskpernode
if ($ptile > ${maxtpn} / 2) @ ptile = ${maxtpn} / 2

# Requested run length, capped at the machine limit when one is defined.
# ICE_MACHINE_MAXRUNLENGTH is optional, so it is only applied if it is set.
set runlength = ${ICE_RUNLENGTH}
if ($?ICE_MACHINE_MAXRUNLENGTH) then
  if (${runlength} > ${ICE_MACHINE_MAXRUNLENGTH}) then
    set runlength = ${ICE_MACHINE_MAXRUNLENGTH}
  endif
endif

set queue = "${ICE_QUEUE}"

# Map the capped run length onto the batch wall clock request.
# Only ${runlength} is tested here so the machine limit cannot be bypassed
# by the uncapped ${ICE_RUNLENGTH}.
#   negative : keep the 15 minute default
#   0        : 29 minutes
#   1        : 59 minutes
#   2 .. 7   : that many hours
#   8 or more: 8 hours
set batchtime = "00:15:00"
if (${runlength} == 0) set batchtime = "00:29:00"
if (${runlength} == 1) set batchtime = "00:59:00"
if (${runlength} == 2) set batchtime = "2:00:00"
if (${runlength} == 3) set batchtime = "3:00:00"
if (${runlength} == 4) set batchtime = "4:00:00"
if (${runlength} == 5) set batchtime = "5:00:00"
if (${runlength} == 6) set batchtime = "6:00:00"
if (${runlength} == 7) set batchtime = "7:00:00"
if (${runlength} >= 8) set batchtime = "8:00:00"

set shortcase = `echo ${ICE_CASENAME} | cut -c1-15`

Expand Down
2 changes: 1 addition & 1 deletion configuration/scripts/cice.settings
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ setenv ICE_BASEGEN undefined
setenv ICE_BASECOM undefined
setenv ICE_BFBCOMP undefined
setenv ICE_SPVAL undefined
# Default run length is -1 (a single assignment; an earlier "0" default was
# immediately overwritten and has been dropped).
setenv ICE_RUNLENGTH -1
setenv ICE_ACCOUNT undefined
setenv ICE_QUEUE undefined

Expand Down
4 changes: 3 additions & 1 deletion configuration/scripts/machines/env.conrad_intel
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ setenv ICE_MACHINE_BASELINE $WORKDIR/CICE_BASELINE
# Batch system and resource settings for this machine/compiler.
setenv ICE_MACHINE_SUBMIT "qsub "
setenv ICE_MACHINE_ACCT P00000000
setenv ICE_MACHINE_QUEUE "debug"
setenv ICE_MACHINE_TPNODE 32              # tasks per node
setenv ICE_MACHINE_MAXPES 8000            # maximum total pes (tasks * threads) available
setenv ICE_MACHINE_MAXRUNLENGTH 168       # maximum batch wall time limit in hours (integer)
setenv ICE_MACHINE_BLDTHRDS 4
setenv ICE_MACHINE_QSTAT "qstat "
4 changes: 3 additions & 1 deletion configuration/scripts/machines/env.travisCI_gnu
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ setenv ICE_MACHINE_WKDIR ~/CICE_RUNS
# Data locations, batch system and resource settings for this machine/compiler.
setenv ICE_MACHINE_INPUTDATA ~
setenv ICE_MACHINE_BASELINE ~/CICE_BASELINE
setenv ICE_MACHINE_SUBMIT " "
setenv ICE_MACHINE_TPNODE 4               # maximum tasks per node
setenv ICE_MACHINE_MAXPES 4               # maximum total pes (tasks * threads) available
setenv ICE_MACHINE_MAXRUNLENGTH 1         # maximum batch wall time limit in hours (integer)
setenv ICE_MACHINE_ACCT P0000000
setenv ICE_MACHINE_QUEUE "default"
setenv ICE_MACHINE_BLDTHRDS 1
Expand Down
30 changes: 30 additions & 0 deletions doc/source/user_guide/ug_running.rst
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,35 @@ directory back to **configuration/scripts/machines/** and update
the **configuration/scripts/cice.batch.csh** file, retest,
and then add and commit the updated machine files to the repository.

.. _machvars:

Machine variables
~~~~~~~~~~~~~~~~~~~~~~

There are several machine-specific variables defined in the **env.$[machine]** file. These
variables are used to generate working cases for a given machine, compiler, and batch
system. Some variables are optional.

.. csv-table:: *Machine Settings*
:header: "variable", "format", "description"
:widths: 15, 15, 25

"ICE_MACHINE_ENVNAME", "string", "machine name"
"ICE_MACHINE_COMPILER", "string", "compiler"
"ICE_MACHINE_MAKE", "string", "make command"
"ICE_MACHINE_WKDIR", "string", "root work directory"
"ICE_MACHINE_INPUTDATA", "string", "root input data directory"
"ICE_MACHINE_BASELINE", "string", "root regression baseline directory"
"ICE_MACHINE_SUBMIT", "string", "batch job submission command"
"ICE_MACHINE_TPNODE", "integer", "machine maximum MPI tasks per node"
"ICE_MACHINE_MAXPES", "integer", "machine maximum total processors (tasks x threads) per job (optional)"
"ICE_MACHINE_MAXRUNLENGTH", "integer", "maximum batch wall time limit in hours (optional)"
"ICE_MACHINE_ACCT", "string", "batch default account"
"ICE_MACHINE_QUEUE", "string", "batch default queue"
"ICE_MACHINE_BLDTHRDS", "integer", "number of threads used during build"
"ICE_MACHINE_QSTAT", "string", "batch job status command (optional)"
"ICE_MACHINE_QUIETMODE", "true/false", "flag to reduce build output (optional)"

.. _cross_compiling:

Cross-compiling
Expand Down Expand Up @@ -460,3 +489,4 @@ does not include all of the capabilities present in the Python version.
To use the C-Shell version of the script, ::

$ ./timeseries.csh /p/work1/turner/CICE_RUNS/conrad_intel_smoke_col_1x1_diag1_run1year.t00/

0 comments on commit 9cb297b

Please sign in to comment.