diff --git a/.github/workflows/python_linter.yaml b/.github/workflows/python_linter.yaml
index f7463862d5..b53fc50855 100644
--- a/.github/workflows/python_linter.yaml
+++ b/.github/workflows/python_linter.yaml
@@ -34,4 +34,4 @@ jobs:
     - name: Lint the test directory
       run: |
         export PYTHONPATH=${PWD}/ush
-        pylint --ignore-imports=yes tests/test_python/
+        pylint --min-similarity-lines=15 --ignore-imports=yes tests/test_python/
diff --git a/jobs/JREGIONAL_GET_DA_OBS b/jobs/JREGIONAL_GET_DA_OBS
new file mode 100755
index 0000000000..ef95580f26
--- /dev/null
+++ b/jobs/JREGIONAL_GET_DA_OBS
@@ -0,0 +1,89 @@
+#!/bin/bash
+
+#
+#-----------------------------------------------------------------------
+#
+# Source the variable definitions file and the bash utility functions.
+#
+#-----------------------------------------------------------------------
+#
+. $USHdir/source_util_funcs.sh
+source_config_for_task "task_run_anl|task_run_enkf" ${GLOBAL_VAR_DEFNS_FP}
+. $USHdir/job_preamble.sh
+#
+#-----------------------------------------------------------------------
+#
+# Save current shell options (in a global array). Then set new options
+# for this script/function.
+#
+#-----------------------------------------------------------------------
+#
+{ save_shell_opts; . $USHdir/preamble.sh; } > /dev/null 2>&1
+#
+#-----------------------------------------------------------------------
+#
+# Get the full path to the file in which this script/function is located
+# (scrfunc_fp), the name of that file (scrfunc_fn), and the directory in
+# which the file is located (scrfunc_dir).
+#
+#-----------------------------------------------------------------------
+#
+scrfunc_fp=$( $READLINK -f "${BASH_SOURCE[0]}" )
+scrfunc_fn=$( basename "${scrfunc_fp}" )
+scrfunc_dir=$( dirname "${scrfunc_fp}" )
+#
+#-----------------------------------------------------------------------
+#
+# Print message indicating entry into script.
+#
+#-----------------------------------------------------------------------
+#
+print_info_msg "
+========================================================================
+Entering script:  \"${scrfunc_fn}\"
+In directory:     \"${scrfunc_dir}\"
+
+This script retrieves observation data for RRFS data assimilation tasks.
+========================================================================"
+
+#
+#-----------------------------------------------------------------------
+#
+# Create the directory where the GSI obs files should be stored
+#
+#-----------------------------------------------------------------------
+#
+export DATA="${COMIN}/obs"
+mkdir_vrfy -p "${DATA}"
+
+# Set needed date/time variables
+export START_DATE=$(echo "${PDY} ${cyc}")
+export YYYYMMDDHH=$(date +%Y%m%d%H -d "${START_DATE}")
+
+#
+#-----------------------------------------------------------------------
+#
+# Call the ex-script for this J-job
+#
+#-----------------------------------------------------------------------
+#
+$SCRIPTSdir/exregional_get_da_obs.sh || \
+print_err_msg_exit "\
+Call to ex-script corresponding to J-job \"${scrfunc_fn}\" failed."
+#
+#-----------------------------------------------------------------------
+#
+# Run job postamble.
+#
+#-----------------------------------------------------------------------
+#
+job_postamble
+#
+#-----------------------------------------------------------------------
+#
+# Restore the shell options saved at the beginning of this script/function.
+#
+#-----------------------------------------------------------------------
+#
+{ restore_shell_opts; } > /dev/null 2>&1
+
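Reviewer note: the J-job derives `YYYYMMDDHH` by shelling out to GNU `date`. For reference, a minimal Python sketch of the same cycle-time arithmetic (variable values are illustrative, not part of the PR):

```
from datetime import datetime

# Inputs as the workflow provides them, e.g. PDY="20220720", cyc="06"
pdy, cyc = "20220720", "06"

# Equivalent of: date +%Y%m%d%H -d "${PDY} ${cyc}"
start_date = datetime.strptime(f"{pdy} {cyc}", "%Y%m%d %H")
print(start_date.strftime("%Y%m%d%H"))  # -> 2022072006
```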
diff --git a/jobs/JREGIONAL_GET_EXTRN_MDL_FILES b/jobs/JREGIONAL_GET_EXTRN_MDL_FILES
index 98093a4abc..7ef397b738 100755
--- a/jobs/JREGIONAL_GET_EXTRN_MDL_FILES
+++ b/jobs/JREGIONAL_GET_EXTRN_MDL_FILES
@@ -216,7 +216,7 @@ esac
 #
 #-----------------------------------------------------------------------
 #
-# Create the directory where the exetrnal model files should be stored
+# Create the directory where the external model files should be stored
 #
 #-----------------------------------------------------------------------
 #
diff --git a/jobs/JREGIONAL_PROCESS_BUFR b/jobs/JREGIONAL_PROCESS_BUFROBS
similarity index 90%
rename from jobs/JREGIONAL_PROCESS_BUFR
rename to jobs/JREGIONAL_PROCESS_BUFROBS
index e12cc0da7a..111ba75425 100755
--- a/jobs/JREGIONAL_PROCESS_BUFR
+++ b/jobs/JREGIONAL_PROCESS_BUFROBS
@@ -29,7 +29,7 @@
 #-----------------------------------------------------------------------
 #
 . $USHdir/source_util_funcs.sh
-source_config_for_task "task_process_bufr" ${GLOBAL_VAR_DEFNS_FP}
+source_config_for_task "task_process_bufrobs" ${GLOBAL_VAR_DEFNS_FP}
 . $USHdir/job_preamble.sh "TRUE"
 #
 #-----------------------------------------------------------------------
@@ -75,11 +75,16 @@ the specified cycle.
 #-----------------------------------------------------------------------
 #
 if [ ${CYCLE_TYPE} == "spinup" ]; then
-  DATA="${DATA:-${COMIN}/process_bufr_spinup}"
+  DATA="${DATA:-${COMIN}/process_bufrobs_spinup}"
 else
-  DATA="${DATA:-${COMIN}/process_bufr}"
+  DATA="${DATA:-${COMIN}/process_bufrobs}"
 fi
 mkdir_vrfy -p ${DATA}
+
+# Set needed date/time variables
+export START_DATE=$(echo "${PDY} ${cyc}")
+export YYYYMMDDHH=$(date +%Y%m%d%H -d "${START_DATE}")
+
 #
 #-----------------------------------------------------------------------
 #
@@ -88,7 +93,7 @@ mkdir_vrfy -p ${DATA}
 #
 #-----------------------------------------------------------------------
 #
-$SCRIPTSdir/exregional_process_bufr.sh || print_err_msg_exit "\
+$SCRIPTSdir/exregional_process_bufrobs.sh || print_err_msg_exit "\
 Call to ex-script corresponding to J-job \"${scrfunc_fn}\" failed."
 #
 #-----------------------------------------------------------------------
diff --git a/jobs/JREGIONAL_PROCESS_LIGHTNING b/jobs/JREGIONAL_PROCESS_LIGHTNING
index f875fab7f6..5de45a87bf 100755
--- a/jobs/JREGIONAL_PROCESS_LIGHTNING
+++ b/jobs/JREGIONAL_PROCESS_LIGHTNING
@@ -16,7 +16,7 @@
 #-----------------------------------------------------------------------
 #
 . ${USHdir}/source_util_funcs.sh
-source_config_for_task "task_process_lightning" ${GLOBAL_VAR_DEFNS_FP}
+source_config_for_task "task_process_bufrobs" ${GLOBAL_VAR_DEFNS_FP}
 . ${USHdir}/job_preamble.sh "TRUE"
 #
 #-----------------------------------------------------------------------
diff --git a/modulefiles/tasks/hera/get_da_obs.local.lua b/modulefiles/tasks/hera/get_da_obs.local.lua
new file mode 100644
index 0000000000..acf81facb1
--- /dev/null
+++ b/modulefiles/tasks/hera/get_da_obs.local.lua
@@ -0,0 +1,2 @@
+load("hpss")
+load("python_srw")
diff --git a/modulefiles/tasks/jet/get_da_obs.local.lua b/modulefiles/tasks/jet/get_da_obs.local.lua
new file mode 100644
index 0000000000..acf81facb1
--- /dev/null
+++ b/modulefiles/tasks/jet/get_da_obs.local.lua
@@ -0,0 +1,2 @@
+load("hpss")
+load("python_srw")
diff --git a/parm/data_locations.yml b/parm/data_locations.yml
index 98bd1aa0a1..774e1dd1dd 100644
--- a/parm/data_locations.yml
+++ b/parm/data_locations.yml
@@ -322,10 +322,12 @@ RAP_obs:
     archive_format: zip
     archive_path:
       - /BMC/fdr/Permanent/{yyyy}/{mm}/{dd}/data/grids/rap/obs
+      - /BMC/fdr/Permanent/{yyyy}/{mm}/{dd}/data/grids/rap/prepbufr
     archive_internal_dir:
-      - ./
+      - ""
     archive_file_names:
       - "{yyyymmddhh}00.zip"
+      - "{yyyymmddhh}00.zip"
     file_names:
       obs:
         - "{yyyymmddhh}.rap.t{hh}z.prepbufr.tm00"
@@ -358,6 +360,13 @@ RAP_obs:
         - "{yyyymmddhh}.rap.t{hh}z.satwnd.tm00.bufr_d"
         - "{yyyymmddhh}.rap.t{hh}z.sevasr.tm00.bufr_d"
         - "{yyyymmddhh}.rap.t{hh}z.ssmisu.tm00.bufr_d"
+        - "{yyyymmddhh}.rap_e.t{hh}z.prepbufr.tm00"
+        - "{yyyymmddhh}.rap_e.t{hh}z.1bamua.tm00.bufr_d"
+        - "{yyyymmddhh}.rap_e.t{hh}z.1bhrs4.tm00.bufr_d"
+        - "{yyyymmddhh}.rap_e.t{hh}z.1bmhs.tm00.bufr_d"
+        - "{yyyymmddhh}.rap_e.t{hh}z.lgycld.tm00.bufr_d"
+        - "{yyyymmddhh}.rap_e.t{hh}z.nexrad.tm00.bufr_d"
+        - "{yyyymmddhh}.rap_e.t{hh}z.satwnd.tm00.bufr_d"
   aws:
     protocol: download
     url: https://noaa-rap-pds.s3.amazonaws.com/rap.{yyyymmdd}
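Reviewer note: the `{yyyy}`, `{mm}`, `{dd}`, `{hh}`, and `{yyyymmddhh}` placeholders in `data_locations.yml` are filled from the cycle date. A minimal sketch of that expansion (plain `str.format`, not the SRW implementation itself):

```
# Templates copied from the RAP_obs entry above
archive_path = "/BMC/fdr/Permanent/{yyyy}/{mm}/{dd}/data/grids/rap/obs"
file_name = "{yyyymmddhh}.rap_e.t{hh}z.prepbufr.tm00"

cycle = "2023032112"  # a YYYYMMDDHH cycle date
keys = {
    "yyyy": cycle[0:4],
    "mm": cycle[4:6],
    "dd": cycle[6:8],
    "hh": cycle[8:10],
    "yyyymmddhh": cycle,
}
print(archive_path.format(**keys))
# /BMC/fdr/Permanent/2023/03/21/data/grids/rap/obs
print(file_name.format(**keys))
# 2023032112.rap_e.t12z.prepbufr.tm00
```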
diff --git a/parm/wflow/coldstart.yaml b/parm/wflow/coldstart.yaml
index a66c743b98..98aa2e98fd 100644
--- a/parm/wflow/coldstart.yaml
+++ b/parm/wflow/coldstart.yaml
@@ -213,4 +213,3 @@ metatask_run_ensemble:
         taskdep:
           attrs:
             task: aqm_lbcs
-
diff --git a/parm/wflow/da_data_preproc.yaml b/parm/wflow/da_data_preproc.yaml
new file mode 100644
index 0000000000..7090e21b3b
--- /dev/null
+++ b/parm/wflow/da_data_preproc.yaml
@@ -0,0 +1,102 @@
+# This group contains all the preprocessing tasks needed for
+# RRFS DA.
+
+default_data_preproc_task: &default_preproc
+  account: '&ACCOUNT;'
+  attrs:
+    cycledefs: #cycledefs_type#
+    maxtries: '1'
+  envars: &default_envars
+    GLOBAL_VAR_DEFNS_FP: '&GLOBAL_VAR_DEFNS_FP;'
+    USHdir: '&USHdir;'
+    PDY: !cycstr "@Y@m@d"
+    cyc: !cycstr "@H"
+    subcyc: !cycstr "@M"
+    LOGDIR: !cycstr "&LOGDIR;"
+    CYCLE_TYPE: '#cycle_type#'
+  native: '{{ platform.SCHED_NATIVE_CMD }}'
+  nodes: '{{ nnodes }}:ppn={{ ppn }}'
+  nnodes: 1
+  nodesize: "&NCORES_PER_NODE;"
+  ppn: 1
+  partition: '{% if platform.get("PARTITION_DEFAULT") %}&PARTITION_DEFAULT;{% else %}None{% endif %}'
+  queue: '&QUEUE_DEFAULT;'
+  walltime: 00:25:00
+
+
+task_get_da_obs:
+  <<: *default_preproc
+  command: '&LOAD_MODULES_RUN_TASK_FP; "get_da_obs" "&JOBSdir;/JREGIONAL_GET_DA_OBS"'
+  attrs:
+    cycledefs: forecast
+    maxtries: '1'
+  join: !cycstr '&LOGDIR;/{{ jobname }}_@Y@m@d@H&LOGEXT;'
+  partition: '&PARTITION_HPSS;'
+  queue: '&QUEUE_HPSS;'
+  dependency:
+    timedep: '@Y@m@d@H@M00'
+
+metatask_process_obs_cycle_type:
+  var:
+    cycledefs_type: forecast,long_forecast
+    cycle_type: prod
+
+  task_process_radarref_#cycle_type#:
+    <<: *default_preproc
+    command: '&LOAD_MODULES_RUN_TASK_FP; "process_obs" "&JOBSdir;/JREGIONAL_PROCESS_RADARREF"'
+    ppn: 24
+    join: !cycstr '&LOGDIR;/{{ jobname }}_@Y@m@d@H&LOGEXT;'
+    dependency:
+      or:
+        taskdep:
+          attrs:
+            task: get_da_obs
+        and:
+          not:
+            taskvalid:
+              attrs:
+                task: get_da_obs
+          streq:
+            left: do_real_time
+            right: '{% if workflow.DO_REAL_TIME %}do_real_time{% endif %}'
+          timedep: '@Y@m@d@H@M00'
+
+  task_process_lightning_#cycle_type#:
+    <<: *default_preproc
+    command: '&LOAD_MODULES_RUN_TASK_FP; "process_obs" "&JOBSdir;/JREGIONAL_PROCESS_LIGHTNING"'
+    join: !cycstr '&LOGDIR;/{{ jobname }}_@Y@m@d@H&LOGEXT;'
+    dependency:
+      or:
+        taskdep:
+          attrs:
+            task: get_da_obs
+        and:
+          not:
+            taskvalid:
+              attrs:
+                task: get_da_obs
+          streq:
+            left: do_real_time
+            right: '{% if workflow.DO_REAL_TIME %}do_real_time{% endif %}'
+          timedep: '@Y@m@d@H@M00'
+
+
+  task_process_bufrobs_#cycle_type#:
+    <<: *default_preproc
+    command: '&LOAD_MODULES_RUN_TASK_FP; "process_obs" "&JOBSdir;/JREGIONAL_PROCESS_BUFROBS"'
+    join: !cycstr '&LOGDIR;/{{ jobname }}_@Y@m@d@H&LOGEXT;'
+    dependency:
+      or:
+        taskdep:
+          attrs:
+            task: get_da_obs
+        and:
+          not:
+            taskvalid:
+              attrs:
+                task: get_da_obs
+          streq:
+            left: do_real_time
+            right: '{% if workflow.DO_REAL_TIME %}do_real_time{% endif %}'
+          timedep: '@Y@m@d@H@M00'
+
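Reviewer note: all three `process_*` tasks share the same rocoto dependency: run once `get_da_obs` succeeds, or, when `get_da_obs` is not in the workflow at all, run once the wall-clock trigger passes in a real-time run. A rough Python rendering of that boolean (names are illustrative):

```
def process_obs_ready(get_da_obs_done: bool,
                      get_da_obs_in_workflow: bool,
                      do_real_time: bool,
                      cycle_time_reached: bool) -> bool:
    """Mirror of the <or>/<and>/<not> dependency on the process_* tasks."""
    return get_da_obs_done or (
        not get_da_obs_in_workflow   # <not><taskvalid task="get_da_obs">
        and do_real_time             # <streq> against workflow.DO_REAL_TIME
        and cycle_time_reached       # <timedep>
    )
```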
diff --git a/scripts/exregional_get_da_obs.sh b/scripts/exregional_get_da_obs.sh
new file mode 100755
index 0000000000..82ff77bee8
--- /dev/null
+++ b/scripts/exregional_get_da_obs.sh
@@ -0,0 +1,195 @@
+#!/bin/bash
+
+#
+#-----------------------------------------------------------------------
+#
+# Source the variable definitions file and the bash utility functions.
+#
+#-----------------------------------------------------------------------
+#
+. $USHdir/source_util_funcs.sh
+source_config_for_task "task_get_da_obs" ${GLOBAL_VAR_DEFNS_FP}
+#
+#-----------------------------------------------------------------------
+#
+# Save current shell options (in a global array). Then set new options
+# for this script/function.
+#
+#-----------------------------------------------------------------------
+#
+{ save_shell_opts; . $USHdir/preamble.sh; } > /dev/null 2>&1
+#
+#-----------------------------------------------------------------------
+#
+# Get the full path to the file in which this script/function is located
+# (scrfunc_fp), the name of that file (scrfunc_fn), and the directory in
+# which the file is located (scrfunc_dir).
+#
+#-----------------------------------------------------------------------
+#
+scrfunc_fp=$( $READLINK -f "${BASH_SOURCE[0]}" )
+scrfunc_fn=$( basename "${scrfunc_fp}" )
+scrfunc_dir=$( dirname "${scrfunc_fp}" )
+#
+#-----------------------------------------------------------------------
+#
+# Print message indicating entry into script.
+#
+#-----------------------------------------------------------------------
+#
+print_info_msg "
+========================================================================
+Entering script:  \"${scrfunc_fn}\"
+In directory:     \"${scrfunc_dir}\"
+
+This is the ex-script for the task that retrieves observation data for
+RRFS data assimilation tasks.
+========================================================================"
+#
+#-----------------------------------------------------------------------
+#
+# Enter working directory; set up variables for call to retrieve_data.py
+#
+#-----------------------------------------------------------------------
+#
+print_info_msg "$VERBOSE" "
+Entering working directory for observation files ..."
+
+cd_vrfy ${DATA}
+
+if [ $RUN_ENVIR = "nco" ]; then
+  EXTRN_DEFNS="${NET}.${cycle}.${EXTRN_MDL_NAME}.${ICS_OR_LBCS}.${EXTRN_MDL_VAR_DEFNS_FN}.sh"
+else
+  EXTRN_DEFNS="${EXTRN_MDL_VAR_DEFNS_FN}.sh"
+fi
+
+#
+#-----------------------------------------------------------------------
+#
+# retrieve RAP obs bufr files
+#
+#-----------------------------------------------------------------------
+#
+
+# Start array for templates for files we will retrieve
+template_arr=()
+
+# Obs come from different filenames depending on hour
+set -x
+if [[ ${cyc} -eq '00' || ${cyc} -eq '12' ]]; then
+  RAP=rap_e
+else
+  RAP=rap
+fi
+# Bufr lightning obs
+template_arr+=("${YYYYMMDDHH}.${RAP}.t${cyc}z.lghtng.tm00.bufr_d")
+# NASA LaRC cloud bufr file
+template_arr+=("${YYYYMMDDHH}.${RAP}.t${cyc}z.lgycld.tm00.bufr_d")
+# Prepbufr obs file
+template_arr+=("${YYYYMMDDHH}.${RAP}.t${cyc}z.prepbufr.tm00")
+
+additional_flags=""
+if [ $SYMLINK_FIX_FILES = "TRUE" ]; then
+  additional_flags="$additional_flags \
+  --symlink"
+fi
+
+if [ -n "${RAP_OBS_BUFR:-}" ] ; then
+  data_stores="disk ${EXTRN_MDL_DATA_STORES}"
+  additional_flags="$additional_flags \
+  --input_file_path ${RAP_OBS_BUFR}"
+fi
+
+cmd="
+python3 -u ${USHdir}/retrieve_data.py \
+  --debug \
+  --file_set obs \
+  --config ${PARMdir}/data_locations.yml \
+  --cycle_date ${PDY}${cyc} \
+  --data_stores ${data_stores} \
+  --data_type RAP_obs \
+  --output_path ${DATA} \
+  --summary_file ${EXTRN_DEFNS} \
+  --file_templates ${template_arr[@]} \
+  $additional_flags"
+
+$cmd || print_err_msg_exit "\
+Call to retrieve_data.py failed with a non-zero exit status.
+
+The command was:
+${cmd}
+"
+# Link to GSI-expected filenames
+mv_vrfy "${DATA}/${template_arr[0]}" "${DATA}/lghtngbufr"
+mv_vrfy "${DATA}/${template_arr[1]}" "${DATA}/lgycld.bufr_d"
+mv_vrfy "${DATA}/${template_arr[2]}" "${DATA}/prepbufr"
+
+#
+#-----------------------------------------------------------------------
+#
+# retrieve NLDN NetCDF lightning obs
+#
+#-----------------------------------------------------------------------
+#
+
+if [ "${NLDN_NEEDED:-}" = "TRUE" ]; then
+  template_arr=()
+  for incr in $(seq -25 5 5) ; do
+    filedate=$(date +"%y%j%H%M" -d "${START_DATE} ${incr} minutes ")
+    template_arr+=("${filedate}0005r")
+  done
+
+  additional_flags=""
+  if [ $SYMLINK_FIX_FILES = "TRUE" ]; then
+    additional_flags="$additional_flags \
+    --symlink"
+  fi
+
+  if [ -n "${NLDN_LIGHTNING:-}" ] ; then
+    data_stores="disk ${EXTRN_MDL_DATA_STORES}"
+    additional_flags="$additional_flags \
+    --input_file_path ${NLDN_LIGHTNING}"
+  fi
+
+  cmd="
+  python3 -u ${USHdir}/retrieve_data.py \
+    --debug \
+    --file_set obs \
+    --config ${PARMdir}/data_locations.yml \
+    --cycle_date ${PDY}${cyc} \
+    --data_stores ${data_stores} \
+    --data_type RAP_obs \
+    --output_path ${DATA} \
+    --summary_file ${EXTRN_DEFNS} \
+    --file_templates ${template_arr[@]} \
+    $additional_flags"
+
+  $cmd || print_err_msg_exit "\
+  Call to retrieve_data.py failed with a non-zero exit status.
+
+  The command was:
+  ${cmd}
+  "
+  # Link to GSI-expected filenames
+  filenum=0
+  for incr in $(seq -25 5 5) ; do
+    filedate=$(date +"%y%j%H%M" -d "${START_DATE} ${incr} minutes ")
+    filename="${filedate}0005r"
+    if [ -r ${filename} ]; then
+      ((filenum += 1 ))
+      mv_vrfy ${filename} ./NLDN_lightning_${filenum}
+    else
+      print_info_msg "WARNING: ${filename} does not exist"
+    fi
+  done
+
+fi
+
+#
+#-----------------------------------------------------------------------
+#
+# Restore the shell options saved at the beginning of this script/function.
+#
+#-----------------------------------------------------------------------
+#
+{ restore_shell_opts; } > /dev/null 2>&1
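Reviewer note: the ex-script drives `ush/retrieve_data.py` through its CLI, and the same entry point can be exercised from Python, which is exactly what the unit tests added later in this PR do. A minimal sketch for the RAP obs case (the output path is illustrative):

```
import retrieve_data  # requires ush/ on PYTHONPATH

args = [
    '--file_set', 'obs',
    '--config', 'parm/data_locations.yml',
    '--cycle_date', '2023032106',
    '--data_stores', 'hpss',
    '--data_type', 'RAP_obs',
    '--output_path', '/tmp/obs',  # illustrative scratch location
    '--debug',
]
retrieve_data.main(args)  # raises or exits non-zero on failure
```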
diff --git a/scripts/exregional_get_extrn_mdl_files.sh b/scripts/exregional_get_extrn_mdl_files.sh
index a67794a18a..e7004f7e2a 100755
--- a/scripts/exregional_get_extrn_mdl_files.sh
+++ b/scripts/exregional_get_extrn_mdl_files.sh
@@ -63,7 +63,7 @@ if [ "${ICS_OR_LBCS}" = "ICS" ]; then
   fcst_hrs=${TIME_OFFSET_HRS}
   file_names=${EXTRN_MDL_FILES_ICS[@]}
   if [ ${EXTRN_MDL_NAME} = FV3GFS ] || [ "${EXTRN_MDL_NAME}" == "GDAS" ] ; then
-    file_type=$FV3GFS_FILE_FMT_ICS
+    file_fmt=$FV3GFS_FILE_FMT_ICS
   fi
   input_file_path=${EXTRN_MDL_SOURCE_BASEDIR_ICS:-$EXTRN_MDL_SYSBASEDIR_ICS}
 
@@ -92,7 +92,7 @@ elif [ "${ICS_OR_LBCS}" = "LBCS" ]; then
   fcst_hrs="${first_time} ${last_time} ${LBC_SPEC_INTVL_HRS}"
   file_names=${EXTRN_MDL_FILES_LBCS[@]}
   if [ ${EXTRN_MDL_NAME} = FV3GFS ] || [ "${EXTRN_MDL_NAME}" == "GDAS" ] ; then
-    file_type=$FV3GFS_FILE_FMT_LBCS
+    file_fmt=$FV3GFS_FILE_FMT_LBCS
   fi
   input_file_path=${EXTRN_MDL_SOURCE_BASEDIR_LBCS:-$EXTRN_MDL_SYSBASEDIR_LBCS}
 fi
@@ -137,9 +137,9 @@ fi
 
 additional_flags=""
 
-if [ -n "${file_type:-}" ] ; then
+if [ -n "${file_fmt:-}" ] ; then
   additional_flags="$additional_flags \
-  --file_type ${file_type}"
+  --file_fmt ${file_fmt}"
 fi
 
 if [ -n "${file_names:-}" ] ; then
@@ -183,7 +183,7 @@ python3 -u ${USHdir}/retrieve_data.py \
   --config ${PARMdir}/data_locations.yml \
   --cycle_date ${EXTRN_MDL_CDATE} \
   --data_stores ${data_stores} \
-  --external_model ${EXTRN_MDL_NAME} \
+  --data_type ${EXTRN_MDL_NAME} \
   --fcst_hrs ${fcst_hrs[@]} \
   --ics_or_lbcs ${ICS_OR_LBCS} \
   --output_path ${EXTRN_MDL_STAGING_DIR}${mem_dir} \
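Reviewer note: the `--external_model`/`--file_type` flags become `--data_type`/`--file_fmt` so that obs datasets like `RAP_obs` fit the same interface as external models. A sketch of an LBCS retrieval with the renamed flags, following the patterns in this PR's unit tests (the output path is illustrative):

```
import retrieve_data  # requires ush/ on PYTHONPATH

args = [
    '--file_set', 'fcst',
    '--config', 'parm/data_locations.yml',
    '--cycle_date', '2019061200',
    '--data_stores', 'hpss',
    '--data_type', 'FV3GFS',      # was --external_model
    '--fcst_hrs', '6', '12', '3',
    '--output_path', '/tmp/lbcs', # illustrative scratch location
    '--ics_or_lbcs', 'LBCS',
    '--debug',
    '--file_fmt', 'grib2',        # was --file_type
]
retrieve_data.main(args)
```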
diff --git a/scripts/exregional_process_bufr.sh b/scripts/exregional_process_bufrobs.sh
similarity index 94%
rename from scripts/exregional_process_bufr.sh
rename to scripts/exregional_process_bufrobs.sh
index ea4708d8a9..076882aed3 100755
--- a/scripts/exregional_process_bufr.sh
+++ b/scripts/exregional_process_bufrobs.sh
@@ -8,7 +8,7 @@
 #-----------------------------------------------------------------------
 #
 . $USHdir/source_util_funcs.sh
-source_config_for_task "task_process_bufr" ${GLOBAL_VAR_DEFNS_FP}
+source_config_for_task "task_process_bufrobs" ${GLOBAL_VAR_DEFNS_FP}
 #
 #-----------------------------------------------------------------------
 #
@@ -88,19 +88,6 @@ cp_vrfy ${pregen_grid_dir}/fv3_grid_spec fv3sar_grid_spec.nc
 BUFR_TABLE=${FIXgsi}/prepobs_prep_RAP.bufrtable
 cp_vrfy $BUFR_TABLE prepobs_prep.bufrtable
 
-#-----------------------------------------------------------------------
-#
-# set observation soruce
-#
-#-----------------------------------------------------------------------
-obs_source=rap
-if [[ ${HH} -eq '00' || ${HH} -eq '12' ]]; then
-  obs_source=rap_e
-fi
-
-# evaluate template path that uses `obs_source`
-eval OBSPATH_TEMPLATE=${OBSPATH_TEMPLATE}
-
 #
 #-----------------------------------------------------------------------
 #-----------------------------------------------------------------------
@@ -109,7 +96,7 @@ eval OBSPATH_TEMPLATE=${OBSPATH_TEMPLATE}
 #
 #-----------------------------------------------------------------------
 
-obs_file=${OBSPATH_TEMPLATE}.t${HH}z.lghtng.tm00.bufr_d
+obs_file="${COMIN}/obs/lghtngbufr"
 print_info_msg "$VERBOSE" "obsfile is $obs_file"
 run_lightning=false
 if [ -r "${obs_file}" ]; then
@@ -185,7 +172,7 @@ fi
 #
 #-----------------------------------------------------------------------
 
-obs_file=${OBSPATH_TEMPLATE}.t${HH}z.lgycld.tm00.bufr_d
+obs_file="${COMIN}/obs/lgycld.bufr_d"
 print_info_msg "$VERBOSE" "obsfile is $obs_file"
 run_cloud=false
 if [ -r "${obs_file}" ]; then
@@ -268,7 +255,7 @@ fi
 #
 #-----------------------------------------------------------------------
 
-obs_file=${OBSPATH_TEMPLATE}.t${HH}z.prepbufr.tm00
+obs_file="${COMIN}/obs/prepbufr"
 print_info_msg "$VERBOSE" "obsfile is $obs_file"
 run_metar=false
 if [ -r "${obs_file}" ]; then
diff --git a/scripts/exregional_process_lightning.sh b/scripts/exregional_process_lightning.sh
index e41eae34fd..6dbd315e90 100755
--- a/scripts/exregional_process_lightning.sh
+++ b/scripts/exregional_process_lightning.sh
@@ -89,29 +89,10 @@ cp_vrfy ${pregen_grid_dir}/fv3_grid_spec fv3sar_grid_spec.nc
 
 run_lightning=false
 filenum=0
-for incr in $(seq -25 5 5) ; do
-  filedate=$(date +"%y%j%H%M" -d "${START_DATE} ${incr} minutes ")
-  filename=${LIGHTNING_ROOT}/${filedate}0005r
-  if [ -r ${filename} ]; then
-    ((filenum += 1 ))
-    ln -sf ${filename} ./NLDN_lightning_${filenum}
-    run_lightning=true
-  else
-    echo " ${filename} does not exist"
-  fi
+for file in ${COMIN}/obs/NLDN_lightning_*; do
+  ln_vrfy ${file} .
 done
-echo "found GLD360 files: ${filenum}"
-
-#-----------------------------------------------------------------------
-#
-# copy bufr table from fix directory
-#
-#-----------------------------------------------------------------------
-BUFR_TABLE=${FIXgsi}/prepobs_prep_RAP.bufrtable
-
-cp_vrfy $BUFR_TABLE prepobs_prep.bufrtable
-
 #-----------------------------------------------------------------------
 #
 # Build namelist and run executable
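Reviewer note: the NLDN files are named by a two-digit year plus day-of-year timestamp (`%y%j%H%M`) over a -25 to +5 minute window in 5-minute steps. A Python sketch of the filenames `exregional_get_da_obs.sh` requests (the cycle value is illustrative):

```
from datetime import datetime, timedelta

# Equivalent of:
#   for incr in $(seq -25 5 5); do
#     date +"%y%j%H%M" -d "${START_DATE} ${incr} minutes"
#   done
start_date = datetime(2022, 7, 20, 6)  # illustrative cycle: 2022072006
for incr in range(-25, 6, 5):
    filedate = (start_date + timedelta(minutes=incr)).strftime("%y%j%H%M")
    print(f"{filedate}0005r")  # first line: 2220105350005r
```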
diff --git a/scripts/exregional_process_radarref.sh b/scripts/exregional_process_radarref.sh
index 5fef67c0f3..923c9c52c3 100755
--- a/scripts/exregional_process_radarref.sh
+++ b/scripts/exregional_process_radarref.sh
@@ -69,7 +69,6 @@ YYYYMMDDHH=$(date +%Y%m%d%H -d "${START_DATE}")
 YYYY=${YYYYMMDDHH:0:4}
 MM=${YYYYMMDDHH:4:2}
 DD=${YYYYMMDDHH:6:2}
-HH=${YYYYMMDDHH:8:2}
 
 #
 #-----------------------------------------------------------------------
@@ -83,12 +82,12 @@ HH=${YYYYMMDDHH:8:2}
 BKTYPE=0
 if [ ${DO_SPINUP} == "TRUE" ]; then
   if [ ${CYCLE_TYPE} == "spinup" ]; then
-    if [[ ${CYCL_HRS_SPINSTART[@]} =~ "$HH" ]] ; then
+    if [[ ${CYCL_HRS_SPINSTART[@]} =~ "$cyc" ]] ; then
       BKTYPE=1
     fi
   fi
 else
-  if [[ ${CYCL_HRS_PRODSTART[@]} =~ "$HH" ]] ; then
+  if [[ ${CYCL_HRS_PRODSTART[@]} =~ "$cyc" ]] ; then
     BKTYPE=1
   fi
 fi
@@ -155,20 +154,20 @@ for timelevel in ${RADARREFL_TIMELEVEL[@]}; do
   for min in ${RADARREFL_MINS[@]}
   do
     min=$( printf %2.2i $((timelevel+min)) )
-    echo "Looking for data valid:"${YYYY}"-"${MM}"-"${DD}" "${HH}":"${min}
+    echo "Looking for data valid:"${YYYY}"-"${MM}"-"${DD}" "${cyc}":"${min}
     sec=0
     while [[ $sec -le 59 ]]; do
       ss=$(printf %2.2i ${sec})
-      nsslfile=${NSSL}/*${mrms}_00.50_${YYYY}${MM}${DD}-${HH}${min}${ss}.${OBS_SUFFIX}
+      nsslfile=${NSSL}/*${mrms}_00.50_${YYYY}${MM}${DD}-${cyc}${min}${ss}.${OBS_SUFFIX}
       if [ -s $nsslfile ]; then
         echo 'Found '${nsslfile}
-        nsslfile1=*${mrms}_*_${YYYY}${MM}${DD}-${HH}${min}*.${OBS_SUFFIX}
+        nsslfile1=*${mrms}_*_${YYYY}${MM}${DD}-${cyc}${min}*.${OBS_SUFFIX}
         numgrib2=$(ls ${NSSL}/${nsslfile1} | wc -l)
         echo 'Number of GRIB-2 files: '${numgrib2}
         if [ ${numgrib2} -ge 10 ] && [ ! -e filelist_mrms ]; then
           cp ${NSSL}/${nsslfile1} .
           ls ${nsslfile1} > filelist_mrms
-          echo 'Creating links for ${YYYY}${MM}${DD}-${HH}${min}'
+          echo 'Creating links for ${YYYY}${MM}${DD}-${cyc}${min}'
         fi
       fi
       ((sec+=1))
@@ -180,7 +179,7 @@ for timelevel in ${RADARREFL_TIMELEVEL[@]}; do
   if [ ${OBS_SUFFIX} == "grib2.gz" ]; then
     gzip -d *.gz
     mv filelist_mrms filelist_mrms_org
-    ls MergedReflectivityQC_*_${YYYY}${MM}${DD}-${HH}????.grib2 > filelist_mrms
+    ls MergedReflectivityQC_*_${YYYY}${MM}${DD}-${cyc}????.grib2 > filelist_mrms
   fi
 
   numgrib2=$(more filelist_mrms | wc -l)
diff --git a/tests/README.md b/tests/README.md
index f48c78523e..0a0c58c842 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -1,8 +1,10 @@
-# Build test for the UFS Short-Range Weather App
+# Test directory for the UFS Short-Range Weather Application
 
-## Description
+## Build tests
 
-This script builds the executables for the UFS Short-Range Weather Application (SRW App)
+### Description
+
+The build.sh script builds the executables for the UFS Short-Range Weather Application (SRW App)
 for the current code in the users ufs-srweather-app directory.
 It consists of the following steps:
 * Build all of the executables for the supported compilers on the given machine
@@ -25,7 +27,7 @@ NOTE: To run the regional workflow using these executables, the ``EXECDIR`` var
 appropiate directory, for example: ``EXECDIR="${HOMEdir}/bin_intel/bin"``, where ``${HOMEdir}``
 is the top-level directory of the cloned ufs-srweather-app repository.
 
-## Usage
+### Usage
 
 To run the tests, specify the machine name on the command line, for example:
 
@@ -37,3 +39,40 @@ cd tests
 ```
 
 Check the ``${HOMEdir}/tests/build_test$PID.out`` file for PASS/FAIL.
+
+## Unit tests
+
+The unit tests in the test_python/ directory test various parts of the workflow written in Python.
+
+### Set PYTHONPATH
+
+First, you will need to set the PYTHONPATH environment variable to include the ush/ directory:
+
+```
+export PYTHONPATH=/path/to/ufs-srweather-app/ush:${PYTHONPATH}
+```
+
+### Set up HPSS tests
+
+Second, you will need to set up your environment for the HPSS tests, depending on your platform. If
+on Jet or Hera, you should load the hpss module, so that the HPSS tests can load data from HPSS:
+
+```
+module load hpss
+```
+
+If on another platform without HPSS access, disable the HPSS tests by setting the following
+variable:
+
+```
+export CI=true
+```
+
+### Run unit tests
+
+After those prep steps, you can run the unit tests with the following command (from the top-level
+UFS SRW directory):
+
+```
+python3 -m unittest -b tests/test_python/*.py
+```
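Reviewer note: the README's `python3 -m unittest -b tests/test_python/*.py` command relies on shell globbing. An equivalent programmatic run, which may be handier in CI wrappers (a sketch, assuming it is launched from the repository root with ush/ on PYTHONPATH):

```
import unittest

# Discover every test module under tests/test_python (matching the README's
# glob) and run with output buffering, like unittest's -b flag.
suite = unittest.defaultTestLoader.discover("tests/test_python", pattern="*.py")
result = unittest.TextTestRunner(buffer=True).run(suite)
raise SystemExit(0 if result.wasSuccessful() else 1)
```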
diff --git a/tests/WE2E/run_WE2E_tests.py b/tests/WE2E/run_WE2E_tests.py
index 741e034e94..78c0f8b449 100755
--- a/tests/WE2E/run_WE2E_tests.py
+++ b/tests/WE2E/run_WE2E_tests.py
@@ -145,6 +145,8 @@ def run_we2e_tests(homedir, args) -> None:
         logging.debug(f"For test {test_name}, constructing config.yaml")
         test_cfg = load_config_file(test)
 
+        if test_cfg.get('user') is None:
+            test_cfg['user'] = {}
         test_cfg['user'].update({"MACHINE": machine})
         test_cfg['user'].update({"ACCOUNT": args.account})
         if run_envir:
@@ -491,8 +493,10 @@ def setup_logging(logfile: str = "log.run_WE2E_tests", debug: bool = False) -> N
     if args.modulefile is None:
         args.modulefile = f'build_{args.machine.lower()}_{args.compiler}'
     if args.procs < 1:
-        raise ValueError('You can not have less than one parallel process; select a valid value '\
+        raise argparse.ArgumentTypeError('You cannot have less than one parallel process; select a valid value '\
                          'for --procs')
+    if not args.tests:
+        raise argparse.ArgumentTypeError('The --tests argument cannot be empty')
 
     # Print test details (if requested)
     if args.print_test_info:
diff --git a/tests/WE2E/test_configs/data_assimilation/config.process_obs.yaml b/tests/WE2E/test_configs/data_assimilation/config.process_obs.yaml
new file mode 100644
index 0000000000..6c0741ae1e
--- /dev/null
+++ b/tests/WE2E/test_configs/data_assimilation/config.process_obs.yaml
@@ -0,0 +1,24 @@
+metadata:
+  description: |-
+    Tests the data retrieval and data preprocessing tasks that are
+    precursors to running any data assimilation components. Datasets
+    include bufr, lightning, and radar reflectivity.
+user:
+  RUN_ENVIR: community
+workflow:
+  PREDEF_GRID_NAME: RRFS_CONUS_3km
+  DATE_FIRST_CYCL: '2022072000'
+  DATE_LAST_CYCL: '2022072000'
+
+rocoto:
+  entities:
+    START_TIME_NSSLMOSAIC: "00:45:00"
+    START_TIME_LIGHTNING: "00:45:00"
+    START_TIME_CONVENTIONAL: "00:40:00"
+  cycledefs:
+    at_start:
+  tasks:
+    taskgroups: '{{ ["parm/wflow/da_data_preproc.yaml"]|include }}'
+    metatask_process_obs_cycle_type:
+      task_process_radarref_#cycle_type#:
+      task_process_lightning_#cycle_type#:
diff --git a/tests/test_python/test_retrieve_data.py b/tests/test_python/test_retrieve_data.py
index d40c2ee3d3..e35838c0dd 100644
--- a/tests/test_python/test_retrieve_data.py
+++ b/tests/test_python/test_retrieve_data.py
@@ -41,7 +41,7 @@ def setUp(self):
             "parm",
             "data_locations.yml"
         )
-        twodaysago = datetime.datetime.today() - datetime.timedelta(days=2)
+        threedaysago = datetime.datetime.today() - datetime.timedelta(days=3)
         # Set test dates to retrieve, based on important dates in HPSS history:
         # 2019061200 - First operational FV3GFS cycle
         # 2020022518, 2020022600 - Changes to operational FV3GFS files between these cycles
@@ -51,17 +51,17 @@ def setUp(self):
         self.dates={}
         self.dates["FV3GFSgrib2"] = ['2019061200',
                                      '2020022600',
-                                     twodaysago.strftime('%Y%m%d') + '12']
+                                     threedaysago.strftime('%Y%m%d') + '12']
         self.dates["FV3GFSnemsio"] = ['2019061200',
                                       '2020022518',
                                       '2021032018']
         self.dates["FV3GFSnetcdf"] = ['2021032100',
-                                      twodaysago.strftime('%Y%m%d') + '00']
+                                      threedaysago.strftime('%Y%m%d') + '00']
         self.dates["RAPhpss"] = ['2018071118',
                                  '2020022618',
-                                 twodaysago.strftime('%Y%m%d') + '06']
+                                 threedaysago.strftime('%Y%m%d') + '06']
         self.dates["RAPaws"] = ['2021022200',
-                                twodaysago.strftime('%Y%m%d%H')]
+                                threedaysago.strftime('%Y%m%d%H')]
 
 
     @unittest.skipIf(os.environ.get("CI") == "true", "Skipping HPSS tests")
@@ -77,12 +77,12 @@ def test_fv3gfs_grib2_from_hpss(self):
                 '--config', self.config,
                 '--cycle_date', date,
                 '--data_stores', 'hpss',
-                '--external_model', 'FV3GFS',
+                '--data_type', 'FV3GFS',
                 '--fcst_hrs', '6', '12', '3',
                 '--output_path', tmp_dir,
                 '--ics_or_lbcs', 'LBCS',
                 '--debug',
-                '--file_type', 'grib2',
+                '--file_fmt', 'grib2',
             ]
             # fmt: on
 
@@ -109,12 +109,12 @@ def test_fv3gfs_nemsio_lbcs_from_hpss(self):
                 '--config', self.config,
                 '--cycle_date', date,
                 '--data_stores', 'hpss',
-                '--external_model', 'FV3GFS',
+                '--data_type', 'FV3GFS',
                 '--fcst_hrs', '24',
                 '--output_path', tmp_dir,
                 '--ics_or_lbcs', 'LBCS',
                 '--debug',
-                '--file_type', 'nemsio',
+                '--file_fmt', 'nemsio',
             ]
             # fmt: on
 
@@ -143,12 +143,12 @@ def test_fv3gfs_netcdf_lbcs_from_hpss(self):
                 '--config', self.config,
                 '--cycle_date', date,
                 '--data_stores', 'hpss',
-                '--external_model', 'FV3GFS',
+                '--data_type', 'FV3GFS',
                 '--fcst_hrs', '24', '48', '24',
                 '--output_path', tmp_dir,
                 '--ics_or_lbcs', 'LBCS',
                 '--debug',
-                '--file_type', 'netcdf',
+                '--file_fmt', 'netcdf',
             ]
             # fmt: on
 
@@ -176,12 +176,12 @@ def test_gdas_ics_from_aws(self):
                 '--config', self.config,
                 '--cycle_date', '2022052512',
                 '--data_stores', 'aws',
-                '--external_model', 'GDAS',
+                '--data_type', 'GDAS',
                 '--fcst_hrs', '6', '9', '3',
                 '--output_path', out_path_tmpl,
                 '--ics_or_lbcs', 'LBCS',
                 '--debug',
-                '--file_type', 'netcdf',
+                '--file_fmt', 'netcdf',
                 '--members', '9', '10',
             ]
             # fmt: on
@@ -204,7 +204,7 @@ def test_gefs_grib2_ics_from_aws(self):
 
         with tempfile.TemporaryDirectory(dir=self.path) as tmp_dir:
             os.chdir(tmp_dir)
-            out_path_tmpl = os.path.join(tmp_dir, "mem{{mem:03d}}")
+            out_path_tmpl = os.path.join(tmp_dir, "mem{mem:03d}")
 
             # fmt: off
             args = [
@@ -212,12 +212,12 @@ def test_gefs_grib2_ics_from_aws(self):
                 '--config', self.config,
                 '--cycle_date', '2022052512',
                 '--data_stores', 'aws',
-                '--external_model', 'GEFS',
+                '--data_type', 'GEFS',
                 '--fcst_hrs', '6',
                 '--output_path', out_path_tmpl,
                 '--ics_or_lbcs', 'ICS',
                 '--debug',
-                '--file_type', 'netcdf',
+                '--file_fmt', 'netcdf',
                 '--members', '1', '2',
             ]
             # fmt: on
@@ -246,7 +246,7 @@ def test_hrrr_ics_from_hpss(self):
                 '--config', self.config,
                 '--cycle_date', '2022062512',
                 '--data_stores', 'hpss',
-                '--external_model', 'HRRR',
+                '--data_type', 'HRRR',
                 '--fcst_hrs', '0',
                 '--output_path', tmp_dir,
                 '--ics_or_lbcs', 'ICS',
@@ -276,7 +276,7 @@ def test_hrrr_lbcs_from_hpss(self):
                 '--config', self.config,
                 '--cycle_date', '2022062512',
                 '--data_stores', 'hpss',
-                '--external_model', 'HRRR',
+                '--data_type', 'HRRR',
                 '--fcst_hrs', '3', '24', '3',
                 '--output_path', tmp_dir,
                 '--ics_or_lbcs', 'LBCS',
@@ -305,7 +305,7 @@ def test_hrrr_ics_from_aws(self):
                 '--config', self.config,
                 '--cycle_date', '2022062512',
                 '--data_stores', 'aws',
-                '--external_model', 'HRRR',
+                '--data_type', 'HRRR',
                 '--fcst_hrs', '0',
                 '--output_path', tmp_dir,
                 '--ics_or_lbcs', 'ICS',
@@ -334,7 +334,7 @@ def test_hrrr_lbcs_from_aws(self):
                 '--config', self.config,
                 '--cycle_date', '2022062512',
                 '--data_stores', 'aws',
-                '--external_model', 'HRRR',
+                '--data_type', 'HRRR',
                 '--fcst_hrs', '3', '24', '3',
                 '--output_path', tmp_dir,
                 '--ics_or_lbcs', 'LBCS',
@@ -366,7 +366,7 @@ def test_rap_ics_from_hpss(self):
                 '--config', self.config,
                 '--cycle_date', date,
                 '--data_stores', 'hpss',
-                '--external_model', 'RAP',
+                '--data_type', 'RAP',
                 '--fcst_hrs', '3',
                 '--output_path', tmp_dir,
                 '--ics_or_lbcs', 'ICS',
@@ -396,7 +396,7 @@ def test_rap_ics_from_aws(self):
                 '--config', self.config,
                 '--cycle_date', date,
                 '--data_stores', 'aws',
-                '--external_model', 'RAP',
+                '--data_type', 'RAP',
                 '--fcst_hrs', '3',
                 '--output_path', tmp_dir,
                 '--ics_or_lbcs', 'ICS',
@@ -426,7 +426,7 @@ def test_rap_lbcs_from_aws(self):
                 '--config', self.config,
                 '--cycle_date', '2022062509',
                 '--data_stores', 'aws',
-                '--external_model', 'RAP',
+                '--data_type', 'RAP',
                 '--fcst_hrs', '3', '45', '6',
                 '--output_path', tmp_dir,
                 '--ics_or_lbcs', 'LBCS',
@@ -455,12 +455,12 @@ def test_ufs_ics_from_aws(self):
                 '--config', self.config,
                 '--cycle_date', '2020072300',
                 '--data_stores', 'aws',
-                '--external_model', 'UFS-CASE-STUDY',
+                '--data_type', 'UFS-CASE-STUDY',
                 '--fcst_hrs', '0',
                 '--output_path', tmp_dir,
                 '--ics_or_lbcs', 'ICS',
                 '--debug',
-                '--file_type', 'nemsio',
+                '--file_fmt', 'nemsio',
                 '--check_file',
             ]
             # fmt: on
@@ -481,15 +481,73 @@ def test_ufs_lbcs_from_aws(self):
                 '--config', self.config,
                 '--cycle_date', '2020072300',
                 '--data_stores', 'aws',
-                '--external_model', 'UFS-CASE-STUDY',
+                '--data_type', 'UFS-CASE-STUDY',
                 '--fcst_hrs', '3', '6', '3',
                 '--output_path', tmp_dir,
                 '--ics_or_lbcs', 'LBCS',
                 '--debug',
-                '--file_type', 'nemsio',
+                '--file_fmt', 'nemsio',
                 '--check_file',
             ]
             # fmt: on
 
             # Testing that there is no failure
             retrieve_data.main(args)
+
+    @unittest.skipIf(os.environ.get("CI") == "true", "Skipping HPSS tests")
+    def test_rap_obs_from_hpss(self):
+
+        """Get RAP observations from hpss for a 06z time"""
+
+        with tempfile.TemporaryDirectory(dir=self.path) as tmp_dir:
+            os.chdir(tmp_dir)
+
+            # fmt: off
+            args = [
+                '--file_set', 'obs',
+                '--config', self.config,
+                '--cycle_date', '2023032106',
+                '--data_stores', 'hpss',
+                '--data_type', 'RAP_obs',
+                '--output_path', tmp_dir,
+                '--debug',
+            ]
+            # fmt: on
+
+            retrieve_data.main(args)
+
+            # Verify files exist in temp dir
+
+            path = os.path.join(tmp_dir, "*")
+            files_on_disk = glob.glob(path)
+            self.assertEqual(len(files_on_disk), 30)
+
+    @unittest.skipIf(os.environ.get("CI") == "true", "Skipping HPSS tests")
+    def test_rap_e_obs_from_hpss(self):
+
+        """Get RAP observations from hpss for a 12z time;
+        at 00z and 12z we expect to see additional files
+        with the 'rap_e' naming convention"""
+
+        with tempfile.TemporaryDirectory(dir=self.path) as tmp_dir:
+            os.chdir(tmp_dir)
+
+            # fmt: off
+            args = [
+                '--file_set', 'obs',
+                '--config', self.config,
+                '--cycle_date', '2023032112',
+                '--data_stores', 'hpss',
+                '--data_type', 'RAP_obs',
+                '--output_path', tmp_dir,
+                '--debug',
+            ]
+            # fmt: on
+
+            retrieve_data.main(args)
+
+            # Verify files exist in temp dir
+
+            path = os.path.join(tmp_dir, "*")
+            files_on_disk = glob.glob(path)
+            self.assertEqual(len(files_on_disk), 37)
diff --git a/tests/test_python/test_set_FV3nml_ens_stoch_seeds.py b/tests/test_python/test_set_FV3nml_ens_stoch_seeds.py
index 3041afa4a1..70a4cdeb6b 100644
--- a/tests/test_python/test_set_FV3nml_ens_stoch_seeds.py
+++ b/tests/test_python/test_set_FV3nml_ens_stoch_seeds.py
@@ -35,7 +35,7 @@ def setUp(self):
         # Create an temporary experiment directory
         # pylint: disable=consider-using-with
         self.tmp_dir = tempfile.TemporaryDirectory(
-            dir=os.path.abspath("."),
+            dir=os.path.dirname(__file__),
             prefix="expt",
         )
         EXPTDIR = self.tmp_dir.name
diff --git a/tests/test_python/test_set_FV3nml_sfc_climo_filenames.py b/tests/test_python/test_set_FV3nml_sfc_climo_filenames.py
index c39e468959..131af70506 100644
--- a/tests/test_python/test_set_FV3nml_sfc_climo_filenames.py
+++ b/tests/test_python/test_set_FV3nml_sfc_climo_filenames.py
@@ -29,10 +29,9 @@ def setUp(self):
         PARMdir = os.path.join(USHdir, "..", "parm")
 
         # Create a temporary experiment directory structure
-        here = os.getcwd()
         # pylint: disable=consider-using-with
         self.tmp_dir = tempfile.TemporaryDirectory(
-            dir=os.path.abspath(here),
+            dir=os.path.dirname(__file__),
             prefix="expt",
         )
         EXPTDIR = self.tmp_dir.name
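Reviewer note: both test fixes anchor the scratch directory to the test module's own location instead of the current working directory, so the tests no longer depend on where the runner is launched from. A self-contained sketch of the pattern:

```
import os
import tempfile

# dir=... pins the scratch space next to this file rather than the CWD;
# abspath guards against an empty dirname when run as a plain script.
here = os.path.dirname(os.path.abspath(__file__))
with tempfile.TemporaryDirectory(dir=here, prefix="expt") as tmp_dir:
    print("experiment dir:", tmp_dir)
```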
diff --git a/ush/config.da_cycling.yaml b/ush/config.da_cycling.yaml
new file mode 100644
index 0000000000..5e823009d9
--- /dev/null
+++ b/ush/config.da_cycling.yaml
@@ -0,0 +1,48 @@
+metadata:
+  description: >-
+    Work-in-progress prototype for DA cycling configuration file
+user:
+  RUN_ENVIR: community
+  MACHINE: hera
+  ACCOUNT: an_account
+platform:
+  MET_INSTALL_DIR: ""
+  METPLUS_PATH: ""
+  CCPA_OBS_DIR: ""
+  MRMS_OBS_DIR: ""
+  NDAS_OBS_DIR: ""
+workflow:
+  USE_CRON_TO_RELAUNCH: false
+  EXPT_SUBDIR: test_da_cycling
+  CCPP_PHYS_SUITE: FV3_HRRR
+  PREDEF_GRID_NAME: RRFS_CONUS_3km
+  DATE_FIRST_CYCL: '2022072006'
+  DATE_LAST_CYCL: '2022072006'
+  FCST_LEN_HRS: 6
+  PREEXISTING_DIR_METHOD: rename
+  VERBOSE: true
+  COMPILER: intel
+task_get_extrn_ics:
+  EXTRN_MDL_NAME_ICS: FV3GFS
+  FV3GFS_FILE_FMT_ICS: grib2
+task_get_extrn_lbcs:
+  EXTRN_MDL_NAME_LBCS: FV3GFS
+  LBC_SPEC_INTVL_HRS: 6
+  FV3GFS_FILE_FMT_LBCS: grib2
+task_run_fcst:
+  QUILTING: true
+task_plot_allvars:
+  COMOUT_REF: ""
+rocoto:
+  entities:
+    START_TIME_NSSLMOSAIC: 00:45:00
+    START_TIME_LIGHTNING: 00:45:00
+    START_TIME_CONVENTIONAL: 00:40:00
+  tasks:
+    taskgroups: '{{ ["parm/wflow/prep.yaml", "parm/wflow/da_data_preproc.yaml", "parm/wflow/coldstart.yaml", "parm/wflow/post.yaml"]|include }}'
+    metatask_process_obs_cycle_type:
+      task_process_radarref_#cycle_type#:
+      task_process_lightning_#cycle_type#:
+    metatask_run_ensemble:
+      task_run_fcst_mem#mem#:
+        walltime: 02:00:00
diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml
index 2e1a7ef11a..44d0355fb4 100644
--- a/ush/config_defaults.yaml
+++ b/ush/config_defaults.yaml
@@ -454,12 +454,9 @@ platform:
   #
   # OBSPATH_NSSLMOSIAC: location of NSSL radar reflectivity
   #
-  # LIGHTNING_ROOT: location of lightning observations
-  #
   #-----------------------------------------------------------------------
   #
   OBSPATH_NSSLMOSIAC: ""
-  LIGHTNING_ROOT: ""
 
 #-----------------------------
 # WORKFLOW config parameters
@@ -2164,14 +2161,30 @@ task_process_radarref:
   RADARREFL_TIMELEVEL: [0]
   OBS_SUFFIX: grib2
 
+#----------------------------
+# Parameters needed for retrieving DA obs
+#-----------------------------
+task_get_da_obs:
+
+  #
+  #-----------------------------------------------------------------------
+  #
+  # NLDN_NEEDED
+  #   whether to use NLDN observations in DA or not.
+  #
+  #-----------------------------------------------------------------------
+  #
+  NLDN_NEEDED: False
+
 #----------------------------
 # PROCESS BUFR config parameters
 #-----------------------------
-task_process_bufr:
+task_process_bufrobs:
   #
   #-----------------------------------------------------------------------
   #
-  # observation file templates used in process_bufr
+  # OBSPATH_TEMPLATE
+  #   observation file templates used in process_bufrobs
   #
   #-----------------------------------------------------------------------
   #
diff --git a/ush/generate_FV3LAM_wflow.py b/ush/generate_FV3LAM_wflow.py
index 508cb49a8c..5b9ac01647 100755
--- a/ush/generate_FV3LAM_wflow.py
+++ b/ush/generate_FV3LAM_wflow.py
@@ -769,21 +769,18 @@ def setup_logging(logfile: str = "log.generate_FV3LAM_wflow", debug: bool = Fals
                 """
             )
         )
-        raise
-
-    # Note workflow generation completion
-    log_info(
-        f"""
-        ========================================================================
-        ========================================================================
+    else:
+        # If no exception, note workflow generation completion
+        log_info(
+            f"""
+            ========================================================================
 
-        Experiment generation completed.  The experiment directory is:
+            Experiment generation completed.  The experiment directory is:
 
-            EXPTDIR='{EXPTDIR}'
+                EXPTDIR='{EXPTDIR}'
 
-        ========================================================================
-        ========================================================================
-        """
-    )
+            ========================================================================
+            """
+        )
diff --git a/ush/machine/hera.yaml b/ush/machine/hera.yaml
index 472356de94..3e76685635 100644
--- a/ush/machine/hera.yaml
+++ b/ush/machine/hera.yaml
@@ -37,22 +37,11 @@ platform:
   FIXsfc: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/fix/fix_sfc_climo
   FIXshp: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/NaturalEarth
   EXTRN_MDL_DATA_STORES: hpss aws nomads
+task_get_da_obs:
+  RAP_OBS_BUFR: /scratch2/BMC/public/data/grids/rap/obs
+  RAP_OBS_NSSLMOSAIC: /scratch2/BMC/public/data/radar/nssl/mrms/conus
+  NLDN_LIGHTNING: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/rrfs_retro_data/lightning/vaisala/netcdf
-
-rocoto:
-  tasks:
-    metatask_run_ensemble:
-      task_run_fcst_mem#mem#:
-        cores: '{{ task_run_fcst.PE_MEMBER01 // 1 }}'
-        native: '--cpus-per-task {{ task_run_fcst.OMP_NUM_THREADS_RUN_FCST|int }} --exclusive {{ platform.SCHED_NATIVE_CMD }}'
-        nodes:
-        nnodes:
-        nodesize:
-        ppn:
-
-data:
-  obs:
-    RAP_obs: /scratch2/BMC/public/data/grids/rap/obs
 cpl_aqm_parm:
   AQM_CONFIG_DIR: /scratch2/NCEPDEV/naqfc/RRFS_CMAQ/aqm/epa/data
   AQM_BIO_DIR: /scratch2/NCEPDEV/naqfc/RRFS_CMAQ/aqm/bio
@@ -65,4 +54,13 @@ cpl_aqm_parm:
   NEXUS_FIX_DIR: /scratch2/NCEPDEV/naqfc/RRFS_CMAQ/nexus/fix
   PT_SRC_BASEDIR: /scratch1/RDARCH/rda-arl-gpu/Barry.Baker/emissions/nexus/NEI2016v1/v2023-01-PT
-
+rocoto:
+  tasks:
+    metatask_run_ensemble:
+      task_run_fcst_mem#mem#:
+        cores: '{{ task_run_fcst.PE_MEMBER01 // 1 }}'
+        native: '--cpus-per-task {{ task_run_fcst.OMP_NUM_THREADS_RUN_FCST|int }} --exclusive {{ platform.SCHED_NATIVE_CMD }}'
+        nodes:
+        nnodes:
+        nodesize:
+        ppn:
diff --git a/ush/machine/jet.yaml b/ush/machine/jet.yaml
index b01dff2c0d..6b81641544 100644
--- a/ush/machine/jet.yaml
+++ b/ush/machine/jet.yaml
@@ -34,7 +34,10 @@ platform:
   FIXsfc: /mnt/lfs4/HFIP/hfv3gfs/role.epic/UFS_SRW_data/develop/fix/fix_sfc_climo
   FIXshp: /mnt/lfs4/HFIP/hfv3gfs/role.epic/UFS_SRW_data/develop/NaturalEarth
   EXTRN_MDL_DATA_STORES: hpss aws nomads
-  LIGHTNING_ROOT: /mnt/lfs4/HFIP/hfv3gfs/role.epic/UFS_SRW_data/develop/rrfs_retro_data/lightning/vaisala/netcdf
+task_get_da_obs:
+  RAP_OBS_BUFR: /scratch2/BMC/public/data/grids/rap/obs
+  RAP_OBS_NSSLMOSAIC: /scratch2/BMC/public/data/radar/nssl/mrms/conus
+  NLDN_LIGHTNING: /mnt/lfs4/HFIP/hfv3gfs/role.epic/UFS_SRW_data/develop/rrfs_retro_data/lightning/vaisala/netcdf
 data:
   ics_lbcs:
     FV3GFS:
@@ -43,9 +46,7 @@ data:
       netcdf: /public/data/grids/gfs/anl/netcdf
     RAP: /public/data/grids/rap/full/wrfprs/grib2
     HRRR: /public/data/grids/hrrr/conus/wrfprs/grib2
-  obs:
-    RAP_obs: /public/data/grids/rap/obs
-  GFS_obs:
+    GFS:
       prepbufr: /public/data/grids/gfs/prepbufr
       tcvitals: /public/data/grids/gfs/bufr
 
@@ -59,3 +60,4 @@ rocoto:
         nnodes:
         nodesize:
         ppn:
+
diff --git a/ush/retrieve_data.py b/ush/retrieve_data.py
index bd3425db51..1b6da4a4e2 100755
--- a/ush/retrieve_data.py
+++ b/ush/retrieve_data.py
@@ -313,8 +313,8 @@ def get_file_templates(cla, known_data_info, data_store, use_cla_tmpl=False):
         file_templates = cla.file_templates if cla.file_templates else file_templates
 
     if isinstance(file_templates, dict):
-        if cla.file_type is not None:
-            file_templates = file_templates[cla.file_type]
+        if cla.file_fmt is not None:
+            file_templates = file_templates[cla.file_fmt]
         file_templates = file_templates[cla.file_set]
     if not file_templates:
         msg = "No file naming convention found. They must be provided \
@@ -399,7 +399,7 @@ def get_requested_files(cla, file_templates, input_locs, method="disk", **kwargs
                     mem=mem,
                 )
                 logging.info(f"Getting file: {input_loc}")
-
+                logging.debug(f"Target path: {target_path}")
                 if method == "disk":
                     if cla.symlink:
                         retrieved = copy_file(input_loc, target_path, "ln -sf")
@@ -421,16 +421,14 @@ def get_requested_files(cla, file_templates, input_locs, method="disk", **kwargs
                 logging.debug(f"Retrieved status: {retrieved}")
                 if not retrieved:
                     unavailable.append(input_loc)
-                    # Go on to the next location if the first file
-                    # isn't found here.
-                    break
-                # If retrieved, reset unavailable
-                unavailable = []
 
             if not unavailable:
                 # Start on the next fcst hour if all files were
                 # found from a loc/template combo
                 break
+            else:
+                logging.debug(f"Some files were not retrieved: {unavailable}")
+                logging.debug("Will check other locations for missing files")
 
     os.chdir(orig_path)
     return unavailable
@@ -488,8 +486,8 @@ def hpss_requested_files(cla, file_names, store_specs, members=-1, ens_group=-1)
     # Could be a list of lists
    archive_file_names = store_specs.get("archive_file_names", {})
 
-    if cla.file_type is not None:
-        archive_file_names = archive_file_names[cla.file_type]
+    if cla.file_fmt is not None:
+        archive_file_names = archive_file_names[cla.file_fmt]
 
     if isinstance(archive_file_names, dict):
         archive_file_names = archive_file_names[cla.file_set]
@@ -575,11 +573,18 @@ def hpss_requested_files(cla, file_names, store_specs, members=-1, ens_group=-1)
         cmd = f'htar -xvf {existing_archive} {" ".join(source_paths)}'
         logging.info(f"Running command \n {cmd}")
-        subprocess.run(
-            cmd,
-            check=True,
-            shell=True,
-        )
+
+        r = subprocess.run(
+            cmd,
+            check=False,
+            shell=True,
+        )
+        if r.returncode == 11:
+            # Continue if files were missing from the archive; we check below
+            # whether that is an acceptable condition
+            logging.warning("One or more files not found in zip archive")
+        elif r.returncode != 0:
+            raise Exception("Error running archive extraction command")
 
         # Check that files exist and Remove any data transfer artifacts.
         # Returns {'hpss': []}, turn that into a new dict of
@@ -758,10 +766,6 @@ def main(argv):
     """
 
     cla = parse_args(argv)
-    cla.fcst_hrs = arg_list_to_range(cla.fcst_hrs)
-
-    if cla.members:
-        cla.members = arg_list_to_range(cla.members)
 
     setup_logging(cla.debug)
     print("Running script retrieve_data.py with args:", f"\n{('-' * 80)}\n{('-' * 80)}")
@@ -796,21 +800,18 @@ def main(argv):
         )
         sys.exit(1)
 
-    known_data_info = cla.config.get(cla.external_model, {})
+    known_data_info = cla.config.get(cla.data_type, {})
     if not known_data_info:
-        msg = dedent(
-            f"""No data stores have been defined for
-            {cla.external_model}! Only checking provided disk
-            location"""
-        )
+        msg = f"No data stores have been defined for {cla.data_type}!"
         if cla.input_file_path is None:
             cla.data_stores = ["disk"]
             raise KeyError(msg)
         logging.info(msg)
+        logging.info(f"Checking provided disk location {cla.input_file_path}")
 
     unavailable = {}
     for data_store in cla.data_stores:
-        logging.info(f"Checking {data_store} for {cla.external_model}")
+        logging.info(f"Checking {data_store} for {cla.data_type}")
         store_specs = known_data_info.get(data_store, {})
 
         if data_store == "disk":
@@ -928,7 +929,7 @@ def parse_args(argv):
         help="Full path to a configuration file containing paths and \
        naming conventions for known data streams. The default included \
        in this repository is in parm/data_locations.yml",
-        required=True,
+        required=False,
        type=config_exists,
    )
@@ -949,7 +950,7 @@ def parse_args(argv):
        type=to_lower,
    )
    parser.add_argument(
-        "--external_model",
+        "--data_type",
        choices=(
            "FV3GFS",
            "GFS_obs",
@@ -976,7 +977,7 @@ def parse_args(argv):
        processed. If more than 3 arguments, the list is processed \
        as-is. default=[0]",
        nargs="+",
-        required=False, # relaxed this arg option, and set a default value when not used
+        required=False,
        default=[0],
        type=int,
    )
@@ -990,7 +991,7 @@ def parse_args(argv):
        "--ics_or_lbcs",
        choices=("ICS", "LBCS"),
        help="Flag for whether ICS or LBCS.",
-        required=True
+        required=False
    )

    # Optional
@@ -1011,12 +1012,12 @@ def parse_args(argv):
    parser.add_argument(
        "--file_templates",
        help="One or more file template strings defining the naming \
-        convention the be used for the files retrieved from disk. If \
+        convention to be used for the files retrieved from disk. If \
        not provided, the default names from hpss are used.",
        nargs="*",
    )
    parser.add_argument(
-        "--file_type",
+        "--file_fmt",
        choices=("grib2", "nemsio", "netcdf", "prepbufr", "tcvitals"),
        help="External model file format",
    )
@@ -1049,7 +1050,29 @@ def parse_args(argv):
        but don't try to download them. Works with download protocol \
        only",
    )
-    return parser.parse_args(argv)
+
+    # Make modifications/checks for given values
+
+    args = parser.parse_args(argv)
+
+    # convert range arguments if necessary
+    args.fcst_hrs = arg_list_to_range(args.fcst_hrs)
+    if args.members:
+        args.members = arg_list_to_range(args.members)
+
+    # Check required arguments for various conditions
+    if not args.ics_or_lbcs and args.file_set in ["anl", "fcst"]:
+        raise argparse.ArgumentTypeError(f"--ics_or_lbcs is a required " \
+            f"argument when --file_set = {args.file_set}")
+
+    # Check valid arguments for various conditions
+    valid_data_stores = ["hpss", "nomads", "aws", "disk", "remote"]
+    for store in args.data_stores:
+        if store not in valid_data_stores:
+            raise argparse.ArgumentTypeError(f"Invalid value '{store}' provided " \
+                f"for --data_stores; valid values are {valid_data_stores}")
+
+    return args


if __name__ == "__main__":
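Reviewer note: `parse_args` now normalizes `--fcst_hrs` and `--members` itself via `arg_list_to_range`. Per the help text, two or three values are treated as `[start, stop, [increment]]` and expanded inclusively; longer lists pass through unchanged. A sketch of that documented contract (not the SRW source itself):

```
def arg_list_to_range_sketch(args):
    """1 value -> itself; 2-3 values -> inclusive range; 4+ -> as-is."""
    if len(args) in (2, 3):
        start, stop = args[0], args[1]
        step = args[2] if len(args) == 3 else 1
        return list(range(start, stop + 1, step))
    return list(args)

print(arg_list_to_range_sketch([6, 12, 3]))   # [6, 9, 12]   (--fcst_hrs 6 12 3)
print(arg_list_to_range_sketch([3, 24, 3]))   # [3, 6, ..., 24]
print(arg_list_to_range_sketch([0]))          # [0]          (the default)
```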