diff --git a/.github/jobs/docker_setup.sh b/.github/jobs/docker_setup.sh old mode 100755 new mode 100644 index ad20f64ecc..47116c0232 --- a/.github/jobs/docker_setup.sh +++ b/.github/jobs/docker_setup.sh @@ -67,7 +67,7 @@ fi # skip docker push if credentials are not set -if [ -z ${DOCKER_USERNAME+x} ] || [ -z ${DOCKER_PASSWORD+x} ]; then +if [ -z "${DOCKER_USERNAME}" ] || [ -z "${DOCKER_PASSWORD}" ]; then echo "DockerHub credentials not set. Skipping docker push" exit 0 fi diff --git a/.github/parm/use_case_groups.json b/.github/parm/use_case_groups.json index 2f6814059a..056013dd1b 100644 --- a/.github/parm/use_case_groups.json +++ b/.github/parm/use_case_groups.json @@ -56,7 +56,7 @@ }, { "category": "data_assimilation", - "index_list": "0", + "index_list": "0-1", "run": false }, { @@ -218,5 +218,10 @@ "category": "tc_and_extra_tc", "index_list": "3-5", "run": false + }, + { + "category": "unstructured_grids", + "index_list": "0", + "run": false } ] diff --git a/docs/_static/data_assimilation-StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.png b/docs/_static/data_assimilation-StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.png new file mode 100644 index 0000000000..633474d868 Binary files /dev/null and b/docs/_static/data_assimilation-StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.png differ diff --git a/docs/_static/unstructured_grids-StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.png b/docs/_static/unstructured_grids-StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.png new file mode 100644 index 0000000000..4475eeac05 Binary files /dev/null and b/docs/_static/unstructured_grids-StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.png differ diff --git a/docs/use_cases/model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.py b/docs/use_cases/model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.py new file mode 100644 index 0000000000..dfb8a7d747 --- /dev/null +++ b/docs/use_cases/model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.py @@ -0,0 +1,155 @@ +""" +StatAnalysis: IODAv2 +=========================================================================== + +model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.conf + +""" + +########################################### +# Scientific Objective +# -------------------- +# +# This use case demonstrates the Stat-Analysis tool and ingestion of HofX NetCDF files +# that have been output from the Joint Effort for Data assimilation Integration (JEDI) +# data assimilation system. JEDI uses IODA version 2 formatted files, which are NetCDF files +# with certain requirements of variables and naming conventions. These files +# hold observations to be assimilated into forecasts, in this case taken from the JEDI software +# test data, which contained a small number of Global observation-forecast pairs +# derived from the hofx application. +# +# UFO is a component of HofX, which maps the background forecast to observation space +# to form O minus B pairs. The HofX application of JEDI takes the input IODAv2 files and +# adds an additional variable which is the forecast value as interpolated to the +# observation location. These HofX files are used as input to form Matched Pair (MPR) +# formatted lists via Python embedding. In this case, Stat-Analysis then performs an aggregate_stat +# job and outputs statistics in an ascii file. 
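+#
+# As a quick orientation (a minimal sketch, not part of the use case
+# configuration itself), the IODAv2 group layout can be explored with xarray
+# in the same way the Python embedding script included below does; the file
+# name is the sample sondes file shipped with this use case::
+#
+#     import xarray as xr
+#
+#     path = "sample_hofx_output_sondes.nc4"
+#     metadata = xr.open_dataset(path, group="MetaData")  # latitude, longitude, datetime, station_id
+#     hofx = xr.open_dataset(path, group="hofx")           # forecast interpolated to the obs locations
+#     obsval = xr.open_dataset(path, group="ObsValue")     # observed values paired against hofx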
+# +# This use case adopts the IODAv2 formatted NetCDF files, which replace the previous variable +# formatting scheme to make use of NetCDF groups. + +############################################################################## +# Datasets +# -------- +# +# +# | **Data source:** JEDI HofX output files in IODAv2 format +# +# | **Location:** All of the input data required for this use case can be found in the met_test sample data tarball. Click here to the METplus releases page and download sample data for the appropriate release: https://github.com/dtcenter/METplus/releases +# | The tarball should be unpacked into the directory that you will set the value of INPUT_BASE. See `Running METplus`_ section for more information. +# | + +############################################################################## +# METplus Components +# ------------------ +# +# This use case utilizes the METplus StatAnalysis wrapper to search for +# files that are valid for the given case and generate a command to run +# the MET tool stat_analysis. + +############################################################################## +# METplus Workflow +# ---------------- +# +# StatAnalysis is the only tool called in this example. It processes the following +# run times: +# +# | **Valid:** 2018-04-15_00Z +# | **Forecast lead:** 0 hour +# | + +############################################################################## +# METplus Configuration +# --------------------- +# +# METplus first loads all of the configuration files found in parm/metplus_config, +# then it loads any configuration files passed to METplus via the command line +# with the -c option, i.e. -c parm/use_cases/model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.conf +# +# .. highlight:: bash +# .. literalinclude:: ../../../../parm/use_cases/model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.conf + +############################################################################## +# MET Configuration +# ----------------- +# +# METplus sets environment variables based on user settings in the METplus configuration file. +# See :ref:`How METplus controls MET config file settings` for more details. +# +# **YOU SHOULD NOT SET ANY OF THESE ENVIRONMENT VARIABLES YOURSELF! THEY WILL BE OVERWRITTEN BY METPLUS WHEN IT CALLS THE MET TOOLS!** +# +# If there is a setting in the MET configuration file that is currently not supported by METplus you'd like to control, please refer to: +# :ref:`Overriding Unsupported MET config file settings` +# +# .. note:: See the :ref:`StatAnalysis MET Configuration` section of the User's Guide for more information on the environment variables used in the file below: +# +# .. highlight:: bash +# .. literalinclude:: ../../../../parm/met_config/STATAnalysisConfig_wrapped + +############################################################################## +# Python Embedding +# ---------------- +# +# This use case uses a Python embedding script to read input data +# +# parm/use_cases/model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed/read_iodav2_mpr.py +# +# .. highlight:: python +# .. 
literalinclude:: ../../../../parm/use_cases/model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed/read_iodav2_mpr.py +# + +############################################################################## +# Running METplus +# --------------- +# +# It is recommended to run this use case by: +# +# Passing in StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.conf then a user-specific system configuration file:: +# +# run_metplus.py -c /path/to/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.conf -c /path/to/user_system.conf +# +# The following METplus configuration variables must be set correctly to run this example.: +# +# * **INPUT_BASE** - Path to directory where sample data tarballs are unpacked (See Datasets section to obtain tarballs). +# * **OUTPUT_BASE** - Path where METplus output will be written. This must be in a location where you have write permissions +# * **MET_INSTALL_DIR** - Path to location where MET is installed locally +# +# Example User Configuration File:: +# +# [dir] +# INPUT_BASE = /path/to/sample/input/data +# OUTPUT_BASE = /path/to/output/dir +# MET_INSTALL_DIR = /path/to/met-X.Y +# +# **NOTE:** All of these items must be found under the [dir] section. +# + + +############################################################################## +# Expected Output +# --------------- +# +# A successful run will output the following both to the screen and to the logfile:: +# +# INFO: METplus has successfully finished running. +# +# Refer to the value set for **OUTPUT_BASE** to find where the output data was generated. +# Output for this use case will be found in StatAnalysis_IODAv2 (relative to **OUTPUT_BASE**) +# and will contain the following file: +# +# * dump.out + +############################################################################## +# Keywords +# -------- +# +# .. note:: +# +# * StatAnalysisToolUseCase +# * PythonEmbeddingFileUseCase +# * DataAssimilationUseCase +# +# Navigate to the :ref:`quick-search` page to discover other similar use cases. +# +# +# sphinx_gallery_thumbnail_path = '_static/data_assimilation-StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.png' diff --git a/docs/use_cases/model_applications/unstructured_grids/README.rst b/docs/use_cases/model_applications/unstructured_grids/README.rst new file mode 100644 index 0000000000..7b8d40d0f2 --- /dev/null +++ b/docs/use_cases/model_applications/unstructured_grids/README.rst @@ -0,0 +1,4 @@ +Unstructured Grids +------------------ + +Unstructured grids used by models for numerical weather prediction. diff --git a/docs/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.py b/docs/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.py new file mode 100644 index 0000000000..dd52057d21 --- /dev/null +++ b/docs/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.py @@ -0,0 +1,160 @@ +""" +StatAnalysis: UGRID +=========================================================================== + +model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.conf + +""" + +########################################### +# Scientific Objective +# -------------------- +# +# This use case demonstrates the use of python embedding to ingest and perform +# verification on an unstructured grid. This foregoes the need to interpolate +# to a regular grid as a step in the verification process, thereby avoiding +# any incurred interpolation error in the process. 
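+#
+# As a rough sketch of this approach (mirroring the Python embedding script
+# included with this use case and the iris/geovista tooling described in the
+# next paragraph; the file name, variable name, and observation location below
+# are placeholders), the native mesh is loaded with iris, converted to a
+# geovista mesh, and queried through a KD tree built from its cell centers::
+#
+#     import iris
+#     from iris.experimental.ugrid import PARSE_UGRID_ON_LOAD
+#     import geovista as gv
+#     from geovista.common import to_xyz
+#     from pykdtree.kdtree import KDTree
+#
+#     with PARSE_UGRID_ON_LOAD.context():
+#         cube = iris.load_cube("lfric_ver_20210505_0000.nc", constraint="t1p5m")
+#
+#     face_node = cube.mesh.face_node_connectivity
+#     lons, lats = cube.mesh.node_coords
+#     mesh = gv.Transform.from_unstructured(
+#         lons.points, lats.points, face_node.indices_by_location(),
+#         start_index=face_node.start_index,
+#     )
+#
+#     # KD tree over the cell centers; query it with an observation location
+#     tree = KDTree(mesh.cell_centers().points)
+#     obs_lat, obs_lon = 51.5, -0.1
+#     dist, idx = tree.query(to_xyz(obs_lat, obs_lon), k=1)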
+# +# In particular, this use case ingests a UK MET Office LFRic forecast file in +# NetCDF format, which resides in the UGRID format of the cubed-sphere. The python +# library Iris was developed to perform analysis on various UGRID formats, and is +# employed here to ingest the file as well as perform direct interpolation +# from the native forecast grid to observation locations, thereby forming matched +# pairs to pass to stat_analysis. In order to perform the interpolation using a +# nearest-neighbors approach, the geovista python package is also used to form a +# KD tree to be used in identifying the interpolation points to be used. This +# package is located at https://github.com/bjlittle/geovista/ and can be installed +# from a development version. It is also required to install the pyvista python +# package. ASCII files containing observations are also ingested. +# +# The python embedding script itself performs the interpolation in time, and +# for this use case thins the observation data in order to reduce the run time. +# It is also noted that the observations for this use case were fabricated and +# correlated observation-forecast pairs are not expected. +# + +############################################################################## +# Datasets +# -------- +# +# +# | **Data source:** UK MET Office LFRic forecast files in UGRID NetCDF format and observations in ASCII format +# +# | **Location:** All of the input data required for this use case can be found in the met_test sample data tarball. Click here to the METplus releases page and download sample data for the appropriate release: https://github.com/dtcenter/METplus/releases +# | The tarball should be unpacked into the directory that you will set the value of INPUT_BASE. See `Running METplus`_ section for more information. +# | + +############################################################################## +# METplus Components +# ------------------ +# +# This use case utilizes the METplus StatAnalysis wrapper to search for +# files that are valid for the given case and generate a command to run +# the MET tool stat_analysis. + +############################################################################## +# METplus Workflow +# ---------------- +# +# StatAnalysis is the only tool called in this example. It processes the following +# run times: +# +# | **Valid:** 2021-05-05_00Z +# | **Forecast lead:** 12 hour +# | + +############################################################################## +# METplus Configuration +# --------------------- +# +# METplus first loads all of the configuration files found in parm/metplus_config, +# then it loads any configuration files passed to METplus via the command line +# with the -c option, i.e. -c parm/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.conf +# +# .. highlight:: bash +# .. literalinclude:: ../../../../parm/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.conf + +############################################################################## +# MET Configuration +# ----------------- +# +# METplus sets environment variables based on user settings in the METplus configuration file. +# See :ref:`How METplus controls MET config file settings` for more details. +# +# **YOU SHOULD NOT SET ANY OF THESE ENVIRONMENT VARIABLES YOURSELF! 
THEY WILL BE OVERWRITTEN BY METPLUS WHEN IT CALLS THE MET TOOLS!** +# +# If there is a setting in the MET configuration file that is currently not supported by METplus you'd like to control, please refer to: +# :ref:`Overriding Unsupported MET config file settings` +# +# .. note:: See the :ref:`StatAnalysis MET Configuration` section of the User's Guide for more information on the environment variables used in the file below: +# +# .. highlight:: bash +# .. literalinclude:: ../../../../parm/met_config/STATAnalysisConfig_wrapped + +############################################################################## +# Python Embedding +# ---------------- +# +# This use case uses a Python embedding script to read input data +# +# parm/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed/ugrid_lfric_mpr.py +# +# .. highlight:: python +# .. literalinclude:: ../../../../parm/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed/ugrid_lfric_mpr.py +# + +############################################################################## +# Running METplus +# --------------- +# +# It is recommended to run this use case by: +# +# Passing in StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.conf then a user-specific system configuration file:: +# +# run_metplus.py -c /path/to/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.conf -c /path/to/user_system.conf +# +# The following METplus configuration variables must be set correctly to run this example.: +# +# * **INPUT_BASE** - Path to directory where sample data tarballs are unpacked (See Datasets section to obtain tarballs). +# * **OUTPUT_BASE** - Path where METplus output will be written. This must be in a location where you have write permissions +# * **MET_INSTALL_DIR** - Path to location where MET is installed locally +# +# Example User Configuration File:: +# +# [dir] +# INPUT_BASE = /path/to/sample/input/data +# OUTPUT_BASE = /path/to/output/dir +# MET_INSTALL_DIR = /path/to/met-X.Y +# +# **NOTE:** All of these items must be found under the [dir] section. +# + + +############################################################################## +# Expected Output +# --------------- +# +# A successful run will output the following both to the screen and to the logfile:: +# +# INFO: METplus has successfully finished running. +# +# Refer to the value set for **OUTPUT_BASE** to find where the output data was generated. +# Output for this use case will be found in StatAnalysis_UGRID (relative to **OUTPUT_BASE**) +# and will contain the following file: +# +# * dump.out + +############################################################################## +# Keywords +# -------- +# +# .. note:: +# +# * StatAnalysisToolUseCase +# * PythonEmbeddingFileUseCase +# * UnstructureGridsUseCase +# +# Navigate to the :ref:`quick-search` page to discover other similar use cases. +# +# +# sphinx_gallery_thumbnail_path = '_static/unstructured_grids-StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.png' diff --git a/internal/scripts/docker_env/README.md b/internal/scripts/docker_env/README.md index 516a430613..4a46d422b3 100644 --- a/internal/scripts/docker_env/README.md +++ b/internal/scripts/docker_env/README.md @@ -306,6 +306,24 @@ needed to install the tools. It is not recommended to follow these instructions to install the GFDL Tracker using these steps. Please consult the documentation for the tool for installation instructions. 
+## geovista.v5 + +### Docker + +``` +docker build -t dtcenter/metplus-envs:geovista.v5 --build-arg ENV_NAME=geovista . +docker push dtcenter/metplus-envs:geovista.v5 +``` + +### Local + +``` +./scripts/geovista_env.sh + +cd /home/met_test/geovista +/home/met_test/.conda/envs/geovista.v5/bin/pip3 install --no-deps --editable . +``` + ## pytest.v5 (from metplus_base.v5) diff --git a/internal/scripts/docker_env/scripts/geovista_env.sh b/internal/scripts/docker_env/scripts/geovista_env.sh new file mode 100755 index 0000000000..4a885493b6 --- /dev/null +++ b/internal/scripts/docker_env/scripts/geovista_env.sh @@ -0,0 +1,32 @@ +#! /bin/sh + +################################################################################ +# Environment: geovista +# Last Updated: 2022-11-08 (mccabe@ucar.edu) +# Notes: Adds Python packages needed to run iris use case +# Requires development version of geovista package that is obtained +# from github.com/bjlittle/geovista +# Python Packages: +# geovista==0.1a1.dev462 +# xarray==2022.11.0 +# iris==3.3.1 +# +# Other Content: None +################################################################################ + +# Conda environment to create +ENV_NAME=geovista.v5 + +yum -y install git +git clone https://github.com/bjlittle/geovista.git +cd geovista +conda create -y -n ${ENV_NAME} --file requirements/locks/py310-lock-linux-64.txt + +# note: this command will not work on a local machine +# it is specific to docker +/usr/local/envs/${ENV_NAME}/bin/pip3 install --no-deps --editable . +cd - + +conda install -y --name ${ENV_NAME} -c conda-forge xarray==2022.11.0 +conda install -y --name ${ENV_NAME} -c conda-forge iris==3.3.1 + diff --git a/internal/tests/use_cases/all_use_cases.txt b/internal/tests/use_cases/all_use_cases.txt index ef633503aa..ca8f1cbc1c 100644 --- a/internal/tests/use_cases/all_use_cases.txt +++ b/internal/tests/use_cases/all_use_cases.txt @@ -73,6 +73,7 @@ Category: climate Category: data_assimilation 0::StatAnalysis_fcstHAFS_obsPrepBufr_JEDI_IODA_interface::model_applications/data_assimilation/StatAnalysis_fcstHAFS_obsPrepBufr_JEDI_IODA_interface.conf +0::StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed::model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.conf:: py_embed Category: marine_and_cryosphere @@ -166,3 +167,7 @@ Category: tc_and_extra_tc 3::GridStat_fcstHAFS_obsTDR_NetCDF:: model_applications/tc_and_extra_tc/GridStat_fcstHAFS_obsTDR_NetCDF.conf:: py_embed 4::TCPairs_TCStat_fcstADECK_obsBDECK_ATCF_BasicExample:: model_applications/tc_and_extra_tc/TCPairs_TCStat_fcstADECK_obsBDECK_ATCF_BasicExample.conf 5::TCGen_fcstGFS_obsBDECK_2021season:: model_applications/tc_and_extra_tc/TCGen_fcstGFS_obsBDECK_2021season.conf + + +Category: unstructured_grids +0::StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed:: model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.conf:: geovista_env, py_embed diff --git a/parm/use_cases/model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.conf b/parm/use_cases/model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.conf new file mode 100644 index 0000000000..c93fce9401 --- /dev/null +++ b/parm/use_cases/model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.conf @@ -0,0 +1,85 @@ +[config] + +# Documentation for this use case can be found at +# https://metplus.readthedocs.io/en/latest/generated/model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed.html + +# For 
additional information, please see the METplus Users Guide. +# https://metplus.readthedocs.io/en/latest/Users_Guide + +### +# Processes to run +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#process-list +### + +PROCESS_LIST = StatAnalysis + + +### +# Time Info +# LOOP_BY options are INIT, VALID, RETRO, and REALTIME +# If set to INIT or RETRO: +# INIT_TIME_FMT, INIT_BEG, INIT_END, and INIT_INCREMENT must also be set +# If set to VALID or REALTIME: +# VALID_TIME_FMT, VALID_BEG, VALID_END, and VALID_INCREMENT must also be set +# LEAD_SEQ is the list of forecast leads to process +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#timing-control +### + +LOOP_BY = VALID + +VALID_TIME_FMT = %Y%m%d%H +VALID_BEG=2018041500 +VALID_END=2018041500 +VALID_INCREMENT = 12H + +LEAD_SEQ = 0 + + +### +# File I/O +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#directory-and-filename-template-info +### + +MODEL1_STAT_ANALYSIS_LOOKIN_DIR = python {PARM_BASE}/use_cases/model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed/read_iodav2_mpr.py {INPUT_BASE}/model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed/sample_hofx_output_sondes.nc4 + +STAT_ANALYSIS_OUTPUT_DIR = {OUTPUT_BASE}/StatAnalysis_IODAv2 +STAT_ANALYSIS_OUTPUT_TEMPLATE = job.out +MODEL1_STAT_ANALYSIS_DUMP_ROW_TEMPLATE = dump.out + + +### +# StatAnalysis Settings +# https://metplus.readthedocs.io/en/latest/Users_Guide/wrappers.html#statanalysis +### + +MODEL1 = NA +MODEL1_OBTYPE = NA + +STAT_ANALYSIS_JOB_NAME = aggregate_stat +STAT_ANALYSIS_JOB_ARGS = -out_line_type CNT -dump_row [dump_row_file] -line_type MPR -by FCST_VAR + +MODEL_LIST = +DESC_LIST = +FCST_LEAD_LIST = +OBS_LEAD_LIST = +FCST_VALID_HOUR_LIST = +FCST_INIT_HOUR_LIST = +OBS_VALID_HOUR_LIST = +OBS_INIT_HOUR_LIST = +FCST_VAR_LIST = +OBS_VAR_LIST = +FCST_UNITS_LIST = +OBS_UNITS_LIST = +FCST_LEVEL_LIST = +OBS_LEVEL_LIST = +VX_MASK_LIST = +INTERP_MTHD_LIST = +INTERP_PNTS_LIST = +FCST_THRESH_LIST = +OBS_THRESH_LIST = +COV_THRESH_LIST = +ALPHA_LIST = +LINE_TYPE_LIST = + +GROUP_LIST_ITEMS = +LOOP_LIST_ITEMS = MODEL_LIST diff --git a/parm/use_cases/model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed/read_iodav2_mpr.py b/parm/use_cases/model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed/read_iodav2_mpr.py new file mode 100644 index 0000000000..de79edf64a --- /dev/null +++ b/parm/use_cases/model_applications/data_assimilation/StatAnalysis_fcstGFS_HofX_obsIODAv2_PyEmbed/read_iodav2_mpr.py @@ -0,0 +1,103 @@ +from __future__ import print_function + +import pandas as pd +import os +from glob import glob +import sys +import xarray as xr +import datetime as dt + +######################################################################## + +print('Python Script:\t', sys.argv[0]) + +# Input is .nc or .nc4 file + +if len(sys.argv) == 2: + # Read the input file as the first argument + input_path = os.path.expandvars(sys.argv[1]) + try: + print("Input File:\t" + repr(input_path)) + + # Read all the needed groups + ioda_data = xr.open_dataset(input_path, group = 'MetaData') + ioda_hofx_data = xr.open_dataset(input_path, group = 'hofx') + + hofx_vars = list(ioda_hofx_data.keys()) + + # use dataframes + ioda_df = ioda_data.to_dataframe() + ioda_data.close() + + for var_name in hofx_vars: + ioda_df[var_name + '@hofx'] = ioda_hofx_data[var_name] + + # Add columns for needed attributes, for each variable present 
for hofx + for attribute in ['ObsValue', 'ObsType', 'EffectiveQC']: + ioda_attr_data = xr.open_dataset(input_path, group = attribute) + for var_name in hofx_vars: + ioda_df[var_name + '@' + attribute] = ioda_attr_data[var_name] + + ioda_attr_data.close() + ioda_hofx_data.close() + + nlocs = len(ioda_df.index) + print('Number of locations in set: ' + str(nlocs)) + + # Decode strings + time = list(ioda_df['datetime']) + + for i in range(0,nlocs): + temp = dt.datetime.strptime(time[i], '%Y-%m-%dT%H:%M:%SZ') + time[i] = temp.strftime('%Y%m%d_%H%M%S') + + ioda_df['datetime'] = time + + #set up MPR data + mpr_data = [] + + for var_name in hofx_vars: + + # Set up the needed columns + ioda_df_var = ioda_df[['datetime','station_id',var_name+'@ObsType', + 'latitude','longitude','air_pressure', + var_name+'@hofx',var_name+'@ObsValue', + var_name+'@EffectiveQC']] + + # Cute down to locations with valid ObsValues + ioda_df_var = ioda_df_var[abs(ioda_df_var[var_name+'@ObsValue']) < 1e6] + nlocs = len(ioda_df_var.index) + print(var_name+' has '+str(nlocs)+' valid obs.') + + # Add additional columns + ioda_df_var['lead'] = '000000' + ioda_df_var['MPR'] = 'MPR' + ioda_df_var['nobs'] = nlocs + ioda_df_var['index'] = range(0,nlocs) + ioda_df_var['varname'] = var_name + ioda_df_var['na'] = 'NA' + + # Arrange columns in MPR format + cols = ['na','na','lead','datetime','datetime','lead','datetime', + 'datetime','varname','na','lead','varname','na','na', + var_name+'@ObsType','na','na','lead','na','na','na','na','MPR', + 'nobs','index','station_id','latitude','longitude', + 'air_pressure','na',var_name+'@hofx',var_name+'@ObsValue', + var_name+'@EffectiveQC','na','na'] + + ioda_df_var = ioda_df_var[cols] + + # Into a list and all to strings + mpr_data = mpr_data + [list( map(str,i) ) for i in ioda_df_var.values.tolist() ] + + print("Total Length:\t" + repr(len(mpr_data))) + + except NameError: + print("Can't find the input file.") + print("HofX variables in this file:\t" + repr(hofx_vars)) +else: + print("ERROR: read_iodav2_mpr.py -> Must specify input file.\n") + sys.exit(1) + +######################################################################## + diff --git a/parm/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.conf b/parm/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.conf new file mode 100644 index 0000000000..bbca987ddf --- /dev/null +++ b/parm/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.conf @@ -0,0 +1,85 @@ +[config] + +# Documentation for this use case can be found at +# https://metplus.readthedocs.io/en/latest/generated/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed.html + +# For additional information, please see the METplus Users Guide. 
+# https://metplus.readthedocs.io/en/latest/Users_Guide + +### +# Processes to run +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#process-list +### + +PROCESS_LIST = StatAnalysis + + +### +# Time Info +# LOOP_BY options are INIT, VALID, RETRO, and REALTIME +# If set to INIT or RETRO: +# INIT_TIME_FMT, INIT_BEG, INIT_END, and INIT_INCREMENT must also be set +# If set to VALID or REALTIME: +# VALID_TIME_FMT, VALID_BEG, VALID_END, and VALID_INCREMENT must also be set +# LEAD_SEQ is the list of forecast leads to process +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#timing-control +### + +LOOP_BY = VALID + +VALID_TIME_FMT = %Y%m%d%H +VALID_BEG=2021050500 +VALID_END=2021050500 +VALID_INCREMENT = 6H + +LEAD_SEQ = 0 + + +### +# File I/O +# https://metplus.readthedocs.io/en/latest/Users_Guide/systemconfiguration.html#directory-and-filename-template-info +### + +MODEL1_STAT_ANALYSIS_LOOKIN_DIR = python {PARM_BASE}/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed/ugrid_lfric_mpr.py {INPUT_BASE}/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed/fcst_data/lfric_ver_20210505_0000.nc {INPUT_BASE}/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed/obs_data + +STAT_ANALYSIS_OUTPUT_DIR = {OUTPUT_BASE}/StatAnalysis_UGRID +STAT_ANALYSIS_OUTPUT_TEMPLATE = job.out +MODEL1_STAT_ANALYSIS_DUMP_ROW_TEMPLATE = dump.out + + +### +# StatAnalysis Settings +# https://metplus.readthedocs.io/en/latest/Users_Guide/wrappers.html#statanalysis +### + +MODEL1 = NA +MODEL1_OBTYPE = NA + +STAT_ANALYSIS_JOB_NAME = aggregate_stat +STAT_ANALYSIS_JOB_ARGS = -out_line_type CNT -dump_row [dump_row_file] -line_type MPR -by FCST_VAR + +MODEL_LIST = +DESC_LIST = +FCST_LEAD_LIST = +OBS_LEAD_LIST = +FCST_VALID_HOUR_LIST = +FCST_INIT_HOUR_LIST = +OBS_VALID_HOUR_LIST = +OBS_INIT_HOUR_LIST = +FCST_VAR_LIST = +OBS_VAR_LIST = +FCST_UNITS_LIST = +OBS_UNITS_LIST = +FCST_LEVEL_LIST = +OBS_LEVEL_LIST = +VX_MASK_LIST = +INTERP_MTHD_LIST = +INTERP_PNTS_LIST = +FCST_THRESH_LIST = +OBS_THRESH_LIST = +COV_THRESH_LIST = +ALPHA_LIST = +LINE_TYPE_LIST = + +GROUP_LIST_ITEMS = +LOOP_LIST_ITEMS = MODEL_LIST diff --git a/parm/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed/ugrid_lfric_mpr.py b/parm/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed/ugrid_lfric_mpr.py new file mode 100644 index 0000000000..d78e17e82c --- /dev/null +++ b/parm/use_cases/model_applications/unstructured_grids/StatAnalysis_fcstLFRIC_UGRID_obsASCII_PyEmbed/ugrid_lfric_mpr.py @@ -0,0 +1,203 @@ +from __future__ import print_function + +import math +import pandas as pd +import numpy as np +import os +from glob import glob +import sys +import xarray as xr +import datetime as dt +import iris +from iris.experimental.ugrid import PARSE_UGRID_ON_LOAD +#geovista from https://github.com/bjlittle/geovista/ +import geovista as gv +import geovista.theme +from geovista.common import to_xyz +import netCDF4 +import pyvista as pv +from pykdtree.kdtree import KDTree + +from pathlib import Path +from typing import Optional + +import matplotlib.pyplot as plt + +print(f"{iris.__version__=}") +print(f"{gv.__version__=}") + +######################################################################## + +def read_ascii_obs(files): + paths = sorted(glob(files)) + datasets = [pd.read_table(p, header=None, delim_whitespace=True) for p in paths] + 
combined = pd.concat(datasets) + return combined + +def load_ugrid( + fname: str, + data: Optional[bool] = False, + constraint: Optional[str] = None, + verbose: Optional[bool] = False +) -> pv.PolyData: +# fname = BASE_DIR / fname + with PARSE_UGRID_ON_LOAD.context(): + cube = iris.load_cube(fname, constraint=constraint) + + if cube.ndim > 1: + cube = cube[(0,) * (cube.ndim - 1)] + + if verbose: + print(cube) + + data = cube.data if data else None + + face_node = cube.mesh.face_node_connectivity + indices = face_node.indices_by_location() + lons, lats = cube.mesh.node_coords + + mesh = gv.Transform.from_unstructured( + lons.points, + lats.points, + indices, + data=data, + start_index=face_node.start_index, + name=cube.name(), + ) + + if data is None: + mesh.active_scalars_name = None + + return mesh + +def info(mesh: pv.PolyData) -> None: + print(f"The mesh is a C{int(math.sqrt(mesh.n_cells / 6))}, with 6 panels, {int(mesh.n_cells / 6):,d} cells per panel, and {mesh.n_cells:,d} cells.") + +def find_nearest(tree, points, poi, k): + # lat/lon to xyz + xyz = to_xyz(*poi) + + # find the k nearest euclidean neighbours + dist, idxs = tree.query(xyz, k=k) + + if idxs.ndim > 1: + idxs = idxs[0] + + # retieve the associated xyz points of the k nearest neighbours + nearest = points[idxs] + + return xyz, nearest, idxs + +def to_centers(mesh: pv.PolyData) -> pv.PolyData: + tmp = mesh.copy() + tmp.clear_cell_data() + tmp.clear_point_data() + tmp.clear_field_data() + return tmp.cell_centers() + +######################################################################## +print('Python Script:\t', sys.argv[0]) + +# Input is directory of .nc lfric files and a directory of ascii obs filess + +if len(sys.argv) == 3: + # Read the input file as the first argument + input_fcst_dir = os.path.expandvars(sys.argv[1]) + input_obs_dir = os.path.expandvars(sys.argv[2]) + try: + print("Input Forecast Dir:\t" + repr(input_fcst_dir)) + print("Input Observations Dir:\t" + repr(input_obs_dir)) + + #Read all obs from directory + obs_data = read_ascii_obs(input_obs_dir+'/*.ascii') + print(obs_data.shape) + obs_data = obs_data.iloc[::1000, :]#thin for testing + obs_data = obs_data.rename(columns={0:'message_type', 1:'station_id', 2:'obs_valid_time', 3:'obs_lat', 4:'obs_lon', \ + 5:'elevation', 6:'var_name', 7:'level', 8:'height', 9:'qc_string', 10:'obs_value'}) + + obs_vars = ['UGRD', 'VGRD', 'TMP', 'RH'] + fcst_vars = ['u10m', 'v10m', 't1p5m', 'rh1p5m'] + + #open the netcdf forecast to access data values and list of times + fcst_data = xr.open_dataset(input_fcst_dir) + fcst_times = pd.to_datetime(fcst_data.coords['time_centered']) + + match_df = pd.DataFrame(columns=['message_type', 'station_id', 'obs_valid_time', 'obs_lat', 'obs_lon', \ + 'elevation', 'var_name', 'level', 'height', 'qc_string', 'obs_value', 'idx_nearest, fcst_value']) + + for idx1, (obs_var, fcst_var) in enumerate(zip(obs_vars, fcst_vars)): + + #load forecast as an iris cube + fcst_mesh = load_ugrid(input_fcst_dir, constraint=fcst_var) + info(fcst_mesh) + + #get indices of nearest cell center + fcst_centers = to_centers(fcst_mesh) + points = fcst_centers.points + tree = KDTree(points) + + #get the forecast data values loaded + fcst_df = fcst_data[fcst_var].to_dataframe() + print(fcst_df) + + #get obs data for variable + var_data = obs_data.loc[obs_data['var_name'] == obs_var].reset_index(drop=True) + + for idx2, row in var_data.iterrows(): + xyz, nearest, idx_nearest = find_nearest(tree, points, [row['obs_lat'], row['obs_lon']], k=1) + 
var_data.at[idx2,'idx_nearest'] = int(idx_nearest) + + #get the obs time, search for closest in the forecast data + time = dt.datetime.strptime(row['obs_valid_time'],'%Y%m%d_%H%M%S') + match_time = min(fcst_times, key=lambda d: abs(d - time)) + match_idx = np.argmin(np.abs(fcst_times - time)) + + #add matched fcst value to data + var_data.at[idx2, 'fcst_value'] = fcst_df.loc[(match_idx,int(idx_nearest)), fcst_var] + var_data.at[idx2, 'fcst_lat'] = fcst_df.loc[(match_idx,int(idx_nearest)), 'Mesh2d_face_x'] + var_data.at[idx2, 'fcst_lon'] = fcst_df.loc[(match_idx,int(idx_nearest)), 'Mesh2d_face_y'] + var_data.at[idx2, 'fcst_time'] = fcst_df.loc[(match_idx,int(idx_nearest)), 'time_centered'] + + #check results + #with pd.option_context('display.max_rows', None): + # print(var_data[['obs_lat','fcst_lat','obs_lon','fcst_lon','obs_value','fcst_value','obs_valid_time','fcst_time']]) + with pd.option_context('display.max_columns', 500, 'display.max_rows', 100, 'display.width', 500): + print(var_data) + ob_vals = var_data['obs_value'].values + f_vals = var_data['fcst_value'].values + + match_df = pd.concat([match_df, var_data], ignore_index=True) + + nlocs = len(match_df.index) + print('Number of locations in matched set: ' + str(nlocs)) + + # Add additional columns + match_df['lead'] = '000000' + match_df['MPR'] = 'MPR' + match_df['nobs'] = nlocs + match_df['index'] = range(0,nlocs) + match_df['na'] = 'NA' + match_df['QC'] = '0' + + # Arrange columns in MPR format + cols = ['na','na','lead','obs_valid_time','obs_valid_time','lead','obs_valid_time', + 'obs_valid_time','var_name','na','lead','var_name','na','na', + 'var_name','na','na','lead','na','na','na','na','MPR', + 'nobs','index','station_id','obs_lat','obs_lon', + 'level','na','fcst_value','obs_value', + 'QC','na','na'] + + match_df = match_df[cols] + + # Into a list and all to strings + mpr_data = [list( map(str,i) ) for i in match_df.values.tolist() ] + + except NameError: + print("Can't find the input files or the variables.") + print("Variables in this file:\t" + repr(var_list)) +else: + print("ERROR: ugrid_lfric_mpr.py -> Must specify directory of files.\n") + sys.exit(1) + +######################################################################## +