diff --git a/requirements.txt b/requirements.txt index 4171f714..49709256 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,18 +1,13 @@ -setuptools>=59.4.0 -pyyaml>=6.0 -pycodestyle>=2.8.0 -netCDF4>=1.5.3 -matplotlib>=3.9.0 -cartopy>=0.21.1 -scipy>=1.9.3 -xarray>=2022.3.0 -pandas>=1.4.0 -numpy>=2.0.0 - -# Not explicitly part of eva but dependcies of eva dependencies already in spack-stack -# versions need to be set to avoid other versions being picked -pyproj==3.1.0 -importlib-metadata==4.8.2 +setuptools +pyyaml +pycodestyle +netCDF4 +matplotlib +cartopy +scipy +xarray +pandas +numpy # Additional packages git+https://github.com/NOAA-EMC/emcpy.git@f7b863d9508b921a78d7ff0e53de0b95e9a176f7#egg=emcpy diff --git a/requirements_gmao.txt b/requirements_gmao.txt new file mode 100644 index 00000000..866d191c --- /dev/null +++ b/requirements_gmao.txt @@ -0,0 +1,26 @@ +setuptools>=59.4.0 +pyyaml>=6.0 +pycodestyle>=2.8.0 +netCDF4 +matplotlib +cartopy>=0.21.1 +scipy>=1.9.3 +xarray>=2022.3.0 +pandas>=1.4.0 +numpy==1.22.3 + +# Not explicitly part of eva but dependencies of eva dependencies already in spack-stack +# versions need to be set to avoid other versions being picked +pyproj +importlib-metadata==4.8.2 +contourpy==1.0.7 + +# Additional packages +git+https://github.com/NOAA-EMC/emcpy.git@f7b863d9508b921a78d7ff0e53de0b95e9a176f7#egg=emcpy +scikit-learn +seaborn +hvplot +nbconvert +bokeh +geopandas +geoviews diff --git a/requirements_sles15.txt b/requirements_sles15.txt new file mode 100644 index 00000000..2e76be15 --- /dev/null +++ b/requirements_sles15.txt @@ -0,0 +1,28 @@ +setuptools>=59.4.0 +pyyaml>=6.0 +pycodestyle>=2.8.0 +netCDF4 +matplotlib +cartopy>=0.21.1 +scipy>=1.9.3 +xarray>=2022.3.0 +pandas>=1.4.0 +numpy==1.22.3 +attrs==21.4.0 + +# Not explicitly part of eva but dependencies of eva dependencies already in spack-stack +# versions need to be set to avoid other versions being picked +pyproj +importlib_metadata==7.1.0 +contourpy==1.0.7 +msgpack>=1.0.0 + +# 
Additional packages +git+https://github.com/NOAA-EMC/emcpy.git@f7b863d9508b921a78d7ff0e53de0b95e9a176f7#egg=emcpy +scikit-learn +seaborn +hvplot +nbconvert +bokeh +geopandas +geoviews diff --git a/src/eva/eva_driver.py b/src/eva/eva_driver.py index d0927627..fce4cdbf 100644 --- a/src/eva/eva_driver.py +++ b/src/eva/eva_driver.py @@ -13,19 +13,22 @@ import argparse import os from collections import defaultdict +import xarray as xr +import numpy as np from eva.utilities.config import get from eva.utilities.logger import Logger from eva.utilities.timing import Timing from eva.data.data_driver import data_driver +from eva.time_series.time_series import add_empty_to_timeseries from eva.time_series.time_series import collapse_collection_to_time_series +from eva.time_series.time_series_utils import create_empty_data, get_filename, check_file from eva.transforms.transform_driver import transform_driver from eva.plotting.batch.base.plot_tools.figure_driver import figure_driver from eva.data.data_collections import DataCollections from eva.utilities.duration import iso_duration_to_timedelta from eva.utilities.utils import load_yaml_file - # -------------------------------------------------------------------------------------------------- @@ -160,14 +163,29 @@ def read_transform_time_series(logger, timing, eva_dict, data_collections): if name == time_series_config['collection']: transform_dict['transforms'].append(transform) - # Assert that datasets_config is the same length as dates - logger.assert_abort(len(datasets_config) == len(dates), 'When running in time ' + - 'series mode the number of datasets must be the same as the ' + - 'number of dates.') + # Check if first file is empty. If it is, abort. 
+ empty_dataset_config = datasets_config[0] + filename = get_filename(empty_dataset_config, logger) + check_file(filename, logger) # Loop over datasets reading each one in turn, internally appending the data_collections for ind, dataset_config in enumerate(datasets_config): + # Pull out information to check for missing date + date = dates[ind] + + # Check if file exists, if not add empty and continue + filename = get_filename(dataset_config, logger) + if not os.path.isfile(filename): + add_empty_to_timeseries(logger, date, ind, timing, time_series_config, + empty_dataset_config, data_collections) + continue + # Check if file exists but is size zero, add empty and continue + elif os.stat(filename).st_size == 0: + add_empty_to_timeseries(logger, date, ind, timing, time_series_config, + empty_dataset_config, data_collections) + continue + # Create a temporary collection for this time step data_collections_tmp = DataCollections() @@ -185,7 +203,6 @@ def read_transform_time_series(logger, timing, eva_dict, data_collections): timing.stop('TransformDriverExecute') # Collapse data into time series - date = dates[ind] collapse_collection_to_time_series(logger, ind, date, time_series_config, data_collections, data_collections_tmp) diff --git a/src/eva/tests/config/testIodaObsSpaceAmsuaN19_TimeSeries.yaml b/src/eva/tests/config/testIodaObsSpaceAmsuaN19_TimeSeries.yaml index 0d085cf4..decc7f38 100644 --- a/src/eva/tests/config/testIodaObsSpaceAmsuaN19_TimeSeries.yaml +++ b/src/eva/tests/config/testIodaObsSpaceAmsuaN19_TimeSeries.yaml @@ -10,6 +10,15 @@ datasets: - name: ObsValue variables: &variables [brightnessTemperature] - name: hofx + #Empty + - name: experiment + type: IodaObsSpace + filenames: + - ${data_input_path}/ioda_obs_space.amsua_n19.hofx.2020-12-14T000000Z.nc4 + channels: *channels + groups: + - name: ObsValue + - name: hofx - name: experiment type: IodaObsSpace filenames: @@ -31,7 +40,7 @@ transforms: time_series: - begin_date: '2020-12-14T21:00:00' - 
final_date: '2020-12-15T03:00:00' + final_date: '2020-12-15T09:00:00' interval: 'PT6H' collection: experiment diff --git a/src/eva/tests/config/testJediVariationalBiasCorrectionAmsuaN19.yaml b/src/eva/tests/config/testJediVariationalBiasCorrectionAmsuaN19.yaml index c993a325..f57e2867 100644 --- a/src/eva/tests/config/testJediVariationalBiasCorrectionAmsuaN19.yaml +++ b/src/eva/tests/config/testJediVariationalBiasCorrectionAmsuaN19.yaml @@ -11,10 +11,16 @@ datasets: bias_file: ${data_input_path}/gsi.x0048v2.bc.amsua_n19.2021-12-11T21:00:00Z.satbias lapse_file: ${data_input_path}/gsi.x0048v2.bc.amsua_n19.2021-12-11T21:00:00Z.tlapse + # Empty + - name: experiment + type: JediVariationalBiasCorrection + bias_file: ${data_input_path}/gsi.x0048v2.bc.amsua_n19.2021-12-12T03:00:00Z.satbias + lapse_file: ${data_input_path}/gsi.x0048v2.bc.amsua_n19.2021-12-12T03:00:00Z.tlapse + time_series: - - begin_date: '2020-12-15T00:00:00' - final_date: '2020-12-15T06:00:00' + - begin_date: '2021-12-11T15:00:00' + final_date: '2021-12-12T03:00:00' interval: 'PT6H' collection: experiment diff --git a/src/eva/time_series/time_series.py b/src/eva/time_series/time_series.py index 41206531..fcc5225e 100644 --- a/src/eva/time_series/time_series.py +++ b/src/eva/time_series/time_series.py @@ -12,7 +12,8 @@ import numpy as np import xarray as xr - +from eva.data.data_collections import DataCollections +from eva.time_series.time_series_utils import create_empty_data # -------------------------------------------------------------------------------------------------- @@ -23,6 +24,17 @@ } +# -------------------------------------------------------------------------------------------------- + +def add_empty_to_timeseries(logger, date, ind, timing, time_series_config, + dataset_config, data_collections): + + ''' Add empty collection to timeseries for missing date ''' + empty_data_collection = create_empty_data(time_series_config, dataset_config, timing, logger) + 
collapse_collection_to_time_series(logger, ind, date, time_series_config, data_collections,
+                                       empty_data_collection)
+
+
 # --------------------------------------------------------------------------------------------------
 
 
diff --git a/src/eva/time_series/time_series_utils.py b/src/eva/time_series/time_series_utils.py
new file mode 100644
index 00000000..a9869600
--- /dev/null
+++ b/src/eva/time_series/time_series_utils.py
@@ -0,0 +1,82 @@
+import os
+import numpy as np
+import xarray as xr
+from eva.data.data_driver import data_driver
+from eva.data.data_collections import DataCollections
+
+
+# Maps a dataset type to a function returning the file whose presence decides whether a
+# cycle has data. Only the first entry of an IodaObsSpace file list is inspected.
+filename_retrieval = {
+    "IodaObsSpace": lambda dataset_config: dataset_config["filenames"][0],
+    "JediVariationalBiasCorrection": lambda dataset_config: dataset_config["bias_file"],
+}
+
+
+def get_filename(dataset_config, logger):
+    """
+    Return the representative filename for a dataset configuration.
+
+    Args:
+        dataset_config (dict): Dataset configuration with a "type" entry and the file
+                               entries that type needs ("filenames" or "bias_file").
+        logger (Logger): Logger used to abort on an unknown type or missing file entry.
+
+    Returns:
+        str: Filename looked up through the filename_retrieval table.
+    """
+
+    dataset_type = dataset_config.get("type")
+    logger.assert_abort(dataset_type in filename_retrieval,
+                        f'Unknown dataset_type {dataset_type}')
+    try:
+        filename = filename_retrieval[dataset_type](dataset_config)
+    except (KeyError, IndexError):
+        # Missing key or empty file list: report it rather than leak a bare traceback
+        logger.abort(f'Could not determine the filename for a dataset of type {dataset_type}.')
+    return filename
+
+
+def check_file(filename, logger):
+    """
+    Abort unless the first time series file exists and is not empty.
+
+    Args:
+        filename (str): Path of the first file in the time series.
+        logger (Logger): Logger used to abort.
+    """
+
+    if not os.path.isfile(filename):
+        logger.abort(f'First file provided to timeseries must exist: {filename}')
+    elif os.stat(filename).st_size == 0:
+        logger.abort(f'First file provided to timeseries must be nonzero: {filename}')
+
+
+def create_empty_data(timeseries_config, dataset_config, timing, logger):
+    """
+    Build a NaN-filled copy of a dataset to stand in for a missing cycle time.
+
+    Args:
+        timeseries_config (dict): Time series configuration; "collection" names the collection.
+        dataset_config (dict): Configuration of an existing dataset used as the template.
+        timing (Timing): Timing object passed through to the data driver.
+        logger (Logger): Logger passed through to the data driver.
+
+    Returns:
+        DataCollections: New data collections holding only the NaN-filled collection.
+    """
+
+    collection = timeseries_config["collection"]
+
+    # Read the template dataset into a throwaway collection
+    dc_tmp = DataCollections()
+    data_driver(dataset_config, dc_tmp, timing, logger)
+    dataset = dc_tmp.get_data_collection(collection)
+
+    # NOTE(review): full_like keeps each variable's dtype, so a NaN fill is presumably only
+    # valid for floating point variables - confirm no integer variables reach this point.
+    empty_data = xr.full_like(dataset, np.nan)
+
+    dc = DataCollections()
+    dc.create_or_add_to_collection(collection, empty_data)
+    return dc
diff --git a/src/eva/utilities/utils.py b/src/eva/utilities/utils.py
index 99b68a85..5490a26c 100644
--- 
a/src/eva/utilities/utils.py +++ b/src/eva/utilities/utils.py @@ -6,7 +6,6 @@ # -------------------------------------------------------------------------------------------------- - import re import string import yaml