diff --git a/AUTHORS.md b/AUTHORS.md index d7e78eecae..3594a1225e 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -45,6 +45,7 @@ The following people have made contributions to this project: - [Sauli Joro (sjoro)](https://github.com/sjoro) - [Pouria Khalaj](https://github.com/pkhalaj) - [Janne Kotro (jkotro)](https://github.com/jkotro) +- [Beke Kremmling (bkremmli)](https://github.com/bkremmli) - Deutscher Wetterdienst - [Ralph Kuehn (ralphk11)](https://github.com/ralphk11) - [Panu Lahtinen (pnuu)](https://github.com/pnuu) - [Jussi Leinonen (jleinonen)](https://github.com/jleinonen) - meteoswiss diff --git a/satpy/etc/readers/mviri_l1b_fiduceo_nc.yaml b/satpy/etc/readers/mviri_l1b_fiduceo_nc.yaml index ec3c5cab77..da30cb2545 100644 --- a/satpy/etc/readers/mviri_l1b_fiduceo_nc.yaml +++ b/satpy/etc/readers/mviri_l1b_fiduceo_nc.yaml @@ -20,14 +20,18 @@ file_types: nc_easy: file_reader: !!python/name:satpy.readers.mviri_l1b_fiduceo_nc.FiduceoMviriEasyFcdrFileHandler file_patterns: [ - 'FIDUCEO_FCDR_{level}_{sensor}_{platform}-{projection_longitude:f}_{start_time:%Y%m%d%H%M}_{end_time:%Y%m%d%H%M}_EASY_{processor_version}_{format_version}.nc' + 'FIDUCEO_FCDR_{level}_{sensor}_{platform}-{projection_longitude:f}_{start_time:%Y%m%d%H%M}_{end_time:%Y%m%d%H%M}_EASY_{processor_version}_{format_version}.nc', # Example: FIDUCEO_FCDR_L15_MVIRI_MET7-57.0_201701201000_201701201030_EASY_v2.6_fv3.1.nc + '{sensor}_FCDR-EASY_{level}_{platform}-E{projection_longitude:s}_{start_time:%Y%m%d%H%M}_{end_time:%Y%m%d%H%M}_{release}.nc' + # Example: MVIRI_FCDR-EASY_L15_MET7-E0000_200607060600_200607060630_0200.nc ] nc_full: file_reader: !!python/name:satpy.readers.mviri_l1b_fiduceo_nc.FiduceoMviriFullFcdrFileHandler file_patterns: [ - 'FIDUCEO_FCDR_{level}_{sensor}_{platform}-{projection_longitude:f}_{start_time:%Y%m%d%H%M}_{end_time:%Y%m%d%H%M}_FULL_{processor_version}_{format_version}.nc' + 
'FIDUCEO_FCDR_{level}_{sensor}_{platform}-{projection_longitude:f}_{start_time:%Y%m%d%H%M}_{end_time:%Y%m%d%H%M}_FULL_{processor_version}_{format_version}.nc', # Example: FIDUCEO_FCDR_L15_MVIRI_MET7-57.0_201701201000_201701201030_FULL_v2.6_fv3.1.nc + '{sensor}_FCDR-FULL_{level}_{platform}-E{projection_longitude:s}_{start_time:%Y%m%d%H%M}_{end_time:%Y%m%d%H%M}_{release}.nc' + # Example: MVIRI_FCDR-FULL_L15_MET7-E0000_200607060600_200607060630_0200.nc ] datasets: diff --git a/satpy/readers/mviri_l1b_fiduceo_nc.py b/satpy/readers/mviri_l1b_fiduceo_nc.py index fc5aea2c8e..4a619ac866 100644 --- a/satpy/readers/mviri_l1b_fiduceo_nc.py +++ b/satpy/readers/mviri_l1b_fiduceo_nc.py @@ -162,9 +162,9 @@ from satpy.readers._geos_area import get_area_definition, get_area_extent, sampling_to_lfac_cfac from satpy.readers.file_handlers import BaseFileHandler -from satpy.utils import get_legacy_chunk_size +from satpy.utils import get_chunk_size_limit -CHUNK_SIZE = get_legacy_chunk_size() +CHUNK_SIZE = get_chunk_size_limit() EQUATOR_RADIUS = 6378140.0 POLE_RADIUS = 6356755.0 ALTITUDE = 42164000.0 - EQUATOR_RADIUS @@ -452,47 +452,50 @@ def is_high_resol(resolution): return resolution == HIGH_RESOL -class DatasetWrapper: - """Helper class for accessing the dataset.""" - - def __init__(self, nc): - """Wrap the given dataset.""" - self.nc = nc - - @property - def attrs(self): - """Exposes dataset attributes.""" - return self.nc.attrs - - def __getitem__(self, item): - """Get a variable from the dataset.""" - ds = self.nc[item] - if self._should_dims_be_renamed(ds): - ds = self._rename_dims(ds) - elif self._coordinates_not_assigned(ds): - ds = self._reassign_coords(ds) +class DatasetPreprocessor: + def preprocess(self, ds): + ds = self._rename_vars(ds) + ds = self._decode_cf(ds) + ds = self._fix_duplicate_dimensions(ds) + self._reassign_coords(ds) self._cleanup_attrs(ds) return ds - def _should_dims_be_renamed(self, ds): - """Determine whether dataset dimensions need to be renamed.""" - 
return "y_ir_wv" in ds.dims or "y_tie" in ds.dims - - def _rename_dims(self, ds): - """Rename dataset dimensions to match satpy's expectations.""" + def _rename_vars(self, ds): + """Rename variables to match satpy's expectations.""" new_names = { - "y_ir_wv": "y", - "x_ir_wv": "x", - "y_tie": "y", - "x_tie": "x" + "time_ir_wv": "time", } - for old_name, new_name in new_names.items(): - if old_name in ds.dims: - ds = ds.rename({old_name: new_name}) + new_names_avail = { + old: new + for old, new in new_names.items() + if old in ds + } + return ds.rename(new_names_avail) + + def _decode_cf(self, ds): + # remove time before decoding and add again. + time_dims, time = self._decode_time(ds) + ds = ds.drop_vars(time.name) + ds = xr.decode_cf(ds) + ds[time.name] = (time_dims, time.values) return ds - def _coordinates_not_assigned(self, ds): - return "y" in ds.dims and "y" not in ds.coords + def _decode_time(self, ds): + time = ds["time"] + time_dims = time.dims + time = xr.where(time == time.attrs["_FillValue"], np.datetime64("NaT"), + (time + time.attrs["add_offset"]).astype("datetime64[s]").astype("datetime64[ns]")) + return (time_dims, time) + + def _fix_duplicate_dimensions(self, ds): + ds = ds.copy() + ds.variables["covariance_spectral_response_function_vis"].dims = ("srf_size_1", "srf_size_2") + ds = ds.drop_dims("srf_size") + ds.variables["channel_correlation_matrix_independent"].dims = ("channel_1", "channel_2") + ds.variables["channel_correlation_matrix_structured"].dims = ("channel_1", "channel_2") + ds = ds.drop_dims("channel") + return ds def _reassign_coords(self, ds): """Re-assign coordinates. @@ -500,36 +503,75 @@ def _reassign_coords(self, ds): For some reason xarray doesn't assign coordinates to all high resolution data variables. 
""" - return ds.assign_coords({"y": self.nc.coords["y"], - "x": self.nc.coords["x"]}) + for var_name, data_array in ds.data_vars.items(): + if self._coordinates_not_assigned(data_array): + ds[var_name] = data_array.assign_coords( + { + "y": ds.coords["y"], + "x": ds.coords["x"] + } + ) + + def _coordinates_not_assigned(self, ds): + return "y" in ds.dims and "y" not in ds.coords def _cleanup_attrs(self, ds): """Cleanup dataset attributes.""" # Remove ancillary_variables attribute to avoid downstream # satpy warnings. - ds.attrs.pop("ancillary_variables", None) + for data_array in ds.data_vars.values(): + data_array.attrs.pop("ancillary_variables", None) - def get_time(self): - """Get time coordinate. - Variable is sometimes named "time" and sometimes "time_ir_wv". - """ - try: - return self["time_ir_wv"] - except KeyError: - return self["time"] +class DatasetAccessor: + """Helper class for accessing the dataset.""" + + def __init__(self, ds): + """Wrap the given dataset.""" + self.ds = ds + + @property + def attrs(self): + """Exposes dataset attributes.""" + return self.ds.attrs + + def __getitem__(self, item): + """Get a variable from the dataset.""" + data_array = self.ds[item] + if self._should_dims_be_renamed(data_array): + return self._rename_dims(data_array) + return data_array + + def _should_dims_be_renamed(self, data_array): + """Determine whether dataset dimensions need to be renamed.""" + return "y_ir_wv" in data_array.dims or "y_tie" in data_array.dims + + def _rename_dims(self, data_array): + """Rename dataset dimensions to match satpy's expectations.""" + new_names = { + "y_ir_wv": "y", + "x_ir_wv": "x", + "y_tie": "y", + "x_tie": "x" + } + new_names_avail = { + old: new + for old, new in new_names.items() + if old in data_array.dims + } + return data_array.rename(new_names_avail) def get_xy_coords(self, resolution): """Get x and y coordinates for the given resolution.""" if is_high_resol(resolution): - return self.nc.coords["x"], self.nc.coords["y"] - 
return self.nc.coords["x_ir_wv"], self.nc.coords["x_ir_wv"] + return self.ds.coords["x"], self.ds.coords["y"] + return self.ds.coords["x_ir_wv"], self.ds.coords["x_ir_wv"] def get_image_size(self, resolution): """Get image size for the given resolution.""" if is_high_resol(resolution): - return self.nc.coords["y"].size - return self.nc.coords["y_ir_wv"].size + return self.ds.coords["y"].size + return self.ds.coords["y_ir_wv"].size class FiduceoMviriBase(BaseFileHandler): @@ -558,13 +600,17 @@ def __init__(self, filename, filename_info, filetype_info, # noqa: D417 chunks={"x": CHUNK_SIZE, "y": CHUNK_SIZE, "x_ir_wv": CHUNK_SIZE, - "y_ir_wv": CHUNK_SIZE} + "y_ir_wv": CHUNK_SIZE}, + decode_cf=False, + decode_times=False, + mask_and_scale=False, ) - self.nc = DatasetWrapper(nc_raw) - # Projection longitude is not provided in the file, read it from the - # filename. - self.projection_longitude = float(filename_info["projection_longitude"]) + nc_preproc = DatasetPreprocessor().preprocess(nc_raw) + self.nc = DatasetAccessor(nc_preproc) + + self.projection_longitude = self._get_projection_longitude(filename_info) + self.calib_coefs = self._get_calib_coefs() self._get_angles = functools.lru_cache(maxsize=8)( @@ -574,6 +620,13 @@ def __init__(self, filename, filename_info, filetype_info, # noqa: D417 self._get_acq_time_uncached ) + def _get_projection_longitude(self, filename_info): + """Read projection longitude from filename as it is not provided in the file.""" + if "." in str(filename_info["projection_longitude"]): + return float(filename_info["projection_longitude"]) + else: + return float(filename_info["projection_longitude"]) / 100 + def get_dataset(self, dataset_id, dataset_info): """Get the dataset.""" name = dataset_id["name"] @@ -703,7 +756,7 @@ def _get_acq_time_uncached(self, resolution): Note that the acquisition time does not increase monotonically with the scanline number due to the scan pattern and rectification. 
""" - time2d = self.nc.get_time() + time2d = self.nc["time"] _, target_y = self.nc.get_xy_coords(resolution) return Interpolator.interp_acq_time(time2d, target_y=target_y.values) diff --git a/satpy/tests/reader_tests/test_mviri_l1b_fiduceo_nc.py b/satpy/tests/reader_tests/test_mviri_l1b_fiduceo_nc.py index 04694c145a..9ced865eed 100644 --- a/satpy/tests/reader_tests/test_mviri_l1b_fiduceo_nc.py +++ b/satpy/tests/reader_tests/test_mviri_l1b_fiduceo_nc.py @@ -28,6 +28,7 @@ import xarray as xr from pyproj import CRS from pyresample.geometry import AreaDefinition +from pytest_lazy_fixtures import lf as lazy_fixture from satpy.readers.mviri_l1b_fiduceo_nc import ( ALTITUDE, @@ -36,6 +37,7 @@ DatasetWrapper, FiduceoMviriEasyFcdrFileHandler, FiduceoMviriFullFcdrFileHandler, + Interpolator, ) from satpy.tests.utils import make_dataid @@ -61,8 +63,8 @@ {"sun_earth_distance_correction_applied": True, "sun_earth_distance_correction_factor": 1.} ) -acq_time_vis_exp = [np.datetime64("1970-01-01 00:30").astype("datetime64[ns]"), - np.datetime64("1970-01-01 00:30").astype("datetime64[ns]"), +acq_time_vis_exp = [np.datetime64("NaT").astype("datetime64[ns]"), + np.datetime64("NaT").astype("datetime64[ns]"), np.datetime64("1970-01-01 02:30").astype("datetime64[ns]"), np.datetime64("1970-01-01 02:30").astype("datetime64[ns]")] vis_counts_exp = xr.DataArray( @@ -79,6 +81,7 @@ }, attrs=attrs_exp ) + vis_rad_exp = xr.DataArray( np.array( [[np.nan, 18.56, 38.28, 58.], @@ -124,7 +127,7 @@ }, attrs=attrs_exp ) -acq_time_ir_wv_exp = [np.datetime64("1970-01-01 00:30").astype("datetime64[ns]"), +acq_time_ir_wv_exp = [np.datetime64("NaT"), np.datetime64("1970-01-01 02:30").astype("datetime64[ns]")] wv_counts_exp = xr.DataArray( np.array( @@ -272,8 +275,13 @@ def fixture_fake_dataset(): dtype=np.uint8 ) ) - time = np.arange(4) * 60 * 60 * 1e9 - time = time.astype("datetime64[ns]").reshape(2, 2) + + cov = da.from_array([[1, 2], [3, 4]]) + time = np.arange(4) * 60 * 60 + time[0] = 4294967295 + 
time[1] = 4294967295 + time = time.reshape(2, 2) + ds = xr.Dataset( data_vars={ "count_vis": (("y", "x"), count_vis), @@ -303,6 +311,9 @@ "sub_satellite_longitude_end": np.nan, "sub_satellite_latitude_start": np.nan, "sub_satellite_latitude_end": 0.1, + "covariance_spectral_response_function_vis": (("srf_size", "srf_size"), cov), + "channel_correlation_matrix_independent": (("channel", "channel"), cov), + "channel_correlation_matrix_structured": (("channel", "channel"), cov) }, coords={ "y": [1, 2, 3, 4], @@ -310,13 +321,16 @@ "y_ir_wv": [1, 2], "x_ir_wv": [1, 2], "y_tie": [1, 2], - "x_tie": [1, 2] - + "x_tie": [1, 2], }, attrs={"foo": "bar"} ) ds["count_ir"].attrs["ancillary_variables"] = "a_ir b_ir" ds["count_wv"].attrs["ancillary_variables"] = "a_wv b_wv" + ds["quality_pixel_bitmask"].encoding["chunksizes"] = (2, 2) + ds["time_ir_wv"].attrs["_FillValue"] = 4294967295 + ds["time_ir_wv"].attrs["add_offset"] = 0 + return ds @@ -547,17 +561,51 @@ def test_file_pattern(self, reader): "FIDUCEO_FCDR_L15_MVIRI_MET7-57.0_201701201000_201701201030_FULL_v2.6_fv3.1.nc", "FIDUCEO_FCDR_L15_MVIRI_MET7-57.0_201701201000_201701201030_EASY_v2.6_fv3.1.nc", "FIDUCEO_FCDR_L15_MVIRI_MET7-00.0_201701201000_201701201030_EASY_v2.6_fv3.1.nc", + "MVIRI_FCDR-EASY_L15_MET7-E0000_200607060600_200607060630_0200.nc", + "MVIRI_FCDR-EASY_L15_MET7-E5700_200607060600_200607060630_0200.nc", + "MVIRI_FCDR-FULL_L15_MET7-E0000_200607060600_200607060630_0200.nc", "abcde", ] files = reader.select_files_from_pathnames(filenames) - # only 3 out of 4 above should match + # only 6 out of 7 above should match - assert len(files) == 3 + assert len(files) == 6 class TestDatasetWrapper: """Unit tests for DatasetWrapper class.""" + def test_fix_duplicate_dimensions(self): + """Test the renaming of duplicate dimensions. + + If duplicate dimensions are within the Dataset, opening the datasets with chunks throws a warning. + The dimensions need to be renamed.
+ """ + foo_time = 60*60 + foo_time_exp = np.datetime64("1970-01-01 01:00").astype("datetime64[ns]") + + foo = xr.Dataset( + data_vars={ + "covariance_spectral_response_function_vis": (("srf_size", "srf_size"), [[1, 2], [3, 4]]), + "channel_correlation_matrix_independent": (("channel", "channel"), [[1, 2], [3, 4]]), + "channel_correlation_matrix_structured": (("channel", "channel"), [[1, 2], [3, 4]]), + "time_ir_wv": (("y_ir_wv", "x_ir_wv"), [[foo_time, foo_time], [foo_time, foo_time]], + {"_FillValue": 4294967295, "add_offset": 0}) + } + ) + foo_ds = DatasetWrapper(foo) + + foo_exp = xr.Dataset( + data_vars={ + "covariance_spectral_response_function_vis": (("srf_size_1", "srf_size_2"), [[1, 2], [3, 4]]), + "channel_correlation_matrix_independent": (("channel_1", "channel_2"), [[1, 2], [3, 4]]), + "channel_correlation_matrix_structured": (("channel_1", "channel_2"), [[1, 2], [3, 4]]), + "time_ir_wv": (("y_ir_wv", "x_ir_wv"), [[foo_time_exp, foo_time_exp], [foo_time_exp, foo_time_exp]]) + } + ) + + xr.testing.assert_allclose(foo_ds.nc, foo_exp) + def test_reassign_coords(self): """Test reassigning of coordinates. 
@@ -591,3 +639,63 @@ def test_reassign_coords(self): ds = DatasetWrapper(nc) foo = ds["foo"] xr.testing.assert_equal(foo, foo_exp) + +class TestInterpolator: + """Unit tests for Interpolator class.""" + @pytest.fixture(name="time_ir_wv") + def fixture_time_ir_wv(self): + """Returns time_ir_wv.""" + return xr.DataArray( + [ + [np.datetime64("1970-01-01 01:00"), np.datetime64("1970-01-01 02:00")], + [np.datetime64("1970-01-01 03:00"), np.datetime64("1970-01-01 04:00")], + [np.datetime64("NaT"), np.datetime64("1970-01-01 06:00")], + [np.datetime64("NaT"), np.datetime64("NaT")], + ], + dims=("y", "x"), + coords={"y": [1, 3, 5, 7]} + ) + + @pytest.fixture(name="acq_time_vis_exp") + def fixture_acq_time_vis_exp(self): + """Returns acq_time_vis_exp.""" + return xr.DataArray( + [ + np.datetime64("1970-01-01 01:30"), + np.datetime64("1970-01-01 01:30"), + np.datetime64("1970-01-01 03:30"), + np.datetime64("1970-01-01 03:30"), + np.datetime64("1970-01-01 06:00"), + np.datetime64("1970-01-01 06:00"), + np.datetime64("NaT"), + np.datetime64("NaT") + ], + dims="y", + coords={"y": [1, 2, 3, 4, 5, 6, 7, 8]} + ) + + @pytest.fixture(name="acq_time_ir_exp") + def fixture_acq_time_ir_exp(self): + """Returns acq_time_ir_exp.""" + return xr.DataArray( + [ + np.datetime64("1970-01-01 01:30"), + np.datetime64("1970-01-01 03:30"), + np.datetime64("1970-01-01 06:00"), + np.datetime64("NaT"), + ], + dims="y", + coords={"y": [1, 3, 5, 7]} + ) + + @pytest.mark.parametrize( + "acq_time_exp", + [ + lazy_fixture("acq_time_ir_exp"), + lazy_fixture("acq_time_vis_exp") + ] + ) + def test_interp_acq_time(self, time_ir_wv, acq_time_exp): + """Tests time interpolation.""" + res = Interpolator.interp_acq_time(time_ir_wv, target_y=acq_time_exp.coords["y"]) + xr.testing.assert_allclose(res, acq_time_exp)