Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor dataset wrapper #1

Closed
wants to merge 22 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
f27a91d
fix for file reading; includes removing chunk reading and decoding ti…
bkremmli May 17, 2024
a08d9bc
Merge branch 'read_error' of https://github.com/bkremmli/satpy into r…
bkremmli May 17, 2024
2dbae1a
remove import datetime
bkremmli May 17, 2024
eb0382a
add bkremmli to AUTHORS.md
bkremmli May 17, 2024
7d6608a
correct for failures from hook id ruff
bkremmli May 17, 2024
ec22136
minor adaptations from PR comments
bkremmli May 17, 2024
5beedea
Update satpy/readers/mviri_l1b_fiduceo_nc.py
bkremmli May 17, 2024
21679c6
perform chunking after open_dataset and use decode_cf = False
bkremmli May 24, 2024
a7cb10d
Merge branch 'read_error' of https://github.com/bkremmli/satpy into r…
bkremmli May 24, 2024
59880ce
decode times separately from other variables, adds TestInterpolator
bkremmli May 28, 2024
fb93f00
fixes _decode_cf() and tests
bkremmli Jun 4, 2024
951c9b0
adds test_fix_duplicate_dimensions and removes leftover dimensions "s…
bkremmli Jun 5, 2024
a379a08
Update mviri_l1b_fiduceo_nc.py
bkremmli Jun 5, 2024
5ec0cf9
adds support for filenames of MVIRI FCDR L1.5 release 2
bkremmli Jun 6, 2024
73acfb7
Merge pull request #1 from bkremmli/mviri_release2
bkremmli Jun 6, 2024
01856fd
Merge branch 'pytroll:main' into read_error
bkremmli Sep 3, 2024
c1cd334
sync/merge with fork diffs
bkremmli Sep 3, 2024
abf916e
adapt changes after xarray release 2024.7.0: include chunks with open…
bkremmli Sep 3, 2024
5822e50
removed chunks part from test_fix_duplicate_dimensions; adapted tests…
bkremmli Sep 3, 2024
f41e068
moved code _get_projection_longitude()
bkremmli Sep 4, 2024
6b079b1
fix test_fix_duplicate_dimensions
bkremmli Sep 4, 2024
1b40cd3
Refactor dataset wrapper
sfinkens Sep 4, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AUTHORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ The following people have made contributions to this project:
- [Sauli Joro (sjoro)](https://github.com/sjoro)
- [Pouria Khalaj](https://github.com/pkhalaj)
- [Janne Kotro (jkotro)](https://github.com/jkotro)
- [Beke Kremmling (bkremmli)](https://github.com/bkremmli) - Deutscher Wetterdienst
- [Ralph Kuehn (ralphk11)](https://github.com/ralphk11)
- [Panu Lahtinen (pnuu)](https://github.com/pnuu)
- [Jussi Leinonen (jleinonen)](https://github.com/jleinonen) - meteoswiss
Expand Down
8 changes: 6 additions & 2 deletions satpy/etc/readers/mviri_l1b_fiduceo_nc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,18 @@ file_types:
nc_easy:
file_reader: !!python/name:satpy.readers.mviri_l1b_fiduceo_nc.FiduceoMviriEasyFcdrFileHandler
file_patterns: [
'FIDUCEO_FCDR_{level}_{sensor}_{platform}-{projection_longitude:f}_{start_time:%Y%m%d%H%M}_{end_time:%Y%m%d%H%M}_EASY_{processor_version}_{format_version}.nc'
'FIDUCEO_FCDR_{level}_{sensor}_{platform}-{projection_longitude:f}_{start_time:%Y%m%d%H%M}_{end_time:%Y%m%d%H%M}_EASY_{processor_version}_{format_version}.nc',
# Example: FIDUCEO_FCDR_L15_MVIRI_MET7-57.0_201701201000_201701201030_EASY_v2.6_fv3.1.nc
'{sensor}_FCDR-EASY_{level}_{platform}-E{projection_longitude:s}_{start_time:%Y%m%d%H%M}_{end_time:%Y%m%d%H%M}_{release}.nc'
# Example: MVIRI_FCDR-EASY_L15_MET7-E0000_200607060600_200607060630_0200.nc
]
nc_full:
file_reader: !!python/name:satpy.readers.mviri_l1b_fiduceo_nc.FiduceoMviriFullFcdrFileHandler
file_patterns: [
'FIDUCEO_FCDR_{level}_{sensor}_{platform}-{projection_longitude:f}_{start_time:%Y%m%d%H%M}_{end_time:%Y%m%d%H%M}_FULL_{processor_version}_{format_version}.nc'
'FIDUCEO_FCDR_{level}_{sensor}_{platform}-{projection_longitude:f}_{start_time:%Y%m%d%H%M}_{end_time:%Y%m%d%H%M}_FULL_{processor_version}_{format_version}.nc',
# Example: FIDUCEO_FCDR_L15_MVIRI_MET7-57.0_201701201000_201701201030_FULL_v2.6_fv3.1.nc
'{sensor}_FCDR-FULL_{level}_{platform}-E{projection_longitude:s}_{start_time:%Y%m%d%H%M}_{end_time:%Y%m%d%H%M}_{release}.nc'
# Example: MVIRI_FCDR-FULL_L15_MET7-E0000_200607060600_200607060630_0200.nc
]

datasets:
Expand Down
167 changes: 110 additions & 57 deletions satpy/readers/mviri_l1b_fiduceo_nc.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,9 +162,9 @@

from satpy.readers._geos_area import get_area_definition, get_area_extent, sampling_to_lfac_cfac
from satpy.readers.file_handlers import BaseFileHandler
from satpy.utils import get_legacy_chunk_size
from satpy.utils import get_chunk_size_limit

CHUNK_SIZE = get_legacy_chunk_size()
CHUNK_SIZE = get_chunk_size_limit()
EQUATOR_RADIUS = 6378140.0
POLE_RADIUS = 6356755.0
ALTITUDE = 42164000.0 - EQUATOR_RADIUS
Expand Down Expand Up @@ -452,84 +452,126 @@ def is_high_resol(resolution):
return resolution == HIGH_RESOL


class DatasetWrapper:
"""Helper class for accessing the dataset."""

def __init__(self, nc):
"""Wrap the given dataset."""
self.nc = nc

@property
def attrs(self):
"""Exposes dataset attributes."""
return self.nc.attrs

def __getitem__(self, item):
"""Get a variable from the dataset."""
ds = self.nc[item]
if self._should_dims_be_renamed(ds):
ds = self._rename_dims(ds)
elif self._coordinates_not_assigned(ds):
ds = self._reassign_coords(ds)
class DatasetPreprocessor:
def preprocess(self, ds):
    """Run the full preprocessing pipeline on the raw dataset.

    Renames variables, applies CF decoding, fixes duplicated dimension
    names, re-assigns missing coordinates, and strips attributes that
    would otherwise trigger downstream satpy warnings.
    """
    prepared = self._fix_duplicate_dimensions(
        self._decode_cf(self._rename_vars(ds))
    )
    # These two helpers mutate the dataset in place.
    self._reassign_coords(prepared)
    self._cleanup_attrs(prepared)
    return prepared

def _should_dims_be_renamed(self, ds):
"""Determine whether dataset dimensions need to be renamed."""
return "y_ir_wv" in ds.dims or "y_tie" in ds.dims

def _rename_dims(self, ds):
"""Rename dataset dimensions to match satpy's expectations."""
def _rename_vars(self, ds):
"""Rename variables to match satpy's expectations."""
new_names = {
"y_ir_wv": "y",
"x_ir_wv": "x",
"y_tie": "y",
"x_tie": "x"
"time_ir_wv": "time",
}
for old_name, new_name in new_names.items():
if old_name in ds.dims:
ds = ds.rename({old_name: new_name})
new_names_avail = {
old: new
for old, new in new_names.items()
if old in ds
}
return ds.rename(new_names_avail)

def _decode_cf(self, ds):
    """Apply CF decoding to everything except the time variable.

    Time is decoded separately (see _decode_time) and re-attached
    afterwards, since the file is opened with decode_times=False.
    """
    time_dims, decoded_time = self._decode_time(ds)
    stripped = ds.drop_vars(decoded_time.name)
    decoded = xr.decode_cf(stripped)
    decoded[decoded_time.name] = (time_dims, decoded_time.values)
    return decoded

def _coordinates_not_assigned(self, ds):
return "y" in ds.dims and "y" not in ds.coords
def _decode_time(self, ds):
    """Manually decode the raw "time" variable into datetime64 values.

    Returns a tuple ``(time_dims, time)`` of the original time
    dimensions and the decoded values so the caller can re-attach the
    variable after ``xr.decode_cf`` (the file is opened with
    ``decode_times=False``). Fill values are mapped to NaT.
    """
    time = ds["time"]
    time_dims = time.dims
    # NOTE(review): assumes the raw "time" variable carries both
    # "_FillValue" and "add_offset" attributes, and that
    # raw + add_offset yields seconds since the Unix epoch -- confirm
    # against the FIDUCEO FCDR file format.
    time = xr.where(time == time.attrs["_FillValue"], np.datetime64("NaT"),
                    (time + time.attrs["add_offset"]).astype("datetime64[s]").astype("datetime64[ns]"))
    return (time_dims, time)

def _fix_duplicate_dimensions(self, ds):
    """Give duplicated dimension names distinct names.

    The covariance/correlation matrix variables appear to use the same
    dimension name twice (inferred from the variable names and the
    accompanying test "test_fix_duplicate_dimensions"), which xarray
    cannot index reliably. Each axis is renamed and the now-unused
    original dimensions are dropped.
    """
    ds = ds.copy()
    # NOTE(review): assigning .dims on ds.variables[...] pokes at
    # xarray internals rather than using a public rename API --
    # verify this keeps working across xarray versions.
    ds.variables["covariance_spectral_response_function_vis"].dims = ("srf_size_1", "srf_size_2")
    ds = ds.drop_dims("srf_size")
    ds.variables["channel_correlation_matrix_independent"].dims = ("channel_1", "channel_2")
    ds.variables["channel_correlation_matrix_structured"].dims = ("channel_1", "channel_2")
    ds = ds.drop_dims("channel")
    return ds

def _reassign_coords(self, ds):
"""Re-assign coordinates.

For some reason xarray doesn't assign coordinates to all high
resolution data variables.
"""
return ds.assign_coords({"y": self.nc.coords["y"],
"x": self.nc.coords["x"]})
for var_name, data_array in ds.data_vars.items():
if self._coordinates_not_assigned(data_array):
ds[var_name] = data_array.assign_coords(
{
"y": ds.coords["y"],
"x": ds.coords["x"]
}
)

def _coordinates_not_assigned(self, ds):
return "y" in ds.dims and "y" not in ds.coords

def _cleanup_attrs(self, ds):
"""Cleanup dataset attributes."""
# Remove ancillary_variables attribute to avoid downstream
# satpy warnings.
ds.attrs.pop("ancillary_variables", None)
for data_array in ds.data_vars.values():
data_array.attrs.pop("ancillary_variables", None)

def get_time(self):
"""Get time coordinate.

Variable is sometimes named "time" and sometimes "time_ir_wv".
"""
try:
return self["time_ir_wv"]
except KeyError:
return self["time"]
class DatasetAccessor:
"""Helper class for accessing the dataset."""

def __init__(self, ds):
    """Wrap the given dataset.

    Args:
        ds: the (preprocessed) dataset to expose; stored as-is.
    """
    self.ds = ds

@property
def attrs(self):
    """Expose the attributes of the wrapped dataset."""
    return self.ds.attrs

def __getitem__(self, item):
    """Get a variable from the dataset, normalizing legacy dimension names."""
    var = self.ds[item]
    if not self._should_dims_be_renamed(var):
        return var
    return self._rename_dims(var)

def _should_dims_be_renamed(self, data_array):
"""Determine whether dataset dimensions need to be renamed."""
return "y_ir_wv" in data_array.dims or "y_tie" in data_array.dims

def _rename_dims(self, data_array):
"""Rename dataset dimensions to match satpy's expectations."""
new_names = {
"y_ir_wv": "y",
"x_ir_wv": "x",
"y_tie": "y",
"x_tie": "x"
}
new_names_avail = {
old: new
for old, new in new_names.items()
if old in data_array.dims
}
return data_array.rename(new_names_avail)

def get_xy_coords(self, resolution):
"""Get x and y coordinates for the given resolution."""
if is_high_resol(resolution):
return self.nc.coords["x"], self.nc.coords["y"]
return self.nc.coords["x_ir_wv"], self.nc.coords["x_ir_wv"]
return self.ds.coords["x"], self.ds.coords["y"]
return self.ds.coords["x_ir_wv"], self.ds.coords["x_ir_wv"]

def get_image_size(self, resolution):
"""Get image size for the given resolution."""
if is_high_resol(resolution):
return self.nc.coords["y"].size
return self.nc.coords["y_ir_wv"].size
return self.ds.coords["y"].size
return self.ds.coords["y_ir_wv"].size


class FiduceoMviriBase(BaseFileHandler):
Expand Down Expand Up @@ -558,13 +600,17 @@ def __init__(self, filename, filename_info, filetype_info, # noqa: D417
chunks={"x": CHUNK_SIZE,
"y": CHUNK_SIZE,
"x_ir_wv": CHUNK_SIZE,
"y_ir_wv": CHUNK_SIZE}
"y_ir_wv": CHUNK_SIZE},
decode_cf=False,
decode_times=False,
mask_and_scale=False,
)
self.nc = DatasetWrapper(nc_raw)

# Projection longitude is not provided in the file, read it from the
# filename.
self.projection_longitude = float(filename_info["projection_longitude"])
nc_preproc = DatasetPreprocessor().preprocess(nc_raw)
self.nc = DatasetAccessor(nc_preproc)

self.projection_longitude = self._get_projection_longitude(filename_info)

self.calib_coefs = self._get_calib_coefs()

self._get_angles = functools.lru_cache(maxsize=8)(
Expand All @@ -574,6 +620,13 @@ def __init__(self, filename, filename_info, filetype_info, # noqa: D417
self._get_acq_time_uncached
)

def _get_projection_longitude(self, filename_info):
"""Read projection longitude from filename as it is not provided in the file."""
if "." in str(filename_info["projection_longitude"]):
return float(filename_info["projection_longitude"])
else:
return float(filename_info["projection_longitude"]) / 100

def get_dataset(self, dataset_id, dataset_info):
"""Get the dataset."""
name = dataset_id["name"]
Expand Down Expand Up @@ -703,7 +756,7 @@ def _get_acq_time_uncached(self, resolution):
Note that the acquisition time does not increase monotonically
with the scanline number due to the scan pattern and rectification.
"""
time2d = self.nc.get_time()
time2d = self.nc["time"]
_, target_y = self.nc.get_xy_coords(resolution)
return Interpolator.interp_acq_time(time2d, target_y=target_y.values)

Expand Down
Loading
Loading