From b472f3562ff9fafcde0c57c9ffac3ed476209a5e Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Fri, 26 Jul 2024 09:03:53 +0100 Subject: [PATCH 1/9] proof of concept load performance improvement --- lib/iris/experimental/ugrid/cf.py | 4 ++++ lib/iris/fileformats/cf.py | 15 +++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/lib/iris/experimental/ugrid/cf.py b/lib/iris/experimental/ugrid/cf.py index 281bdba878..b4d78b72dc 100644 --- a/lib/iris/experimental/ugrid/cf.py +++ b/lib/iris/experimental/ugrid/cf.py @@ -290,3 +290,7 @@ class CFUGridReader(cf.CFReader): ) CFGroup = CFUGridGroup + + def trim(self): + self._variable_types = cf.CFReader._variable_types + self.CFGroup = cf.CFGroup diff --git a/lib/iris/fileformats/cf.py b/lib/iris/fileformats/cf.py index 3247aa1960..f5b2d47c10 100644 --- a/lib/iris/fileformats/cf.py +++ b/lib/iris/fileformats/cf.py @@ -1089,6 +1089,9 @@ def __init__(self, file_source, warn=False, monotonic=False): self._check_monotonic = monotonic + if not self.has_meshes(): + self.trim() + self._translate() self._build_cf_groups() self._reset() @@ -1106,6 +1109,18 @@ def __exit__(self, exc_type, exc_value, traceback): # When used as a context-manager, **always** close the file on exit. self._close() + def has_meshes(self): + result = False + for _, variable in self._dataset.variables.items(): + cfr = getattr(variable, "cf_role", None) + print(cfr) + if cfr == "mesh_topology": + result = True + break + return result + def trim(self): + pass + @property def filename(self): """The file that the CFReader is reading.""" From 3ea81ea9d158b0e9afd7c021f0139df8de7056aa Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Fri, 26 Jul 2024 11:32:51 +0100 Subject: [PATCH 2/9] remove print line --- lib/iris/fileformats/cf.py | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/iris/fileformats/cf.py b/lib/iris/fileformats/cf.py index f5b2d47c10..f10584b38d 100644 --- a/lib/iris/fileformats/cf.py +++ b/lib/iris/fileformats/cf.py @@ -1113,7 +1113,6 @@ def has_meshes(self): result = False for _, variable in self._dataset.variables.items(): cfr = getattr(variable, "cf_role", None) - print(cfr) if cfr == "mesh_topology": result = True break From c70948edf4e574f6fb4bc61b098717166550e902 Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Fri, 26 Jul 2024 12:15:36 +0100 Subject: [PATCH 3/9] adjust for merge --- lib/iris/fileformats/cf.py | 52 ++++++++++++++++++++++- lib/iris/fileformats/netcdf/ugrid_load.py | 2 + 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/lib/iris/fileformats/cf.py b/lib/iris/fileformats/cf.py index 005f09e573..7da274c6cf 100644 --- a/lib/iris/fileformats/cf.py +++ b/lib/iris/fileformats/cf.py @@ -1275,6 +1275,42 @@ def __repr__(self): return "<%s of %s>" % (self.__class__.__name__, ", ".join(result)) +class CFUGridGroup(CFGroup): + """Represents a collection of CF Metadata Conventions variables and netCDF global attributes. + + Represents a collection of 'NetCDF Climate and Forecast (CF) Metadata + Conventions' variables and netCDF global attributes. + + Specialisation of :class:`~iris.fileformats.cf.CFGroup` that includes extra + collections for CF-UGRID-specific variable types. + + """ + + @property + def connectivities(self): + """Collection of CF-UGRID connectivity variables.""" + return self._cf_getter(CFUGridConnectivityVariable) + + @property + def ugrid_coords(self): + """Collection of CF-UGRID-relevant auxiliary coordinate variables.""" + return self._cf_getter(CFUGridAuxiliaryCoordinateVariable) + + @property + def meshes(self): + """Collection of CF-UGRID mesh variables.""" + return self._cf_getter(CFUGridMeshVariable) + + @property + def non_data_variable_names(self): + """:class:`set` of names of the CF-netCDF/CF-UGRID variables that are not the data pay-load.""" + extra_variables = (self.connectivities, self.ugrid_coords, self.meshes) + extra_result = set() + for variable in extra_variables: + extra_result |= set(variable) + return super().non_data_variable_names | extra_result + + ################################################################################ class CFReader: """Allows the contents of a netCDF file to be interpreted. @@ -1299,7 +1335,7 @@ class CFReader: CFUGridMeshVariable, ) - CFGroup = CFGroup + CFGroup = CFUGridGroup def __init__(self, file_source, warn=False, monotonic=False): # Ensure safe operation for destructor, should init fail. @@ -1331,8 +1367,10 @@ def __init__(self, file_source, warn=False, monotonic=False): self._check_monotonic = monotonic + self.with_ugrid = True if not self.has_meshes(): self.trim() + self.with_ugrid = False self._translate() self._build_cf_groups() @@ -1359,8 +1397,18 @@ def has_meshes(self): result = True break return result + def trim(self): - pass + self._variable_types = ( + CFAncillaryDataVariable, + CFAuxiliaryCoordinateVariable, + CFBoundaryVariable, + CFClimatologyVariable, + CFGridMappingVariable, + CFLabelVariable, + CFMeasureVariable, + ) + self.CFGroup = CFGroup @property def filename(self): diff --git a/lib/iris/fileformats/netcdf/ugrid_load.py b/lib/iris/fileformats/netcdf/ugrid_load.py index 210e112629..3b7247a3b2 100644 --- a/lib/iris/fileformats/netcdf/ugrid_load.py +++ b/lib/iris/fileformats/netcdf/ugrid_load.py @@ -56,6 +56,8 @@ def _meshes_from_cf(cf_reader): # Mesh instances are shared between file phenomena. # TODO: more sophisticated Mesh sharing between files. # TODO: access external Mesh cache? + if not cf_reader.with_ugrid: + return {} mesh_vars = cf_reader.cf_group.meshes meshes = { name: _build_mesh(cf_reader, var, cf_reader.filename) From fffe32e9aed768b4ae77a65adf8aeb3e8cac8375 Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Fri, 26 Jul 2024 13:27:02 +0100 Subject: [PATCH 4/9] adjust to tolerant load behaviour, make methods private --- lib/iris/fileformats/cf.py | 15 +++++++-------- lib/iris/fileformats/netcdf/ugrid_load.py | 2 +- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/lib/iris/fileformats/cf.py b/lib/iris/fileformats/cf.py index 7da274c6cf..c496f346a1 100644 --- a/lib/iris/fileformats/cf.py +++ b/lib/iris/fileformats/cf.py @@ -1367,10 +1367,10 @@ def __init__(self, file_source, warn=False, monotonic=False): self._check_monotonic = monotonic - self.with_ugrid = True - if not self.has_meshes(): - self.trim() - self.with_ugrid = False + self._with_ugrid = True + if not self._has_meshes(): + self._trim() + self._with_ugrid = False self._translate() self._build_cf_groups() @@ -1389,16 +1389,15 @@ def __exit__(self, exc_type, exc_value, traceback): # When used as a context-manager, **always** close the file on exit. self._close() - def has_meshes(self): + def _has_meshes(self): result = False for _, variable in self._dataset.variables.items(): - cfr = getattr(variable, "cf_role", None) - if cfr == "mesh_topology": + if hasattr(variable, "mesh"): result = True break return result - def trim(self): + def _trim(self): self._variable_types = ( CFAncillaryDataVariable, CFAuxiliaryCoordinateVariable, diff --git a/lib/iris/fileformats/netcdf/ugrid_load.py b/lib/iris/fileformats/netcdf/ugrid_load.py index 3b7247a3b2..48cfc940f9 100644 --- a/lib/iris/fileformats/netcdf/ugrid_load.py +++ b/lib/iris/fileformats/netcdf/ugrid_load.py @@ -56,7 +56,7 @@ def _meshes_from_cf(cf_reader): # Mesh instances are shared between file phenomena. # TODO: more sophisticated Mesh sharing between files. # TODO: access external Mesh cache? - if not cf_reader.with_ugrid: + if not cf_reader._with_ugrid: return {} mesh_vars = cf_reader.cf_group.meshes meshes = { From 071f5ee8cfc68215a7eee58b90441094117afe1e Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Fri, 26 Jul 2024 13:39:50 +0100 Subject: [PATCH 5/9] fix tests --- lib/iris/experimental/ugrid/cf.py | 0 lib/iris/fileformats/cf.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) delete mode 100644 lib/iris/experimental/ugrid/cf.py diff --git a/lib/iris/experimental/ugrid/cf.py b/lib/iris/experimental/ugrid/cf.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/lib/iris/fileformats/cf.py b/lib/iris/fileformats/cf.py index c496f346a1..87a50a94eb 100644 --- a/lib/iris/fileformats/cf.py +++ b/lib/iris/fileformats/cf.py @@ -1392,7 +1392,7 @@ def __exit__(self, exc_type, exc_value, traceback): def _has_meshes(self): result = False for _, variable in self._dataset.variables.items(): - if hasattr(variable, "mesh"): + if hasattr(variable, "mesh") or hasattr(variable, "node_coordinates"): result = True break return result From b0c606bc96e030db7ae556114e821e8880e8a352 Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Fri, 26 Jul 2024 14:16:03 +0100 Subject: [PATCH 6/9] simplify changes --- lib/iris/fileformats/cf.py | 43 +++----------------------------------- 1 file changed, 3 insertions(+), 40 deletions(-) diff --git a/lib/iris/fileformats/cf.py b/lib/iris/fileformats/cf.py index 87a50a94eb..5aee4f117d 100644 --- a/lib/iris/fileformats/cf.py +++ b/lib/iris/fileformats/cf.py @@ -1275,42 +1275,6 @@ def __repr__(self): return "<%s of %s>" % (self.__class__.__name__, ", ".join(result)) -class CFUGridGroup(CFGroup): - """Represents a collection of CF Metadata Conventions variables and netCDF global attributes. - - Represents a collection of 'NetCDF Climate and Forecast (CF) Metadata - Conventions' variables and netCDF global attributes. - - Specialisation of :class:`~iris.fileformats.cf.CFGroup` that includes extra - collections for CF-UGRID-specific variable types. - - """ - - @property - def connectivities(self): - """Collection of CF-UGRID connectivity variables.""" - return self._cf_getter(CFUGridConnectivityVariable) - - @property - def ugrid_coords(self): - """Collection of CF-UGRID-relevant auxiliary coordinate variables.""" - return self._cf_getter(CFUGridAuxiliaryCoordinateVariable) - - @property - def meshes(self): - """Collection of CF-UGRID mesh variables.""" - return self._cf_getter(CFUGridMeshVariable) - - @property - def non_data_variable_names(self): - """:class:`set` of names of the CF-netCDF/CF-UGRID variables that are not the data pay-load.""" - extra_variables = (self.connectivities, self.ugrid_coords, self.meshes) - extra_result = set() - for variable in extra_variables: - extra_result |= set(variable) - return super().non_data_variable_names | extra_result - - ################################################################################ class CFReader: """Allows the contents of a netCDF file to be interpreted. @@ -1335,7 +1299,7 @@ class CFReader: CFUGridMeshVariable, ) - CFGroup = CFUGridGroup + CFGroup = CFGroup def __init__(self, file_source, warn=False, monotonic=False): # Ensure safe operation for destructor, should init fail. @@ -1369,7 +1333,7 @@ def __init__(self, file_source, warn=False, monotonic=False): self._with_ugrid = True if not self._has_meshes(): - self._trim() + self._trim_ugrid_variable_types() self._with_ugrid = False self._translate() @@ -1397,7 +1361,7 @@ def _has_meshes(self): break return result - def _trim(self): + def _trim_ugrid_variable_types(self): self._variable_types = ( CFAncillaryDataVariable, CFAuxiliaryCoordinateVariable, @@ -1407,7 +1371,6 @@ def _trim(self): CFLabelVariable, CFMeasureVariable, ) - self.CFGroup = CFGroup @property def filename(self): From c33e98f7d4deb90ee51586f7b326544efb858e89 Mon Sep 17 00:00:00 2001 From: stephenworsley <49274989+stephenworsley@users.noreply.github.com> Date: Mon, 29 Jul 2024 14:44:47 +0100 Subject: [PATCH 7/9] Update lib/iris/fileformats/cf.py Co-authored-by: Bill Little --- lib/iris/fileformats/cf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/iris/fileformats/cf.py b/lib/iris/fileformats/cf.py index 5aee4f117d..024bcb6f1d 100644 --- a/lib/iris/fileformats/cf.py +++ b/lib/iris/fileformats/cf.py @@ -1355,7 +1355,7 @@ def __exit__(self, exc_type, exc_value, traceback): def _has_meshes(self): result = False - for _, variable in self._dataset.variables.items(): + for variable in self._dataset.variables.values(): if hasattr(variable, "mesh") or hasattr(variable, "node_coordinates"): result = True break From be23489eb75edde4aaca66c8cad3c61f0fa816a3 Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Mon, 29 Jul 2024 14:47:15 +0100 Subject: [PATCH 8/9] address review comment --- lib/iris/fileformats/netcdf/ugrid_load.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/iris/fileformats/netcdf/ugrid_load.py b/lib/iris/fileformats/netcdf/ugrid_load.py index 48cfc940f9..0a70567f16 100644 --- a/lib/iris/fileformats/netcdf/ugrid_load.py +++ b/lib/iris/fileformats/netcdf/ugrid_load.py @@ -56,13 +56,13 @@ def _meshes_from_cf(cf_reader): # Mesh instances are shared between file phenomena. # TODO: more sophisticated Mesh sharing between files. # TODO: access external Mesh cache? - if not cf_reader._with_ugrid: - return {} - mesh_vars = cf_reader.cf_group.meshes - meshes = { - name: _build_mesh(cf_reader, var, cf_reader.filename) - for name, var in mesh_vars.items() - } + meshes = {} + if cf_reader._with_ugrid: + mesh_vars = cf_reader.cf_group.meshes + meshes = { + name: _build_mesh(cf_reader, var, cf_reader.filename) + for name, var in mesh_vars.items() + } return meshes From 73e3be1b8c11d4fbc844548b1542a83ee15e021f Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Mon, 29 Jul 2024 15:07:38 +0100 Subject: [PATCH 9/9] add whatsnew --- docs/src/whatsnew/latest.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst index e04b832e23..ef6dd4355a 100644 --- a/docs/src/whatsnew/latest.rst +++ b/docs/src/whatsnew/latest.rst @@ -95,6 +95,10 @@ This document explains the changes made to Iris for this release subclasses of a common generic :class:`~iris.mesh.components.Mesh` class. (:issue:`6057`, :pull:`6061`, :pull:`6077`) +#. `@pp-mo`_ and `@stephenworsley`_ Turned on UGRID loading by default, effectively removing + the need for and deprecating the :func:`~iris.ugrid.experimental.PARSE_UGRID_ON_LOAD` + context manager. (:pull:`6054`, :pull:`6088`) + 🚀 Performance Enhancements ===========================