From 886356c275b858696557c16dffb257cc5914be83 Mon Sep 17 00:00:00 2001
From: Heiko Klein
Date: Fri, 3 May 2024 09:15:17 +0000
Subject: [PATCH 1/2] adding more with-statements where close was missing

---
 pyaerocom/colocation.py                       |  46 +--
 pyaerocom/extras/satellite_l2/aeolus_l2a.py   |   7 +-
 pyaerocom/helpers_landsea_masks.py            |   4 +-
 pyaerocom/io/cachehandler_ungridded.py        |  39 +-
 pyaerocom/io/ghost/reader.py                  | 126 +++---
 pyaerocom/io/read_earlinet.py                 | 350 +++++++++---------
 .../testdata-minimal/create_subsets_ghost.py  |  10 +-
 tests/io/ghost/test_reader.py                 |   2 +-
 8 files changed, 295 insertions(+), 289 deletions(-)

diff --git a/pyaerocom/colocation.py b/pyaerocom/colocation.py
index dd4348341..3b5f038eb 100644
--- a/pyaerocom/colocation.py
+++ b/pyaerocom/colocation.py
@@ -972,7 +972,6 @@ def correct_model_stp_coldata(coldata, p0=None, t0=273.15, inplace=False):
     if not inplace:
         coldata = coldata.copy()
-    temp = xr.open_dataset(const.ERA5_SURFTEMP_FILE)["t2m"]
 
     arr = coldata.data
 
@@ -987,37 +986,38 @@ def correct_model_stp_coldata(coldata, p0=None, t0=273.15, inplace=False):
     mintemps = []
     maxtemps = []
     ps = []
-    for i, (lat, lon, alt, name) in enumerate(coords):
-        logger.info(name, ", Lat", lat, ", Lon", lon)
-        p = pressure(alt)
-        logger.info("Alt", alt)
-        logger.info("P=", p / 100, "hPa")
+    with xr.open_dataset(const.ERA5_SURFTEMP_FILE)["t2m"] as temp:
+        for i, (lat, lon, alt, name) in enumerate(coords):
+            logger.info(f"{name}, Lat {lat}, Lon {lon}")
+            p = pressure(alt)
+            logger.info(f"Alt {alt}")
+            logger.info(f"P={p / 100} hPa")
 
-        ps.append(p / 100)
+            ps.append(p / 100)
 
-        temps = temp.sel(latitude=lat, longitude=lon, method="nearest").data
+            temps = temp.sel(latitude=lat, longitude=lon, method="nearest").data
 
-        meantemps.append(temps.mean())
-        mintemps.append(temps.min())
-        maxtemps.append(temps.min())
+            meantemps.append(temps.mean())
+            mintemps.append(temps.min())
+            maxtemps.append(temps.max())
 
-        if not len(temps) == len(arr.time):
-            raise NotImplementedError("Check timestamps")
-        logger.info("Mean Temp: ", temps.mean() - t0, " C")
+            if not len(temps) == len(arr.time):
+                raise NotImplementedError("Check timestamps")
+            logger.info(f"Mean Temp: {temps.mean() - t0} C")
 
-        corrfacs = (p0 / p) * (temps / t0)
+            corrfacs = (p0 / p) * (temps / t0)
 
-        logger.info("Corr fac:", corrfacs.mean(), "+/-", corrfacs.std())
+            logger.info(f"Corr fac: {corrfacs.mean()} +/- {corrfacs.std()}")
 
-        cfacs.append(corrfacs.mean())
+            cfacs.append(corrfacs.mean())
 
-        # mularr = xr.DataArray(corrfacs)
+            # mularr = xr.DataArray(corrfacs)
 
-        if not arr.station_name.values[i] == name:
-            raise Exception
-        elif not arr.dims[1] == "time":
-            raise Exception
-        arr[1, :, i] *= corrfacs
+            if not arr.station_name.values[i] == name:
+                raise Exception
+            elif not arr.dims[1] == "time":
+                raise Exception
+            arr[1, :, i] *= corrfacs
 
     cfacs = np.asarray(cfacs)
 
diff --git a/pyaerocom/extras/satellite_l2/aeolus_l2a.py b/pyaerocom/extras/satellite_l2/aeolus_l2a.py
index 569695ae3..1ad9449ad 100755
--- a/pyaerocom/extras/satellite_l2/aeolus_l2a.py
+++ b/pyaerocom/extras/satellite_l2/aeolus_l2a.py
@@ -3179,16 +3179,15 @@ def read_model_file(self, file_name, topofile=None, vars_to_keep=None):
         if topofile is not None:
            # read topography since that needs to be added to the ground following height of the model
            self.logger.info("reading topography file {}".format(options["topofile"]))
-            topo_data = xr.open_dataset(options["topofile"])
-            topo_altitudes = np.squeeze(topo_data[self.EMEP_TOPO_FILE_VAR_NAME])
-            topo_data.close()
+            with xr.open_dataset(options["topofile"]) as topo_data:
+                topo_altitudes = np.squeeze(topo_data[self.EMEP_TOPO_FILE_VAR_NAME])
 
     if not os.path.exists(file_name):
         obj.logger.info(f"file does not exist: {file_name}. skipping colocation ...")
         return False
     # read netcdf file if it has not yet been loaded
     obj.logger.info(f"reading model file {file_name}")
-    nc_data = xr.open_dataset(file_name)
+    nc_data = xr.load_dataset(file_name)
     nc_data[self._LATITUDENAME] = nc_data[self.EMEP_VAR_NAME_DICT[self._LATITUDENAME]]
     nc_data[self._LONGITUDENAME] = nc_data[self.EMEP_VAR_NAME_DICT[self._LONGITUDENAME]]
     nc_data[self._TIME_NAME] = nc_data[self.EMEP_VAR_NAME_DICT[self._TIME_NAME]]
diff --git a/pyaerocom/helpers_landsea_masks.py b/pyaerocom/helpers_landsea_masks.py
index 01d13c609..e841b7af4 100644
--- a/pyaerocom/helpers_landsea_masks.py
+++ b/pyaerocom/helpers_landsea_masks.py
@@ -149,10 +149,10 @@ def load_region_mask_xr(*regions):
     for i, fil in enumerate(get_htap_mask_files(*regions)):
         r = regions[i]
         if i == 0:
-            masks = xr.open_dataset(fil)[r + "htap"]
+            masks = xr.load_dataset(fil)[r + "htap"]
             name = r
         else:
-            masks += xr.open_dataset(fil)[r + "htap"]
+            masks += xr.load_dataset(fil)[r + "htap"]
             name += f"-{r}"
     if masks is not None:
         mask = masks.where(masks < 1, 1)
diff --git a/pyaerocom/io/cachehandler_ungridded.py b/pyaerocom/io/cachehandler_ungridded.py
index 0ef3427e1..79c85c3eb 100644
--- a/pyaerocom/io/cachehandler_ungridded.py
+++ b/pyaerocom/io/cachehandler_ungridded.py
@@ -235,35 +235,36 @@ class (which should not happen)
 
         delete_existing = const.RM_CACHE_OUTDATED if not force_use_outdated else False
 
-        in_handle = open(fp, "rb")
-        if force_use_outdated:
-            last_meta = pickle.load(in_handle)
-            assert len(last_meta) == len(self.CACHE_HEAD_KEYS)
-            ok = True
-        else:
-            try:
-                ok = self._check_pkl_head_vs_database(in_handle)
-            except Exception as e:
-                ok = False
-                delete_existing = True
-                logger.exception(
-                    f"File error in cached data file {fp}. "
-                    f"File will be removed and data reloaded. Error: {repr(e)}"
-                )
+        with open(fp, "rb") as in_handle:
+            if force_use_outdated:
+                last_meta = pickle.load(in_handle)
+                assert len(last_meta) == len(self.CACHE_HEAD_KEYS)
+                ok = True
+            else:
+                try:
+                    ok = self._check_pkl_head_vs_database(in_handle)
+                except Exception as e:
+                    ok = False
+                    delete_existing = True
+                    logger.exception(
+                        f"File error in cached data file {fp}. "
+                        f"File will be removed and data reloaded. Error: {repr(e)}"
+                    )
+            if ok:
+                # everything is okay, or forced
+                data = pickle.load(in_handle)
+
         if not ok:
-            # TODO: Should we delete the cache file if it is outdated ???
+            # Delete the cache file if it is outdated, after handle is closed
             logger.info(
                 f"Aborting reading cache file {fp}. Aerocom database "
                f"or pyaerocom version has changed compared to cached version"
            )
-            in_handle.close()
            if delete_existing:
                # something was wrong
                logger.info(f"Deleting outdated cache file: {fp}")
                os.remove(fp)
            return False
 
-        # everything is okay
-        data = pickle.load(in_handle)
        if not isinstance(data, UngriddedData):
            raise TypeError(
                f"Unexpected data type stored in cache file, need instance of UngriddedData, "
diff --git a/pyaerocom/io/ghost/reader.py b/pyaerocom/io/ghost/reader.py
index facd2f322..31fb0a16a 100644
--- a/pyaerocom/io/ghost/reader.py
+++ b/pyaerocom/io/ghost/reader.py
@@ -339,68 +339,70 @@ def read_file(self, filename, var_to_read=None, invalidate_flags=None, var_to_wr
         if var_to_write is None:
             var_to_write = self.var_names_data_inv[var_to_read]
 
-        ds = xr.open_dataset(filename)
-
-        if not {"station", "time"}.issubset(ds.dims):  # pragma: no cover
-            raise AttributeError("Missing dimensions")
-        if not "station_name" in ds:  # pragma: no cover
-            raise AttributeError("No variable station_name found")
-
-        stats = []
-
-        # get all station metadata values as numpy arrays, since xarray isel,
-        # __getitem__, __getattr__ are slow... this can probably be solved
-        # more elegantly
-        meta_glob = {}
-        for meta_key in self.META_KEYS:
-            try:
-                meta_glob[meta_key] = ds[meta_key].values
-            except KeyError:  # pragma: no cover
-                logger.warning(f"No such metadata key in GHOST data file: {Path(filename).name}")
-
-        for meta_key, to_unit in self.CONVERT_UNITS_META.items():
-            from_unit = ds[meta_key].attrs["units"]
-
-            if from_unit != to_unit:
-                cfac = cf_units.Unit(from_unit).convert(1, to_unit)
-                meta_glob[meta_key] *= cfac
-
-        tvals = ds["time"].values
-
-        vardata = ds[var_to_read]  # DataArray
-        varinfo = vardata.attrs
-
-        # ToDo: it is important that station comes first since we use numpy
-        # indexing below and not xarray.isel or similar, due to performance
-        # issues. This may need to be updated in case of profile data.
-        assert vardata.dims == ("station", "time")
-        data_np = vardata.values
-
-        # evaluate flags
-        invalid = self._eval_flags(vardata, invalidate_flags, ds)
-
-        for idx in ds.station.values:
-            stat = {}
-            meta = StationMetaData()
-            meta["ts_type"] = self.TS_TYPE
-            stat["time"] = tvals
-            stat["meta"] = meta
-            meta["var_info"] = {}
-
-            for meta_key, vals in meta_glob.items():
-                meta[meta_key] = vals[idx]
-
-            # vardata = subset[var_name]
-            stat[var_to_write] = data_np[idx]
-
-            meta["var_info"][var_to_write] = {}
-            meta["var_info"][var_to_write].update(varinfo)
-
-            # import flagdata (2D array with time and flag dimensions)
-            # invalid = self._eval_flags(vardata, invalidate_flags)
-            stat["data_flagged"] = {}
-            stat["data_flagged"][var_to_write] = invalid[idx]
-            stats.append(stat)
+        with xr.open_dataset(filename) as ds:
+
+            if not {"station", "time"}.issubset(ds.dims):  # pragma: no cover
+                raise AttributeError("Missing dimensions")
+            if not "station_name" in ds:  # pragma: no cover
+                raise AttributeError("No variable station_name found")
+
+            stats = []
+
+            # get all station metadata values as numpy arrays, since xarray isel,
+            # __getitem__, __getattr__ are slow... 
this can probably be solved + # more elegantly + meta_glob = {} + for meta_key in self.META_KEYS: + try: + meta_glob[meta_key] = ds[meta_key].values + except KeyError: # pragma: no cover + logger.warning( + f"No such metadata key in GHOST data file: {Path(filename).name}" + ) + + for meta_key, to_unit in self.CONVERT_UNITS_META.items(): + from_unit = ds[meta_key].attrs["units"] + + if from_unit != to_unit: + cfac = cf_units.Unit(from_unit).convert(1, to_unit) + meta_glob[meta_key] *= cfac + + tvals = ds["time"].values + + vardata = ds[var_to_read] # DataArray + varinfo = vardata.attrs + + # ToDo: it is important that station comes first since we use numpy + # indexing below and not xarray.isel or similar, due to performance + # issues. This may need to be updated in case of profile data. + assert vardata.dims == ("station", "time") + data_np = vardata.values + + # evaluate flags + invalid = self._eval_flags(vardata, invalidate_flags, ds) + + for idx in ds.station.values: + stat = {} + meta = StationMetaData() + meta["ts_type"] = self.TS_TYPE + stat["time"] = tvals + stat["meta"] = meta + meta["var_info"] = {} + + for meta_key, vals in meta_glob.items(): + meta[meta_key] = vals[idx] + + # vardata = subset[var_name] + stat[var_to_write] = data_np[idx] + + meta["var_info"][var_to_write] = {} + meta["var_info"][var_to_write].update(varinfo) + + # import flagdata (2D array with time and flag dimensions) + # invalid = self._eval_flags(vardata, invalidate_flags) + stat["data_flagged"] = {} + stat["data_flagged"][var_to_write] = invalid[idx] + stats.append(stat) return stats diff --git a/pyaerocom/io/read_earlinet.py b/pyaerocom/io/read_earlinet.py index b6931cf37..018ee9e99 100755 --- a/pyaerocom/io/read_earlinet.py +++ b/pyaerocom/io/read_earlinet.py @@ -228,192 +228,196 @@ def read_file(self, filename, vars_to_retrieve=None, read_err=None, remove_outli # Iterate over the lines of the file self.logger.debug(f"Reading file {filename}") - data_in = xarray.open_dataset(filename, engine="netcdf4") - - # getting the coords since no longer in metadata - # Put also just in the attributes. not sure why appears twice - data_out["station_coords"]["longitude"] = data_out["longitude"] = np.float64( - data_in["longitude"].values - ) - data_out["station_coords"]["latitude"] = data_out["latitude"] = np.float64( - data_in["latitude"].values - ) - data_out["altitude"] = np.float64( - data_in[ + with xarray.open_dataset(filename, engine="netcdf4") as data_in: + + # getting the coords since no longer in metadata + # Put also just in the attributes. 
not sure why appears twice + data_out["station_coords"]["longitude"] = data_out["longitude"] = np.float64( + data_in["longitude"].values + ) + data_out["station_coords"]["latitude"] = data_out["latitude"] = np.float64( + data_in["latitude"].values + ) + data_out["altitude"] = np.float64( + data_in[ + "altitude" + ].values # altitude is defined in EARLINET in terms of altitude above sea level + ) # Note altitude is an array for the data, station altitude is different + data_out["station_coords"]["altitude"] = np.float64(data_in.station_altitude) + data_out["altitude_attrs"] = data_in[ "altitude" - ].values # altitude is defined in EARLINET in terms of altitude above sea level - ) # Note altitude is an array for the data, station altitude is different - data_out["station_coords"]["altitude"] = np.float64(data_in.station_altitude) - data_out["altitude_attrs"] = data_in[ - "altitude" - ].attrs # get attrs for altitude units + extra - - # get intersection of metadaa in ddataa_out and data_in - for k, v in self.META_NAMES_FILE.items(): - if v in self.META_NEEDED: - _meta = data_in.attrs[v] - else: - try: - _meta = data_in.attrs[v] - except Exception: # pragma: no cover - _meta = None - data_out[k] = _meta - - # get metadata expected in StationData but not in data_in's metadata - data_out["wavelength_emis"] = data_in["wavelength"] - data_out["shots"] = np.float64(data_in["shots"]) - data_out["zenith_angle"] = np.float64(data_in["zenith_angle"]) - data_out["filename"] = filename - if "Lev02" in filename: - data_out["data_level"] = 2 - loc_split = data_in.attrs["location"].split(", ") - data_out["station_name"] = loc_split[0] - if len(loc_split) > 1: - data_out["country"] = loc_split[1] - - dtime = pd.Timestamp(data_in.measurement_start_datetime).to_numpy().astype("datetime64[s]") - stop = pd.Timestamp(data_in.measurement_stop_datetime).to_numpy().astype("datetime64[s]") - - # in case measurement goes over midnight into a new day - if stop < dtime: - stop = stop + np.timedelta64(1, "[D]") - - data_out["dtime"] = [dtime] - data_out["stopdtime"] = [stop] - data_out["has_zdust"] = False + ].attrs # get attrs for altitude units + extra - for var in vars_to_read: - data_out["var_info"][var] = {} - err_read = False - unit_ok = False - outliers_removed = False - has_altitude = False - - netcdf_var_name = self.VAR_NAMES_FILE[var] - # check if the desired variable is in the file - if netcdf_var_name not in data_in.variables: - self.logger.warning(f"Variable {var} not found in file {filename}") - continue + # get intersection of metadaa in ddataa_out and data_in + for k, v in self.META_NAMES_FILE.items(): + if v in self.META_NEEDED: + _meta = data_in.attrs[v] + else: + try: + _meta = data_in.attrs[v] + except Exception: # pragma: no cover + _meta = None + data_out[k] = _meta + + # get metadata expected in StationData but not in data_in's metadata + data_out["wavelength_emis"] = data_in["wavelength"] + data_out["shots"] = np.float64(data_in["shots"]) + data_out["zenith_angle"] = np.float64(data_in["zenith_angle"]) + data_out["filename"] = filename + if "Lev02" in filename: + data_out["data_level"] = 2 + loc_split = data_in.attrs["location"].split(", ") + data_out["station_name"] = loc_split[0] + if len(loc_split) > 1: + data_out["country"] = loc_split[1] + + dtime = ( + pd.Timestamp(data_in.measurement_start_datetime).to_numpy().astype("datetime64[s]") + ) + stop = ( + pd.Timestamp(data_in.measurement_stop_datetime).to_numpy().astype("datetime64[s]") + ) - info = var_info[var] - # xarray.DataArray - arr = 
data_in.variables[netcdf_var_name] - # the actual data as numpy array (or float if 0-D data, e.g. zdust) - val = np.squeeze(np.float64(arr)) # squeeze to 1D array - - # CONVERT UNIT - unit = None - - unames = self.VAR_UNIT_NAMES[netcdf_var_name] - for u in unames: - if u in arr.attrs: - unit = arr.attrs[u] - if unit is None: - raise DataUnitError(f"Unit of {var} could not be accessed in file {filename}") - unit_fac = None - try: - to_unit = self._var_info[var].units - unit_fac = get_unit_conversion_fac(unit, to_unit) - val *= unit_fac - unit = to_unit - unit_ok = True - except Exception as e: - logger.warning( - f"Failed to convert unit of {var} in file {filename} (Earlinet): " - f"Error: {repr(e)}" - ) + # in case measurement goes over midnight into a new day + if stop < dtime: + stop = stop + np.timedelta64(1, "[D]") + + data_out["dtime"] = [dtime] + data_out["stopdtime"] = [stop] + data_out["has_zdust"] = False + + for var in vars_to_read: + data_out["var_info"][var] = {} + err_read = False + unit_ok = False + outliers_removed = False + has_altitude = False + + netcdf_var_name = self.VAR_NAMES_FILE[var] + # check if the desired variable is in the file + if netcdf_var_name not in data_in.variables: + self.logger.warning(f"Variable {var} not found in file {filename}") + continue - # import errors if applicable - err = np.nan - if read_err and var in self.ERR_VARNAMES: - err_name = self.ERR_VARNAMES[var] - if err_name in data_in.variables: - err = np.squeeze(np.float64(data_in.variables[err_name])) - if unit_ok: - err *= unit_fac - err_read = True - - # 1D variable - if var == "zdust": - if not val.ndim == 0: - raise ValueError("Fatal: dust layer height data must be single value") - - if unit_ok and info.minimum < val < info.maximum: - logger.warning(f"zdust value {val} out of range, setting to NaN") - val = np.nan - - if np.isnan(val): - self.logger.warning( - f"Invalid value of variable zdust in file {filename}. Skipping...!" + info = var_info[var] + # xarray.DataArray + arr = data_in.variables[netcdf_var_name] + # the actual data as numpy array (or float if 0-D data, e.g. zdust) + val = np.squeeze(np.float64(arr)) # squeeze to 1D array + + # CONVERT UNIT + unit = None + + unames = self.VAR_UNIT_NAMES[netcdf_var_name] + for u in unames: + if u in arr.attrs: + unit = arr.attrs[u] + if unit is None: + raise DataUnitError(f"Unit of {var} could not be accessed in file {filename}") + unit_fac = None + try: + to_unit = self._var_info[var].units + unit_fac = get_unit_conversion_fac(unit, to_unit) + val *= unit_fac + unit = to_unit + unit_ok = True + except Exception as e: + logger.warning( + f"Failed to convert unit of {var} in file {filename} (Earlinet): " + f"Error: {repr(e)}" ) - continue - data_out["has_zdust"] = True - data_out[var] = val + # import errors if applicable + err = np.nan + if read_err and var in self.ERR_VARNAMES: + err_name = self.ERR_VARNAMES[var] + if err_name in data_in.variables: + err = np.squeeze(np.float64(data_in.variables[err_name])) + if unit_ok: + err *= unit_fac + err_read = True + + # 1D variable + if var == "zdust": + if not val.ndim == 0: + raise ValueError("Fatal: dust layer height data must be single value") + + if unit_ok and info.minimum < val < info.maximum: + logger.warning(f"zdust value {val} out of range, setting to NaN") + val = np.nan + + if np.isnan(val): + self.logger.warning( + f"Invalid value of variable zdust in file {filename}. Skipping...!" 
+ ) + continue - else: - if not val.ndim == 1: - raise ValueError("Extinction data must be one dimensional") - elif len(val) == 0: - continue # no data - # Remove NaN equivalent values - val[val > self._MAX_VAL_NAN] = np.nan - - wvlg = var_info[var].wavelength_nm - wvlg_str = self.META_NAMES_FILE["wavelength_emis"] - - if not wvlg == float(data_in[wvlg_str]): - self.logger.info("No wavelength match") - continue + data_out["has_zdust"] = True + data_out[var] = val - alt_id = self.ALTITUDE_ID - alt_data = data_in.variables[alt_id] + else: + if not val.ndim == 1: + raise ValueError("Extinction data must be one dimensional") + elif len(val) == 0: + continue # no data + # Remove NaN equivalent values + val[val > self._MAX_VAL_NAN] = np.nan - alt_vals = np.float64(alt_data) - alt_unit = alt_data.attrs[self.VAR_UNIT_NAMES[alt_id]] - to_alt_unit = const.VARS["alt"].units - if not alt_unit == to_alt_unit: - try: - alt_unit_fac = get_unit_conversion_fac(alt_unit, to_alt_unit) - alt_vals *= alt_unit_fac - alt_unit = to_alt_unit - except Exception as e: - self.logger.warning(f"Failed to convert unit: {repr(e)}") - has_altitude = True - - # remove outliers from data, if applicable - if remove_outliers and unit_ok: - # REMOVE OUTLIERS - outlier_mask = np.logical_or(val < info.minimum, val > info.maximum) - val[outlier_mask] = np.nan + wvlg = var_info[var].wavelength_nm + wvlg_str = self.META_NAMES_FILE["wavelength_emis"] + + if not wvlg == float(data_in[wvlg_str]): + self.logger.info("No wavelength match") + continue + alt_id = self.ALTITUDE_ID + alt_data = data_in.variables[alt_id] + + alt_vals = np.float64(alt_data) + alt_unit = alt_data.attrs[self.VAR_UNIT_NAMES[alt_id]] + to_alt_unit = const.VARS["alt"].units + if not alt_unit == to_alt_unit: + try: + alt_unit_fac = get_unit_conversion_fac(alt_unit, to_alt_unit) + alt_vals *= alt_unit_fac + alt_unit = to_alt_unit + except Exception as e: + self.logger.warning(f"Failed to convert unit: {repr(e)}") + has_altitude = True + + # remove outliers from data, if applicable + if remove_outliers and unit_ok: + # REMOVE OUTLIERS + outlier_mask = np.logical_or(val < info.minimum, val > info.maximum) + val[outlier_mask] = np.nan + + if err_read: + err[outlier_mask] = np.nan + outliers_removed = True + # remove outliers from errors if applicable if err_read: - err[outlier_mask] = np.nan - outliers_removed = True - # remove outliers from errors if applicable - if err_read: - err[err > self._MAX_VAL_NAN] = np.nan - - # create instance of ProfileData - profile = VerticalProfile( - data=val, - altitude=alt_vals, - dtime=dtime, - var_name=var, - data_err=err, - var_unit=unit, - altitude_unit=alt_unit, - ) + err[err > self._MAX_VAL_NAN] = np.nan + + # create instance of ProfileData + profile = VerticalProfile( + data=val, + altitude=alt_vals, + dtime=dtime, + var_name=var, + data_err=err, + var_unit=unit, + altitude_unit=alt_unit, + ) - # Write everything into profile - data_out[var] = profile + # Write everything into profile + data_out[var] = profile - data_out["var_info"][var].update( - unit_ok=unit_ok, - err_read=err_read, - outliers_removed=outliers_removed, - has_altitute=has_altitude, - ) + data_out["var_info"][var].update( + unit_ok=unit_ok, + err_read=err_read, + outliers_removed=outliers_removed, + has_altitute=has_altitude, + ) return data_out def read( diff --git a/pyaerocom/scripts/testdata-minimal/create_subsets_ghost.py b/pyaerocom/scripts/testdata-minimal/create_subsets_ghost.py index e70270c49..a5a5e8864 100644 --- 
a/pyaerocom/scripts/testdata-minimal/create_subsets_ghost.py +++ b/pyaerocom/scripts/testdata-minimal/create_subsets_ghost.py @@ -62,10 +62,10 @@ print(file_out) assert os.path.exists(file_in) - ds = xr.open_dataset(file_in) - subset = ds.isel(station=slice(0, numst)) - if numts is not None: - subset = subset.isel(time=slice(0, numts)) + with xr.open_dataset(file_in) as ds: + subset = ds.isel(station=slice(0, numst)) + if numts is not None: + subset = subset.isel(time=slice(0, numts)) - subset.to_netcdf(file_out) + subset.to_netcdf(file_out) print("Saved") diff --git a/tests/io/ghost/test_reader.py b/tests/io/ghost/test_reader.py index ba4932587..491df883c 100644 --- a/tests/io/ghost/test_reader.py +++ b/tests/io/ghost/test_reader.py @@ -132,7 +132,7 @@ def test__eval_flags_slice(self): reader = self.default_reader file = reader.files[-1] assert Path(file).name == "sconco3_201810.nc" - ds = xr.open_dataset(file) + ds = xr.load_dataset(file) flagvar = "qa" numvalid = 3 From 8b17ae03288c2bd1c0daad5c929639b359152637 Mon Sep 17 00:00:00 2001 From: Heiko Klein Date: Fri, 3 May 2024 09:25:22 +0000 Subject: [PATCH 2/2] linting --- pyaerocom/io/ghost/reader.py | 1 - pyaerocom/io/read_earlinet.py | 1 - 2 files changed, 2 deletions(-) diff --git a/pyaerocom/io/ghost/reader.py b/pyaerocom/io/ghost/reader.py index 31fb0a16a..b6eaf6064 100644 --- a/pyaerocom/io/ghost/reader.py +++ b/pyaerocom/io/ghost/reader.py @@ -340,7 +340,6 @@ def read_file(self, filename, var_to_read=None, invalidate_flags=None, var_to_wr var_to_write = self.var_names_data_inv[var_to_read] with xr.open_dataset(filename) as ds: - if not {"station", "time"}.issubset(ds.dims): # pragma: no cover raise AttributeError("Missing dimensions") if not "station_name" in ds: # pragma: no cover diff --git a/pyaerocom/io/read_earlinet.py b/pyaerocom/io/read_earlinet.py index 018ee9e99..637f3ca5f 100755 --- a/pyaerocom/io/read_earlinet.py +++ b/pyaerocom/io/read_earlinet.py @@ -229,7 +229,6 @@ def read_file(self, filename, vars_to_retrieve=None, read_err=None, remove_outli self.logger.debug(f"Reading file {filename}") with xarray.open_dataset(filename, engine="netcdf4") as data_in: - # getting the coords since no longer in metadata # Put also just in the attributes. not sure why appears twice data_out["station_coords"]["longitude"] = data_out["longitude"] = np.float64(
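
Note on the pattern applied throughout this series. The hunks above converge on
two idioms for keeping NetCDF handles from leaking. A minimal sketch, assuming
only xarray; the file name "example.nc" and the variable name "t2m" below are
placeholders standing in for the real inputs (e.g. const.ERA5_SURFTEMP_FILE):

    import xarray as xr

    # Idiom 1: scope the handle with a context manager. xr.open_dataset() is
    # lazy, so realize any values you need before the block closes the file.
    with xr.open_dataset("example.nc") as ds:  # "example.nc" is a placeholder
        t2m = ds["t2m"].values  # .values forces the read while the file is open

    # Idiom 2: read eagerly and close immediately. xr.load_dataset() pulls the
    # whole file into memory, so no handle is left dangling afterwards.
    ds = xr.load_dataset("example.nc")

The choice between the two mirrors the patch: with-blocks where the data are
consumed inside a bounded scope (the reader methods), xr.load_dataset() where
the object is returned or reused after the file should already be closed (the
EMEP model file, the HTAP mask files, the test fixture). xarray Datasets and
DataArrays both implement the context-manager protocol, which is why
colocation.py can use open_dataset(...)["t2m"] directly in a with-statement.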