From 886356c275b858696557c16dffb257cc5914be83 Mon Sep 17 00:00:00 2001
From: Heiko Klein
Date: Fri, 3 May 2024 09:15:17 +0000
Subject: [PATCH 1/2] adding more with-statements where close was missing

---
 pyaerocom/colocation.py                       |  46 +--
 pyaerocom/extras/satellite_l2/aeolus_l2a.py   |   7 +-
 pyaerocom/helpers_landsea_masks.py            |   4 +-
 pyaerocom/io/cachehandler_ungridded.py        |  39 +-
 pyaerocom/io/ghost/reader.py                  | 126 +++---
 pyaerocom/io/read_earlinet.py                 | 350 +++++++++---------
 .../testdata-minimal/create_subsets_ghost.py  |  10 +-
 tests/io/ghost/test_reader.py                 |   2 +-
 8 files changed, 295 insertions(+), 289 deletions(-)

diff --git a/pyaerocom/colocation.py b/pyaerocom/colocation.py
index dd4348341..3b5f038eb 100644
--- a/pyaerocom/colocation.py
+++ b/pyaerocom/colocation.py
@@ -972,7 +972,6 @@ def correct_model_stp_coldata(coldata, p0=None, t0=273.15, inplace=False):
     if not inplace:
         coldata = coldata.copy()
-    temp = xr.open_dataset(const.ERA5_SURFTEMP_FILE)["t2m"]
 
     arr = coldata.data
 
@@ -987,37 +986,38 @@ def correct_model_stp_coldata(coldata, p0=None, t0=273.15, inplace=False):
     mintemps = []
     maxtemps = []
     ps = []
-    for i, (lat, lon, alt, name) in enumerate(coords):
-        logger.info(name, ", Lat", lat, ", Lon", lon)
-        p = pressure(alt)
-        logger.info("Alt", alt)
-        logger.info("P=", p / 100, "hPa")
+    with xr.open_dataset(const.ERA5_SURFTEMP_FILE)["t2m"] as temp:
+        for i, (lat, lon, alt, name) in enumerate(coords):
+            logger.info(f"{name}, Lat {lat}, Lon {lon}")
+            p = pressure(alt)
+            logger.info(f"Alt {alt}")
+            logger.info(f"P={p / 100} hPa")
 
-        ps.append(p / 100)
+            ps.append(p / 100)
 
-        temps = temp.sel(latitude=lat, longitude=lon, method="nearest").data
+            temps = temp.sel(latitude=lat, longitude=lon, method="nearest").data
 
-        meantemps.append(temps.mean())
-        mintemps.append(temps.min())
-        maxtemps.append(temps.min())
+            meantemps.append(temps.mean())
+            mintemps.append(temps.min())
+            maxtemps.append(temps.max())
 
-        if not len(temps) == len(arr.time):
-            raise NotImplementedError("Check timestamps")
-        logger.info("Mean Temp: ", temps.mean() - t0, " C")
+            if not len(temps) == len(arr.time):
+                raise NotImplementedError("Check timestamps")
+            logger.info(f"Mean Temp: {temps.mean() - t0} C")
 
-        corrfacs = (p0 / p) * (temps / t0)
+            corrfacs = (p0 / p) * (temps / t0)
 
-        logger.info("Corr fac:", corrfacs.mean(), "+/-", corrfacs.std())
+            logger.info(f"Corr fac: {corrfacs.mean()} +/- {corrfacs.std()}")
 
-        cfacs.append(corrfacs.mean())
+            cfacs.append(corrfacs.mean())
 
-        # mularr = xr.DataArray(corrfacs)
+            # mularr = xr.DataArray(corrfacs)
 
-        if not arr.station_name.values[i] == name:
-            raise Exception
-        elif not arr.dims[1] == "time":
-            raise Exception
-        arr[1, :, i] *= corrfacs
+            if not arr.station_name.values[i] == name:
+                raise Exception
+            elif not arr.dims[1] == "time":
+                raise Exception
+            arr[1, :, i] *= corrfacs
 
     cfacs = np.asarray(cfacs)
 
diff --git a/pyaerocom/extras/satellite_l2/aeolus_l2a.py b/pyaerocom/extras/satellite_l2/aeolus_l2a.py
index 569695ae3..1ad9449ad 100755
--- a/pyaerocom/extras/satellite_l2/aeolus_l2a.py
+++ b/pyaerocom/extras/satellite_l2/aeolus_l2a.py
@@ -3179,16 +3179,15 @@ def read_model_file(self, file_name, topofile=None, vars_to_keep=None):
         if topofile is not None:
            # read topography since that needs to be added to the ground following height of the model
            self.logger.info("reading topography file {}".format(options["topofile"]))
-            topo_data = xr.open_dataset(options["topofile"])
-            topo_altitudes = np.squeeze(topo_data[self.EMEP_TOPO_FILE_VAR_NAME])
-            topo_data.close()
+            with xr.open_dataset(options["topofile"]) as topo_data:
+                topo_altitudes = np.squeeze(topo_data[self.EMEP_TOPO_FILE_VAR_NAME])
 
     if not os.path.exists(file_name):
         obj.logger.info(f"file does not exist: {file_name}. skipping colocation ...")
         return False
     # read netcdf file if it has not yet been loaded
     obj.logger.info(f"reading model file {file_name}")
-    nc_data = xr.open_dataset(file_name)
+    nc_data = xr.load_dataset(file_name)
     nc_data[self._LATITUDENAME] = nc_data[self.EMEP_VAR_NAME_DICT[self._LATITUDENAME]]
     nc_data[self._LONGITUDENAME] = nc_data[self.EMEP_VAR_NAME_DICT[self._LONGITUDENAME]]
     nc_data[self._TIME_NAME] = nc_data[self.EMEP_VAR_NAME_DICT[self._TIME_NAME]]
diff --git a/pyaerocom/helpers_landsea_masks.py b/pyaerocom/helpers_landsea_masks.py
index 01d13c609..e841b7af4 100644
--- a/pyaerocom/helpers_landsea_masks.py
+++ b/pyaerocom/helpers_landsea_masks.py
@@ -149,10 +149,10 @@ def load_region_mask_xr(*regions):
     for i, fil in enumerate(get_htap_mask_files(*regions)):
         r = regions[i]
         if i == 0:
-            masks = xr.open_dataset(fil)[r + "htap"]
+            masks = xr.load_dataset(fil)[r + "htap"]
             name = r
         else:
-            masks += xr.open_dataset(fil)[r + "htap"]
+            masks += xr.load_dataset(fil)[r + "htap"]
             name += f"-{r}"
     if masks is not None:
         mask = masks.where(masks < 1, 1)
diff --git a/pyaerocom/io/cachehandler_ungridded.py b/pyaerocom/io/cachehandler_ungridded.py
index 0ef3427e1..79c85c3eb 100644
--- a/pyaerocom/io/cachehandler_ungridded.py
+++ b/pyaerocom/io/cachehandler_ungridded.py
@@ -235,35 +235,36 @@ class (which should not happen)
 
         delete_existing = const.RM_CACHE_OUTDATED if not force_use_outdated else False
 
-        in_handle = open(fp, "rb")
-        if force_use_outdated:
-            last_meta = pickle.load(in_handle)
-            assert len(last_meta) == len(self.CACHE_HEAD_KEYS)
-            ok = True
-        else:
-            try:
-                ok = self._check_pkl_head_vs_database(in_handle)
-            except Exception as e:
-                ok = False
-                delete_existing = True
-                logger.exception(
-                    f"File error in cached data file {fp}. "
-                    f"File will be removed and data reloaded. Error: {repr(e)}"
-                )
+        with open(fp, "rb") as in_handle:
+            if force_use_outdated:
+                last_meta = pickle.load(in_handle)
+                assert len(last_meta) == len(self.CACHE_HEAD_KEYS)
+                ok = True
+            else:
+                try:
+                    ok = self._check_pkl_head_vs_database(in_handle)
+                except Exception as e:
+                    ok = False
+                    delete_existing = True
+                    logger.exception(
+                        f"File error in cached data file {fp}. "
+                        f"File will be removed and data reloaded. Error: {repr(e)}"
+                    )
+            if ok:
+                # everything is okay, or forced
+                data = pickle.load(in_handle)
+
         if not ok:
-            # TODO: Should we delete the cache file if it is outdated ???
+            # Delete the cache file if it is outdated, after handle is closed
             logger.info(
                 f"Aborting reading cache file {fp}. Aerocom database "
                f"or pyaerocom version has changed compared to cached version"
            )
-            in_handle.close()
            if delete_existing:
                # something was wrong
                logger.info(f"Deleting outdated cache file: {fp}")
                os.remove(fp)
            return False
 
-        # everything is okay
-        data = pickle.load(in_handle)
        if not isinstance(data, UngriddedData):
            raise TypeError(
                f"Unexpected data type stored in cache file, need instance of UngriddedData, "
diff --git a/pyaerocom/io/ghost/reader.py b/pyaerocom/io/ghost/reader.py
index facd2f322..31fb0a16a 100644
--- a/pyaerocom/io/ghost/reader.py
+++ b/pyaerocom/io/ghost/reader.py
@@ -339,68 +339,70 @@ def read_file(self, filename, var_to_read=None, invalidate_flags=None, var_to_wr
         if var_to_write is None:
             var_to_write = self.var_names_data_inv[var_to_read]
 
-        ds = xr.open_dataset(filename)
-
-        if not {"station", "time"}.issubset(ds.dims):  # pragma: no cover
-            raise AttributeError("Missing dimensions")
-        if not "station_name" in ds:  # pragma: no cover
-            raise AttributeError("No variable station_name found")
-
-        stats = []
-
-        # get all station metadata values as numpy arrays, since xarray isel,
-        # __getitem__, __getattr__ are slow... this can probably be solved
-        # more elegantly
-        meta_glob = {}
-        for meta_key in self.META_KEYS:
-            try:
-                meta_glob[meta_key] = ds[meta_key].values
-            except KeyError:  # pragma: no cover
-                logger.warning(f"No such metadata key in GHOST data file: {Path(filename).name}")
-
-        for meta_key, to_unit in self.CONVERT_UNITS_META.items():
-            from_unit = ds[meta_key].attrs["units"]
-
-            if from_unit != to_unit:
-                cfac = cf_units.Unit(from_unit).convert(1, to_unit)
-                meta_glob[meta_key] *= cfac
-
-        tvals = ds["time"].values
-
-        vardata = ds[var_to_read]  # DataArray
-        varinfo = vardata.attrs
-
-        # ToDo: it is important that station comes first since we use numpy
-        # indexing below and not xarray.isel or similar, due to performance
-        # issues. This may need to be updated in case of profile data.
-        assert vardata.dims == ("station", "time")
-        data_np = vardata.values
-
-        # evaluate flags
-        invalid = self._eval_flags(vardata, invalidate_flags, ds)
-
-        for idx in ds.station.values:
-            stat = {}
-            meta = StationMetaData()
-            meta["ts_type"] = self.TS_TYPE
-            stat["time"] = tvals
-            stat["meta"] = meta
-            meta["var_info"] = {}
-
-            for meta_key, vals in meta_glob.items():
-                meta[meta_key] = vals[idx]
-
-            # vardata = subset[var_name]
-            stat[var_to_write] = data_np[idx]
-
-            meta["var_info"][var_to_write] = {}
-            meta["var_info"][var_to_write].update(varinfo)
-
-            # import flagdata (2D array with time and flag dimensions)
-            # invalid = self._eval_flags(vardata, invalidate_flags)
-            stat["data_flagged"] = {}
-            stat["data_flagged"][var_to_write] = invalid[idx]
-            stats.append(stat)
+        with xr.open_dataset(filename) as ds:
+
+            if not {"station", "time"}.issubset(ds.dims):  # pragma: no cover
+                raise AttributeError("Missing dimensions")
+            if not "station_name" in ds:  # pragma: no cover
+                raise AttributeError("No variable station_name found")
+
+            stats = []
+
+            # get all station metadata values as numpy arrays, since xarray isel,
+            # __getitem__, __getattr__ are slow... 
this can probably be solved + # more elegantly + meta_glob = {} + for meta_key in self.META_KEYS: + try: + meta_glob[meta_key] = ds[meta_key].values + except KeyError: # pragma: no cover + logger.warning( + f"No such metadata key in GHOST data file: {Path(filename).name}" + ) + + for meta_key, to_unit in self.CONVERT_UNITS_META.items(): + from_unit = ds[meta_key].attrs["units"] + + if from_unit != to_unit: + cfac = cf_units.Unit(from_unit).convert(1, to_unit) + meta_glob[meta_key] *= cfac + + tvals = ds["time"].values + + vardata = ds[var_to_read] # DataArray + varinfo = vardata.attrs + + # ToDo: it is important that station comes first since we use numpy + # indexing below and not xarray.isel or similar, due to performance + # issues. This may need to be updated in case of profile data. + assert vardata.dims == ("station", "time") + data_np = vardata.values + + # evaluate flags + invalid = self._eval_flags(vardata, invalidate_flags, ds) + + for idx in ds.station.values: + stat = {} + meta = StationMetaData() + meta["ts_type"] = self.TS_TYPE + stat["time"] = tvals + stat["meta"] = meta + meta["var_info"] = {} + + for meta_key, vals in meta_glob.items(): + meta[meta_key] = vals[idx] + + # vardata = subset[var_name] + stat[var_to_write] = data_np[idx] + + meta["var_info"][var_to_write] = {} + meta["var_info"][var_to_write].update(varinfo) + + # import flagdata (2D array with time and flag dimensions) + # invalid = self._eval_flags(vardata, invalidate_flags) + stat["data_flagged"] = {} + stat["data_flagged"][var_to_write] = invalid[idx] + stats.append(stat) return stats diff --git a/pyaerocom/io/read_earlinet.py b/pyaerocom/io/read_earlinet.py index b6931cf37..018ee9e99 100755 --- a/pyaerocom/io/read_earlinet.py +++ b/pyaerocom/io/read_earlinet.py @@ -228,192 +228,196 @@ def read_file(self, filename, vars_to_retrieve=None, read_err=None, remove_outli # Iterate over the lines of the file self.logger.debug(f"Reading file {filename}") - data_in = xarray.open_dataset(filename, engine="netcdf4") - - # getting the coords since no longer in metadata - # Put also just in the attributes. not sure why appears twice - data_out["station_coords"]["longitude"] = data_out["longitude"] = np.float64( - data_in["longitude"].values - ) - data_out["station_coords"]["latitude"] = data_out["latitude"] = np.float64( - data_in["latitude"].values - ) - data_out["altitude"] = np.float64( - data_in[ + with xarray.open_dataset(filename, engine="netcdf4") as data_in: + + # getting the coords since no longer in metadata + # Put also just in the attributes. 
not sure why appears twice + data_out["station_coords"]["longitude"] = data_out["longitude"] = np.float64( + data_in["longitude"].values + ) + data_out["station_coords"]["latitude"] = data_out["latitude"] = np.float64( + data_in["latitude"].values + ) + data_out["altitude"] = np.float64( + data_in[ + "altitude" + ].values # altitude is defined in EARLINET in terms of altitude above sea level + ) # Note altitude is an array for the data, station altitude is different + data_out["station_coords"]["altitude"] = np.float64(data_in.station_altitude) + data_out["altitude_attrs"] = data_in[ "altitude" - ].values # altitude is defined in EARLINET in terms of altitude above sea level - ) # Note altitude is an array for the data, station altitude is different - data_out["station_coords"]["altitude"] = np.float64(data_in.station_altitude) - data_out["altitude_attrs"] = data_in[ - "altitude" - ].attrs # get attrs for altitude units + extra - - # get intersection of metadaa in ddataa_out and data_in - for k, v in self.META_NAMES_FILE.items(): - if v in self.META_NEEDED: - _meta = data_in.attrs[v] - else: - try: - _meta = data_in.attrs[v] - except Exception: # pragma: no cover - _meta = None - data_out[k] = _meta - - # get metadata expected in StationData but not in data_in's metadata - data_out["wavelength_emis"] = data_in["wavelength"] - data_out["shots"] = np.float64(data_in["shots"]) - data_out["zenith_angle"] = np.float64(data_in["zenith_angle"]) - data_out["filename"] = filename - if "Lev02" in filename: - data_out["data_level"] = 2 - loc_split = data_in.attrs["location"].split(", ") - data_out["station_name"] = loc_split[0] - if len(loc_split) > 1: - data_out["country"] = loc_split[1] - - dtime = pd.Timestamp(data_in.measurement_start_datetime).to_numpy().astype("datetime64[s]") - stop = pd.Timestamp(data_in.measurement_stop_datetime).to_numpy().astype("datetime64[s]") - - # in case measurement goes over midnight into a new day - if stop < dtime: - stop = stop + np.timedelta64(1, "[D]") - - data_out["dtime"] = [dtime] - data_out["stopdtime"] = [stop] - data_out["has_zdust"] = False + ].attrs # get attrs for altitude units + extra - for var in vars_to_read: - data_out["var_info"][var] = {} - err_read = False - unit_ok = False - outliers_removed = False - has_altitude = False - - netcdf_var_name = self.VAR_NAMES_FILE[var] - # check if the desired variable is in the file - if netcdf_var_name not in data_in.variables: - self.logger.warning(f"Variable {var} not found in file {filename}") - continue + # get intersection of metadaa in ddataa_out and data_in + for k, v in self.META_NAMES_FILE.items(): + if v in self.META_NEEDED: + _meta = data_in.attrs[v] + else: + try: + _meta = data_in.attrs[v] + except Exception: # pragma: no cover + _meta = None + data_out[k] = _meta + + # get metadata expected in StationData but not in data_in's metadata + data_out["wavelength_emis"] = data_in["wavelength"] + data_out["shots"] = np.float64(data_in["shots"]) + data_out["zenith_angle"] = np.float64(data_in["zenith_angle"]) + data_out["filename"] = filename + if "Lev02" in filename: + data_out["data_level"] = 2 + loc_split = data_in.attrs["location"].split(", ") + data_out["station_name"] = loc_split[0] + if len(loc_split) > 1: + data_out["country"] = loc_split[1] + + dtime = ( + pd.Timestamp(data_in.measurement_start_datetime).to_numpy().astype("datetime64[s]") + ) + stop = ( + pd.Timestamp(data_in.measurement_stop_datetime).to_numpy().astype("datetime64[s]") + ) - info = var_info[var] - # xarray.DataArray - arr = 
data_in.variables[netcdf_var_name] - # the actual data as numpy array (or float if 0-D data, e.g. zdust) - val = np.squeeze(np.float64(arr)) # squeeze to 1D array - - # CONVERT UNIT - unit = None - - unames = self.VAR_UNIT_NAMES[netcdf_var_name] - for u in unames: - if u in arr.attrs: - unit = arr.attrs[u] - if unit is None: - raise DataUnitError(f"Unit of {var} could not be accessed in file {filename}") - unit_fac = None - try: - to_unit = self._var_info[var].units - unit_fac = get_unit_conversion_fac(unit, to_unit) - val *= unit_fac - unit = to_unit - unit_ok = True - except Exception as e: - logger.warning( - f"Failed to convert unit of {var} in file {filename} (Earlinet): " - f"Error: {repr(e)}" - ) + # in case measurement goes over midnight into a new day + if stop < dtime: + stop = stop + np.timedelta64(1, "[D]") + + data_out["dtime"] = [dtime] + data_out["stopdtime"] = [stop] + data_out["has_zdust"] = False + + for var in vars_to_read: + data_out["var_info"][var] = {} + err_read = False + unit_ok = False + outliers_removed = False + has_altitude = False + + netcdf_var_name = self.VAR_NAMES_FILE[var] + # check if the desired variable is in the file + if netcdf_var_name not in data_in.variables: + self.logger.warning(f"Variable {var} not found in file {filename}") + continue - # import errors if applicable - err = np.nan - if read_err and var in self.ERR_VARNAMES: - err_name = self.ERR_VARNAMES[var] - if err_name in data_in.variables: - err = np.squeeze(np.float64(data_in.variables[err_name])) - if unit_ok: - err *= unit_fac - err_read = True - - # 1D variable - if var == "zdust": - if not val.ndim == 0: - raise ValueError("Fatal: dust layer height data must be single value") - - if unit_ok and info.minimum < val < info.maximum: - logger.warning(f"zdust value {val} out of range, setting to NaN") - val = np.nan - - if np.isnan(val): - self.logger.warning( - f"Invalid value of variable zdust in file {filename}. Skipping...!" + info = var_info[var] + # xarray.DataArray + arr = data_in.variables[netcdf_var_name] + # the actual data as numpy array (or float if 0-D data, e.g. zdust) + val = np.squeeze(np.float64(arr)) # squeeze to 1D array + + # CONVERT UNIT + unit = None + + unames = self.VAR_UNIT_NAMES[netcdf_var_name] + for u in unames: + if u in arr.attrs: + unit = arr.attrs[u] + if unit is None: + raise DataUnitError(f"Unit of {var} could not be accessed in file {filename}") + unit_fac = None + try: + to_unit = self._var_info[var].units + unit_fac = get_unit_conversion_fac(unit, to_unit) + val *= unit_fac + unit = to_unit + unit_ok = True + except Exception as e: + logger.warning( + f"Failed to convert unit of {var} in file {filename} (Earlinet): " + f"Error: {repr(e)}" ) - continue - data_out["has_zdust"] = True - data_out[var] = val + # import errors if applicable + err = np.nan + if read_err and var in self.ERR_VARNAMES: + err_name = self.ERR_VARNAMES[var] + if err_name in data_in.variables: + err = np.squeeze(np.float64(data_in.variables[err_name])) + if unit_ok: + err *= unit_fac + err_read = True + + # 1D variable + if var == "zdust": + if not val.ndim == 0: + raise ValueError("Fatal: dust layer height data must be single value") + + if unit_ok and info.minimum < val < info.maximum: + logger.warning(f"zdust value {val} out of range, setting to NaN") + val = np.nan + + if np.isnan(val): + self.logger.warning( + f"Invalid value of variable zdust in file {filename}. Skipping...!" 
+ ) + continue - else: - if not val.ndim == 1: - raise ValueError("Extinction data must be one dimensional") - elif len(val) == 0: - continue # no data - # Remove NaN equivalent values - val[val > self._MAX_VAL_NAN] = np.nan - - wvlg = var_info[var].wavelength_nm - wvlg_str = self.META_NAMES_FILE["wavelength_emis"] - - if not wvlg == float(data_in[wvlg_str]): - self.logger.info("No wavelength match") - continue + data_out["has_zdust"] = True + data_out[var] = val - alt_id = self.ALTITUDE_ID - alt_data = data_in.variables[alt_id] + else: + if not val.ndim == 1: + raise ValueError("Extinction data must be one dimensional") + elif len(val) == 0: + continue # no data + # Remove NaN equivalent values + val[val > self._MAX_VAL_NAN] = np.nan - alt_vals = np.float64(alt_data) - alt_unit = alt_data.attrs[self.VAR_UNIT_NAMES[alt_id]] - to_alt_unit = const.VARS["alt"].units - if not alt_unit == to_alt_unit: - try: - alt_unit_fac = get_unit_conversion_fac(alt_unit, to_alt_unit) - alt_vals *= alt_unit_fac - alt_unit = to_alt_unit - except Exception as e: - self.logger.warning(f"Failed to convert unit: {repr(e)}") - has_altitude = True - - # remove outliers from data, if applicable - if remove_outliers and unit_ok: - # REMOVE OUTLIERS - outlier_mask = np.logical_or(val < info.minimum, val > info.maximum) - val[outlier_mask] = np.nan + wvlg = var_info[var].wavelength_nm + wvlg_str = self.META_NAMES_FILE["wavelength_emis"] + + if not wvlg == float(data_in[wvlg_str]): + self.logger.info("No wavelength match") + continue + alt_id = self.ALTITUDE_ID + alt_data = data_in.variables[alt_id] + + alt_vals = np.float64(alt_data) + alt_unit = alt_data.attrs[self.VAR_UNIT_NAMES[alt_id]] + to_alt_unit = const.VARS["alt"].units + if not alt_unit == to_alt_unit: + try: + alt_unit_fac = get_unit_conversion_fac(alt_unit, to_alt_unit) + alt_vals *= alt_unit_fac + alt_unit = to_alt_unit + except Exception as e: + self.logger.warning(f"Failed to convert unit: {repr(e)}") + has_altitude = True + + # remove outliers from data, if applicable + if remove_outliers and unit_ok: + # REMOVE OUTLIERS + outlier_mask = np.logical_or(val < info.minimum, val > info.maximum) + val[outlier_mask] = np.nan + + if err_read: + err[outlier_mask] = np.nan + outliers_removed = True + # remove outliers from errors if applicable if err_read: - err[outlier_mask] = np.nan - outliers_removed = True - # remove outliers from errors if applicable - if err_read: - err[err > self._MAX_VAL_NAN] = np.nan - - # create instance of ProfileData - profile = VerticalProfile( - data=val, - altitude=alt_vals, - dtime=dtime, - var_name=var, - data_err=err, - var_unit=unit, - altitude_unit=alt_unit, - ) + err[err > self._MAX_VAL_NAN] = np.nan + + # create instance of ProfileData + profile = VerticalProfile( + data=val, + altitude=alt_vals, + dtime=dtime, + var_name=var, + data_err=err, + var_unit=unit, + altitude_unit=alt_unit, + ) - # Write everything into profile - data_out[var] = profile + # Write everything into profile + data_out[var] = profile - data_out["var_info"][var].update( - unit_ok=unit_ok, - err_read=err_read, - outliers_removed=outliers_removed, - has_altitute=has_altitude, - ) + data_out["var_info"][var].update( + unit_ok=unit_ok, + err_read=err_read, + outliers_removed=outliers_removed, + has_altitute=has_altitude, + ) return data_out def read( diff --git a/pyaerocom/scripts/testdata-minimal/create_subsets_ghost.py b/pyaerocom/scripts/testdata-minimal/create_subsets_ghost.py index e70270c49..a5a5e8864 100644 --- 
a/pyaerocom/scripts/testdata-minimal/create_subsets_ghost.py +++ b/pyaerocom/scripts/testdata-minimal/create_subsets_ghost.py @@ -62,10 +62,10 @@ print(file_out) assert os.path.exists(file_in) - ds = xr.open_dataset(file_in) - subset = ds.isel(station=slice(0, numst)) - if numts is not None: - subset = subset.isel(time=slice(0, numts)) + with xr.open_dataset(file_in) as ds: + subset = ds.isel(station=slice(0, numst)) + if numts is not None: + subset = subset.isel(time=slice(0, numts)) - subset.to_netcdf(file_out) + subset.to_netcdf(file_out) print("Saved") diff --git a/tests/io/ghost/test_reader.py b/tests/io/ghost/test_reader.py index ba4932587..491df883c 100644 --- a/tests/io/ghost/test_reader.py +++ b/tests/io/ghost/test_reader.py @@ -132,7 +132,7 @@ def test__eval_flags_slice(self): reader = self.default_reader file = reader.files[-1] assert Path(file).name == "sconco3_201810.nc" - ds = xr.open_dataset(file) + ds = xr.load_dataset(file) flagvar = "qa" numvalid = 3 From 8b17ae03288c2bd1c0daad5c929639b359152637 Mon Sep 17 00:00:00 2001 From: Heiko Klein Date: Fri, 3 May 2024 09:25:22 +0000 Subject: [PATCH 2/2] linting --- pyaerocom/io/ghost/reader.py | 1 - pyaerocom/io/read_earlinet.py | 1 - 2 files changed, 2 deletions(-) diff --git a/pyaerocom/io/ghost/reader.py b/pyaerocom/io/ghost/reader.py index 31fb0a16a..b6eaf6064 100644 --- a/pyaerocom/io/ghost/reader.py +++ b/pyaerocom/io/ghost/reader.py @@ -340,7 +340,6 @@ def read_file(self, filename, var_to_read=None, invalidate_flags=None, var_to_wr var_to_write = self.var_names_data_inv[var_to_read] with xr.open_dataset(filename) as ds: - if not {"station", "time"}.issubset(ds.dims): # pragma: no cover raise AttributeError("Missing dimensions") if not "station_name" in ds: # pragma: no cover diff --git a/pyaerocom/io/read_earlinet.py b/pyaerocom/io/read_earlinet.py index 018ee9e99..637f3ca5f 100755 --- a/pyaerocom/io/read_earlinet.py +++ b/pyaerocom/io/read_earlinet.py @@ -229,7 +229,6 @@ def read_file(self, filename, vars_to_retrieve=None, read_err=None, remove_outli self.logger.debug(f"Reading file {filename}") with xarray.open_dataset(filename, engine="netcdf4") as data_in: - # getting the coords since no longer in metadata # Put also just in the attributes. not sure why appears twice data_out["station_coords"]["longitude"] = data_out["longitude"] = np.float64(
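
Note on the pattern applied throughout this series. The hunks above converge on
two idioms for keeping NetCDF handles from leaking. A minimal sketch, assuming
only xarray; the file name "example.nc" and the variable name "t2m" below are
placeholders standing in for the real inputs (e.g. const.ERA5_SURFTEMP_FILE):

    import xarray as xr

    # Idiom 1: scope the handle with a context manager. xr.open_dataset() is
    # lazy, so realize any values you need before the block closes the file.
    with xr.open_dataset("example.nc") as ds:  # "example.nc" is a placeholder
        t2m = ds["t2m"].values  # .values forces the read while the file is open

    # Idiom 2: read eagerly and close immediately. xr.load_dataset() pulls the
    # whole file into memory, so no handle is left dangling afterwards.
    ds = xr.load_dataset("example.nc")

The choice between the two mirrors the patch: with-blocks where the data are
consumed inside a bounded scope (the reader methods), xr.load_dataset() where
the object is returned or reused after the file should already be closed (the
EMEP model file, the HTAP mask files, the test fixture). xarray Datasets and
DataArrays both implement the context-manager protocol, which is why
colocation.py can use open_dataset(...)["t2m"] directly in a with-statement.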