Skip to content

Commit

Permalink
provide aerosol data via pooch (#267)
Browse files Browse the repository at this point in the history
* provide aerosol data via pooch

* fix hash

* fix correct docstring

* changelog
  • Loading branch information
mathause authored Jul 12, 2023
1 parent de55522 commit 80ac9d5
Show file tree
Hide file tree
Showing 8 changed files with 61 additions and 2,071 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,11 @@ New Features
<https://github.com/MESMER-group/mesmer/pull/220>`_). By `Mathias Hauser
<https://github.com/mathause>`_.

- The aerosol data is now automatically downloaded using `pooch <https://www.fatiando.org/pooch/latest/>`__.
(`#267 <https://github.com/MESMER-group/mesmer/pull/267>`_). By `Mathias Hauser
<https://github.com/mathause>`_.


Breaking changes
^^^^^^^^^^^^^^^^

Expand Down
3 changes: 3 additions & 0 deletions data/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# mesmer data

This folder contains auxiliary data files for mesmer. The files are downloaded on demand using [pooch](https://www.fatiando.org/pooch/latest/).
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ dependencies:
- numpy
- packaging
- pandas<2.0
- pooch
- regionmask>=0.8
- scikit-learn
- sphinx
Expand Down
18 changes: 11 additions & 7 deletions mesmer/calibrate_mesmer/train_gt.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
Functions to train global trend module of MESMER.
"""

import warnings

import numpy as np
import xarray as xr
Expand Down Expand Up @@ -126,7 +127,7 @@ def train_gt(var, targ, esm, time, cfg, save_params=True):
var_all = np.unique(var_all, axis=0)

params_gt["saod"], params_gt["hist"] = train_gt_ic_OLSVOLC(
var_all, gt_lowess_hist, params_gt["time"]["hist"], cfg
var_all, gt_lowess_hist, params_gt["time"]["hist"]
)
elif params_gt["method"] == "LOWESS":
params_gt["hist"] = gt_lowess_hist
Expand Down Expand Up @@ -201,7 +202,7 @@ def train_gt_ic_LOWESS(data):
return gt_lowess, frac_lowess_name


def train_gt_ic_OLSVOLC(var, gt_lowess, time, cfg):
def train_gt_ic_OLSVOLC(var, gt_lowess, time, cfg=None):
"""
Derive global trend (emissions + volcanoes) parameters from single ESM ic ensemble
by adding volcanic spikes to LOWESS trend.
Expand All @@ -214,8 +215,8 @@ def train_gt_ic_OLSVOLC(var, gt_lowess, time, cfg):
1d array of smooth global trend of variable
time : np.ndarray
1d array of years
cfg : module
config file containing metadata needed to load in stratospheric AOD time series
cfg : None
Passing cfg is no longer required.
Returns
-------
Expand All @@ -231,14 +232,17 @@ def train_gt_ic_OLSVOLC(var, gt_lowess, time, cfg):
"""

# specify necessary variables from cfg file
dir_obs = cfg.dir_obs
if cfg is not None:
warnings.warn(
"Passing ``cfg`` to ``train_gt_ic_OLSVOLC`` is no longer necessary",
FutureWarning,
)

nr_runs, nr_ts = var.shape

# account for volcanic eruptions in historical time period
# load in observed stratospheric aerosol optical depth
aod_obs = load_strat_aod(time, dir_obs)
aod_obs = load_strat_aod(time)
# drop "year" coords - aod_obs does not have coords (currently)
aod_obs = aod_obs.drop_vars("year")

Expand Down
25 changes: 25 additions & 0 deletions mesmer/core/_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import pooch

import mesmer


def fetch_remote_data(name):
    """Return the local copy of a mesmer data file, fetching it with pooch.

    Parameters
    ----------
    name : str
        Name of the requested file. It is looked up in the registry of the
        pooch instance created here, which currently lists
        ``"isaod_gl_2022.dat"``.

    Returns
    -------
    path : str
        Whatever ``Pooch.fetch`` returns for ``name`` - according to the pooch
        documentation the absolute path of the file in the local cache.
    """

    # The remote data is on Github; pooch fills in the "{version}" placeholder
    data_url = "https://github.com/MESMER-group/mesmer/raw/{version}/data/"

    # file name -> expected hash of the file
    known_files = {
        "isaod_gl_2022.dat": "3d26e78bf0ee96a02c99e2a7a448dafda0ac847a5c914a75c7d9745e95fe68ee",
    }

    remote_resource = pooch.create(
        # files are stored in the per-user cache directory for "mesmer"
        path=pooch.os_cache("mesmer"),
        base_url=data_url,
        registry=known_files,
        version=f"v{mesmer.__version__}",
        version_dev="main",
    )

    # the file will be downloaded automatically the first time this is run.
    return remote_resource.fetch(name)
20 changes: 15 additions & 5 deletions mesmer/io/load_obs.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,14 @@
"""

import os
import warnings

import numpy as np
import pandas as pd
import xarray as xr

from mesmer.core._data import fetch_remote_data


def load_obs(targ, prod, lon, lat, cfg, sel_ref="native", ignore_nans=True):
"""Load observations which you previously downloaded.
Expand Down Expand Up @@ -163,15 +166,15 @@ def load_obs_tblend(prod, lon, lat, cfg, sel_ref):
return tblend, time


def load_strat_aod(time, dir_obs):
def load_strat_aod(time, dir_obs=None):
"""Load observed global stratospheric aerosol optical depth time series.
Parameters
----------
time : np.ndarray
1d array of years the AOD time series is required for
dir_obs : str
pathway to observations
dir_obs : None
Deprecated.
Returns
-------
Expand All @@ -184,9 +187,16 @@ def load_strat_aod(time, dir_obs):
cmip6, 1850 - 2005 for cmip5)
"""

path_file = os.path.join(dir_obs, "aerosols", "isaod_gl.dat")
if dir_obs is not None:
warnings.warn(
"The aerosol data is now shipped with mesmer. Passing `dir_obs` to "
"``load_strat_aod`` is no longer necessary",
FutureWarning,
)

filename = fetch_remote_data("isaod_gl_2022.dat")
df = pd.read_csv(
path_file,
filename,
delim_whitespace=True,
skiprows=11,
names=("year", "month", "AOD"),
Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ install_requires =
numpy
packaging
pandas < 2.0
pooch
regionmask
scikit-learn
statsmodels
Expand Down
Loading

0 comments on commit 80ac9d5

Please sign in to comment.