Skip to content

Commit

Permalink
Merge branch 'main' into test_train_and_emulate_legacy
Browse files Browse the repository at this point in the history
  • Loading branch information
mathause committed Sep 4, 2023
2 parents 2c34b6a + 4b66b70 commit 5454d4b
Show file tree
Hide file tree
Showing 25 changed files with 429 additions and 103 deletions.
11 changes: 11 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,15 @@ New Features
- Added functions to calculate the weighted global mean (`#220
<https://github.com/MESMER-group/mesmer/pull/220>`_). By `Mathias Hauser
<https://github.com/mathause>`_.
- Added functions to wrap arrays to [-180, 180) and [0, 360), respectively (`#270
<https://github.com/MESMER-group/mesmer/pull/270>`_ and `#273
<https://github.com/MESMER-group/mesmer/pull/273>`_). By `Mathias Hauser
<https://github.com/mathause>`_.

- The aerosol data is now automatically downloaded using `pooch <https://www.fatiando.org/pooch/latest/>`__.
(`#267 <https://github.com/MESMER-group/mesmer/pull/267>`_). By `Mathias Hauser
<https://github.com/mathause>`_.


Breaking changes
^^^^^^^^^^^^^^^^
Expand Down Expand Up @@ -150,6 +159,8 @@ Internal Changes

By `Mathias Hauser <https://github.com/mathause>`_.

- Moved the climate model data manipulation functions (`#237 <https://github.com/MESMER-group/mesmer/issues/237>`_).
By `Mathias Hauser <https://github.com/mathause>`_.

v0.8.3 - 2021-12-23
-------------------
Expand Down
3 changes: 3 additions & 0 deletions data/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# mesmer data

This folder contains auxiliary data files for mesmer. The files are downloaded on demand using [pooch](https://www.fatiando.org/pooch/latest/).
34 changes: 31 additions & 3 deletions ...e-grid/observations/aerosols/isaod_gl.dat → data/isaod_gl_2022.dat
100755 → 100644
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# scripturl01 :: https://climexp.knmi.nl/getindices.cgi?STATION=stratospheric_AOD&TYPE=i&WMO=NASAData/saod_gl&id=$id
# scripturl01 :: http://climexp.knmi.nl/getindices.cgi?STATION=stratospheric_AOD&TYPE=i&WMO=NASAData/saod_gl&id=id
# global Optical Thickness at 550 nm
# from <a href="http://data.giss.nasa.gov/modelforce/strataer/">NASA/GISS</a>
# AOD [1] stratospheric aerosol optical depth
Expand All @@ -7,7 +7,7 @@
# contact :: https://www.giss.nasa.gov/staff/makiko_sato.html
# references :: Bourassa, A.E., A. Robock, et al. 2012: Large volcanic aerosol load in the stratosphere linked to Asian monsoon transport. Science 337, 78-81, doi:10.1126/science.1219371
# source_url :: https://data.giss.nasa.gov/modelforce/strataer/
# history :: retrieved and converted 2020-08-14 17:12:30
# history :: retrieved and converted 2023-07-10 21:49:40
# climexp_url :: https://climexp.knmi.nl/getindices.cgi?NASAData/saod_gl
1850 1 0.004
1850 2 0.004
Expand Down Expand Up @@ -2056,4 +2056,32 @@
2020 5 0.000
2020 6 0.000
2020 7 0.000

2020 8 0.000
2020 9 0.000
2020 10 0.000
2020 11 0.000
2020 12 0.000
2021 1 0.000
2021 2 0.000
2021 3 0.000
2021 4 0.000
2021 5 0.000
2021 6 0.000
2021 7 0.000
2021 8 0.000
2021 9 0.000
2021 10 0.000
2021 11 0.000
2021 12 0.000
2022 1 0.000
2022 2 0.000
2022 3 0.000
2022 4 0.000
2022 5 0.000
2022 6 0.000
2022 7 0.000
2022 8 0.000
2022 9 0.000
2022 10 0.000
2022 11 0.000
2022 12 0.000
20 changes: 11 additions & 9 deletions docs/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -68,15 +68,17 @@ Data manipulation
.. autosummary::
:toctree: generated/

~xarray_utils.grid.stack_lat_lon
~xarray_utils.grid.unstack_lat_lon_and_align
~xarray_utils.grid.unstack_lat_lon
~xarray_utils.grid.align_to_coords
~xarray_utils.mask.mask_ocean_fraction
~xarray_utils.mask.mask_ocean
~xarray_utils.mask.mask_antarctica
~xarray_utils.global_mean.lat_weights
~xarray_utils.global_mean.weighted_mean
~grid.wrap_to_180
~grid.wrap_to_360
~grid.stack_lat_lon
~grid.unstack_lat_lon_and_align
~grid.unstack_lat_lon
~grid.align_to_coords
~mask.mask_ocean_fraction
~mask.mask_ocean
~mask.mask_antarctica
~globmean.lat_weights
~globmean.weighted_mean

Legacy functions
================
Expand Down
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ dependencies:
- numpy
- packaging
- pandas<2.0
- pooch
- regionmask>=0.8
- scikit-learn
- sphinx
Expand Down
15 changes: 13 additions & 2 deletions mesmer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,20 @@
The mesmer package provides tools to train the MESMER emulator, create emulations, and
analyze the results.
"""
# flake8: noqa

from . import calibrate_mesmer, create_emulations, io, utils, xarray_utils
from . import calibrate_mesmer, core, create_emulations, io, utils
from .core import globmean, grid, mask

__all__ = [
"calibrate_mesmer",
"core",
"create_emulations",
"grid",
"io",
"mask",
"utils",
"globmean",
]

try:
from importlib.metadata import version as _get_version
Expand Down
18 changes: 11 additions & 7 deletions mesmer/calibrate_mesmer/train_gt.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
Functions to train global trend module of MESMER.
"""

import warnings

import numpy as np
import xarray as xr
Expand Down Expand Up @@ -126,7 +127,7 @@ def train_gt(var, targ, esm, time, cfg, save_params=True):
var_all = np.unique(var_all, axis=0)

params_gt["saod"], params_gt["hist"] = train_gt_ic_OLSVOLC(
var_all, gt_lowess_hist, params_gt["time"]["hist"], cfg
var_all, gt_lowess_hist, params_gt["time"]["hist"]
)
elif params_gt["method"] == "LOWESS":
params_gt["hist"] = gt_lowess_hist
Expand Down Expand Up @@ -201,7 +202,7 @@ def train_gt_ic_LOWESS(data):
return gt_lowess, frac_lowess_name


def train_gt_ic_OLSVOLC(var, gt_lowess, time, cfg):
def train_gt_ic_OLSVOLC(var, gt_lowess, time, cfg=None):
"""
Derive global trend (emissions + volcanoes) parameters from single ESM ic ensemble
by adding volcanic spikes to LOWESS trend.
Expand All @@ -214,8 +215,8 @@ def train_gt_ic_OLSVOLC(var, gt_lowess, time, cfg):
1d array of smooth global trend of variable
time : np.ndarray
1d array of years
cfg : module
config file containing metadata needed to load in stratospheric AOD time series
cfg : None
Passing cfg is no longer required.
Returns
-------
Expand All @@ -231,14 +232,17 @@ def train_gt_ic_OLSVOLC(var, gt_lowess, time, cfg):
"""

# specify necessary variables from cfg file
dir_obs = cfg.dir_obs
if cfg is not None:
warnings.warn(
"Passing ``cfg`` to ``train_gt_ic_OLSVOLC`` is no longer necessary",
FutureWarning,
)

nr_runs, nr_ts = var.shape

# account for volcanic eruptions in historical time period
# load in observed stratospheric aerosol optical depth
aod_obs = load_strat_aod(time, dir_obs)
aod_obs = load_strat_aod(time)
# drop "year" coords - aod_obs does not have coords (currently)
aod_obs = aod_obs.drop_vars("year")

Expand Down
Empty file added mesmer/core/__init__.py
Empty file.
25 changes: 25 additions & 0 deletions mesmer/core/_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import pooch

import mesmer


def fetch_remote_data(name):
    """
    Fetch an auxiliary data file through pooch, caching it locally.

    Parameters
    ----------
    name : str
        Name of the file to fetch. It is looked up in the registry defined below,
        which currently only lists ``"isaod_gl_2022.dat"``.

    Returns
    -------
    The result of ``Pooch.fetch(name)`` - presumably the path of the locally cached
    file (see the pooch documentation).
    """

    # known files and their expected hashes
    registry = {
        "isaod_gl_2022.dat": "3d26e78bf0ee96a02c99e2a7a448dafda0ac847a5c914a75c7d9745e95fe68ee",
    }

    fetcher = pooch.create(
        # files are stored in the OS-specific cache folder for "mesmer"
        path=pooch.os_cache("mesmer"),
        # The remote data is on Github; {version} is filled in by pooch
        base_url="https://github.com/MESMER-group/mesmer/raw/{version}/data/",
        registry=registry,
        version=f"v{mesmer.__version__}",
        version_dev="main",
    )

    # the file will be downloaded automatically the first time this is run.
    return fetcher.fetch(name)
File renamed without changes.
70 changes: 70 additions & 0 deletions mesmer/xarray_utils/grid.py → mesmer/core/grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,76 @@
from packaging.version import Version


def _lon_to_180(lon):
    """Shift longitude values into the half-open interval [-180, 180)."""

    # keep_attrs=True so the arithmetic does not drop the attrs of xarray inputs
    with xr.set_options(keep_attrs=True):
        wrapped = (lon + 180) % 360 - 180

    # anything that is not a DataArray is returned as plain wrapped values
    if not isinstance(wrapped, xr.DataArray):
        return wrapped

    # the arithmetic only changed the values - also update the coordinate of the
    # same name so that it carries the wrapped values
    return wrapped.assign_coords({wrapped.name: wrapped})


def _lon_to_360(lon):
    """Shift longitude values into the half-open interval [0, 360)."""

    # keep_attrs=True so the arithmetic does not drop the attrs of xarray inputs
    with xr.set_options(keep_attrs=True):
        wrapped = lon % 360

    # anything that is not a DataArray is returned as plain wrapped values
    if not isinstance(wrapped, xr.DataArray):
        return wrapped

    # the arithmetic only changed the values - also update the coordinate of the
    # same name so that it carries the wrapped values
    return wrapped.assign_coords({wrapped.name: wrapped})


def wrap_to_180(obj, lon_name="lon"):
    """
    Wrap the longitude coordinates of an object to [-180, 180) and sort by them.

    Parameters
    ----------
    obj : xr.Dataset or xr.DataArray
        Object with longitude coordinates.
    lon_name : str, default: "lon"
        Name of the longitude coordinate ('lon', 'longitude', ...).

    Returns
    -------
    wrapped : xr.Dataset or xr.DataArray
        Object with the longitude coordinate replaced by its wrapped values and
        sorted along that coordinate.
    """

    wrapped_lon = _lon_to_180(obj[lon_name])

    # replace the coordinate first, then restore a monotonic order along it
    return obj.assign_coords(**{lon_name: wrapped_lon}).sortby(lon_name)


def wrap_to_360(obj, lon_name="lon"):
    """
    Wrap the longitude coordinates of an object to [0, 360) and sort by them.

    Parameters
    ----------
    obj : xr.Dataset or xr.DataArray
        Object with longitude coordinates.
    lon_name : str, default: "lon"
        Name of the longitude coordinate ('lon', 'longitude', ...).

    Returns
    -------
    wrapped : xr.Dataset or xr.DataArray
        Object with the longitude coordinate replaced by its wrapped values and
        sorted along that coordinate.
    """

    wrapped_lon = _lon_to_360(obj[lon_name])

    # replace the coordinate first, then restore a monotonic order along it
    return obj.assign_coords(**{lon_name: wrapped_lon}).sortby(lon_name)


def stack_lat_lon(
data,
*,
Expand Down
10 changes: 5 additions & 5 deletions mesmer/xarray_utils/mask.py → mesmer/core/mask.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import regionmask
import xarray as xr

import mesmer.utils
import mesmer


def _where_if_dim(obj, cond, dims):
Expand Down Expand Up @@ -46,7 +46,7 @@ def mask_ocean_fraction(data, threshold, *, x_coords="lon", y_coords="lat"):
- Uses the 1:110m land mask from Natural Earth (http://www.naturalearthdata.com).
- The fractional overlap of individual grid points and the land mask can only be
computed for regularly-spaced 1D x- and y-coordinates. For irregularly spaced
coordinates use :py:func:`mesmer.xarray_utils.mask_land`.
coordinates use :py:func:`mesmer.mask.mask_land`.
"""

if np.ndim(threshold) != 0 or (threshold < 0) or (threshold > 1):
Expand All @@ -56,10 +56,10 @@ def mask_ocean_fraction(data, threshold, *, x_coords="lon", y_coords="lat"):
land_110 = regionmask.defined_regions.natural_earth_v5_0_0.land_110

try:
mask_fraction = mesmer.utils.regionmaskcompat.mask_3D_frac_approx(
mask_fraction = mesmer.core.regionmaskcompat.mask_3D_frac_approx(
land_110, data[x_coords], data[y_coords]
)
except mesmer.utils.regionmaskcompat.InvalidCoordsError as e:
except mesmer.core.regionmaskcompat.InvalidCoordsError as e:
raise ValueError(
"Cannot calculate fractional mask for irregularly-spaced coords - use "
"``mask_land`` instead."
Expand Down Expand Up @@ -95,7 +95,7 @@ def mask_ocean(data, *, x_coords="lon", y_coords="lat"):
-----
- Uses the 1:110m land mask from Natural Earth (http://www.naturalearthdata.com).
- Whether a grid cell is in the ocean or on land is based on its center. For
regularly spaced coordinates use :py:func:`mesmer.xarray_utils.mask_land_fraction`.
regularly spaced coordinates use :py:func:`mesmer.mask.mask_land_fraction`.
"""

# TODO: allow other masks?
Expand Down
File renamed without changes.
40 changes: 16 additions & 24 deletions mesmer/io/_load_cmipng.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
import numpy as np
import xarray as xr

import mesmer


def extract_time_lon_lat_wgt3d(data):
"""
Expand Down Expand Up @@ -283,38 +285,28 @@ def load_cmipng_file(run_path, gen, scen):
# rename to time for consistency with cmip6
data = data.rename({"year": "time"})

# roll so land in center
data = data.roll(lon=72, roll_coords=True)

# assign_coords so that labels = reasonable
data = data.assign_coords(lon=(((data.lon + 180) % 360) - 180))

# extract ens member
run = int(data.attrs["source_ensemble"].split("r")[1].split("i")[0])

if gen == 6:

run_path_hist = run_path.replace(scen, "historical")
paths = [run_path_hist, run_path]
preprocess = None

if "ssp534-over" in run_path:
run_path_ssp_534over = run_path
run_path_ssp_585 = run_path.replace(scen, "ssp585")
run_path_hist = run_path.replace(scen, "historical")
data = xr.open_mfdataset(
[run_path_hist, run_path_ssp_585, run_path_ssp_534over],
combine="by_coords",
preprocess=preprocess_ssp534over,
)
else: # for every other scenario
run_path_ssp = run_path
run_path_hist = run_path.replace(scen, "historical")
data = xr.open_mfdataset([run_path_hist, run_path_ssp], combine="by_coords")

# roll so land in center
data = data.roll(lon=72, roll_coords=True)

# assign_coords so that labels = reasonable
data = data.assign_coords(lon=(((data.lon + 180) % 360) - 180))
data = data.sortby(["lat", "lon"])

paths.append(run_path_ssp_585)
preprocess = preprocess_ssp534over

data = xr.open_mfdataset(paths, combine="by_coords", preprocess=preprocess)

run = data.attrs["realization_index"]

# wrap data to [-180, 180)
data = mesmer.grid.wrap_to_180(data)

return data, run


Expand Down
Loading

0 comments on commit 5454d4b

Please sign in to comment.