Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve Growing Season Length and fix rle1d #349

Merged
merged 3 commits into from
Jan 20, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ History
0.14.x
------
* Fix in `subset.wrap_lons_and_split_at_greenwich` to preserve multi-region dataframes.
* Improve the memory use of `indices.growing_season_length`.


0.13.x (2020-01-10)
Expand Down
62 changes: 23 additions & 39 deletions tests/test_indices.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,48 +334,32 @@ def test_simple(self, tas_series):


class TestGrowingSeasonLength:
def test_simple(self, tas_series):
@pytest.mark.parametrize(
"d1,d2,expected",
[
("1950-01-01", "1951-01-01", 0), # No growing season
("2000-01-01", "2000-12-31", 365), # All year growing season
("2000-07-10", "2001-01-01", np.nan), # End happens before start
("2000-06-15", "2001-01-01", 199), # No end
("2000-06-15", "2000-07-15", 31), # Normal case
],
)
def test_simple(self, tas_series, d1, d2, expected):
# test for different growing length

# generate 5 years of data
a = np.zeros(366 * 2 + 365 * 3)
tas = tas_series(a, start="2000/1/1")

# 2000 : no growing season

# 2001 : growing season all year
d1 = "27-12-2000"
d2 = "31-12-2001"
buffer = tas.sel(time=slice(d1, d2))
tas = tas.where(~tas.time.isin(buffer.time), 280)

# 2002 : growing season in June only
d1 = "6-1-2002"
d2 = "6-10-2002"
buffer = tas.sel(time=slice(d1, d2))
tas = tas.where(~tas.time.isin(buffer.time), 280)
#
# comment:
# correct answer should be 10 (i.e. there are 10 days
# with tas > 5 degC) but current definition imposes end
# of growing season to be equal or later than July 1st.

# growing season in Aug only
d1 = "8-1-2003"
d2 = "8-10-2003"
buffer = tas.sel(time=slice(d1, d2))
tas = tas.where(~tas.time.isin(buffer.time), 280)

# growing season from June to end of July
d1 = "6-1-2004"
d2 = "7-31-2004"
buffer = tas.sel(time=slice(d1, d2))
tas = tas.where(~tas.time.isin(buffer.time), 280)

# generate a year of data
tas = tas_series(np.zeros(365), start="2000/1/1")
warm_period = tas.sel(time=slice(d1, d2))
tas = tas.where(~tas.time.isin(warm_period.time), 280)
gsl = xci.growing_season_length(tas)
target = [0, 365, 25, 10, 61]

np.testing.assert_array_equal(gsl, target)
np.testing.assert_array_equal(gsl, expected)

def test_southhemisphere(self, tas_series):
tas = tas_series(np.zeros(2 * 365), start="2000/1/1")
warm_period = tas.sel(time=slice("2000-11-01", "2001-03-01"))
tas = tas.where(~tas.time.isin(warm_period.time), 280)
gsl = xci.growing_season_length(tas, mid_date="01-01", freq="AS-Jul")
np.testing.assert_array_equal(gsl.sel(time="2000-07-01"), 121)


class TestHeatingDegreeDays:
Expand Down
3 changes: 1 addition & 2 deletions xclim/atmos/_temperature.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,8 +420,7 @@ def compute(*args, **kwds):
description="{freq} number of days between the first occurrence of at least "
"six consecutive days with mean daily temperature over {thresh} and "
"the first occurrence of at least {window} consecutive days with "
"mean daily temperature below {thresh} after July 1st in the northern "
"hemisphere and January 1st in the southern hemisphere",
"mean daily temperature below {thresh} after {mid_date}.",
cell_methods="",
compute=indices.growing_season_length,
)
Expand Down
93 changes: 49 additions & 44 deletions xclim/indices/_threshold.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import numpy as np
import xarray
import datetime
from typing import Union

from xclim import run_length as rl
from xclim import utils
Expand Down Expand Up @@ -304,15 +306,21 @@ def growing_degree_days(

@declare_units("days", tas="[temperature]", thresh="[temperature]")
def growing_season_length(
tas: xarray.DataArray, thresh: str = "5.0 degC", window: int = 6, freq: str = "YS"
tas: xarray.DataArray,
thresh: str = "5.0 degC",
window: int = 6,
mid_date: str = "07-01",
freq: str = "YS",
):
r"""Growing season length.

The number of days between the first occurrence of at least
six consecutive days with mean daily temperature over 5℃ and
the first occurrence of at least six consecutive days with
mean daily temperature below 5℃ after July 1st in the northern
hemisphere and January 1st in the southern hemisphere.
mean daily temperature below 5℃ after a certain date. (Usually
July 1st in the northern hemisphere and January 1st in the southern hemisphere.)

WARNING: The default values are only valid for the northern hemisphere.

Parameters
---------
Expand All @@ -322,6 +330,8 @@ def growing_season_length(
Threshold temperature on which to base evaluation [℃] or [K]. Default: '5.0 degC'.
window : int
Minimum number of days with temperature above threshold to mark the beginning and end of growing season.
mid_date : str
Date of the year after which to look for the end of the season. Should have the format '%m-%d'.
freq : str
Resampling frequency; Defaults to "YS".

Expand All @@ -344,50 +354,45 @@ def growing_season_length(
.. math::

TG_{ij} < 5 ℃
"""

# i = xr.DataArray(np.arange(tas.time.size), dims='time')
# ind = xr.broadcast(i, tas)[0]
#
# c = ((tas > thresh) * 1).rolling(time=window).sum()
# i1 = ind.where(c == window).resample(time=freq).min(dim='time')
#
# # Resample sets the time to T00:00.
# i11 = i1.reindex_like(c, method='ffill')
#
# # TODO: Adjust for southern hemisphere
#
# #i2 = ind.where(c == 0).where(tas.time.dt.month >= 7)
# # add check to make sure indice of end of growing season is after growing season start
# i2 = ind.where((c==0) & (ind > i11)).where(tas.time.dt.month >= 7)
#
# d = i2 - i11
#
# # take min value (first occurence after july)
# gsl = d.resample(time=freq).min(dim='time')
#
# # turn nan into 0
# gsl = xr.where(np.isnan(gsl), 0, gsl)

# compute growth season length on resampled data
Examples
--------
If working in the Southern Hemisphere, one can use:
>>> gsl = growing_season_length(tas, mid_date='01-01', freq='AS-Jul')
"""
thresh = utils.convert_units_to(thresh, tas)

c = ((tas > thresh) * 1).rolling(time=window).sum(allow_lazy=True, skipna=False)

def compute_gsl(c):
nt = c.time.size
i = xarray.DataArray(np.arange(nt), dims="time").chunk({"time": 1})
ind = xarray.broadcast(i, c)[0].chunk(c.chunks)
i1 = ind.where(c == window).min(dim="time")
i1 = xarray.where(np.isnan(i1), nt, i1)
i11 = i1.reindex_like(c, method="ffill")
i2 = ind.where((c == 0) & (ind > i11)).where(c.time.dt.month >= 7)
i2 = xarray.where(np.isnan(i2), nt, i2)
d = (i2 - i1).min(dim="time")
return d

gsl = c.resample(time=freq).apply(compute_gsl)

mid_doy = datetime.datetime.strptime(mid_date, "%m-%d").timetuple().tm_yday

def compute_gsl(yrdata):
if (
yrdata.chunks is not None
and len(yrdata.chunks[yrdata.dims.index("time")]) > 1
):
yrdata = yrdata.chunk({"time": -1})
mid_idx = np.where(yrdata.time.dt.dayofyear == mid_doy)[0]
if (
mid_idx.size == 0
): # The mid date is not in the group. Happens at boundaries.
allNans = xarray.full_like(yrdata.isel(time=0), np.nan)
allNans.attrs = {}
return allNans
end = rl.first_run(
yrdata.where(yrdata.time >= yrdata.time[mid_idx][0]) < thresh,
window,
"time",
)
beg = rl.first_run(yrdata > thresh, window, "time")
sl = end - beg
sl = xarray.where(
beg.isnull() & end.notnull(), 0, sl
) # If everything is under thresh
sl = xarray.where(
beg.notnull() & end.isnull(), yrdata.time.size - beg, sl
) # If gs is not ended at end of year
return sl.where(sl >= 0) # When end happens before beg.

gsl = tas.resample(time=freq).map(compute_gsl)
return gsl


Expand Down
3 changes: 2 additions & 1 deletion xclim/run_length.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,8 @@ def rle_1d(
if n == 0:
e = "run length array empty"
warn(e)
return None, None, None
# Returning None makes some other 1d func below fail.
return np.array(np.nan), 0, np.array(np.nan)

y = np.array(ia[1:] != ia[:-1]) # pairwise unequal (string safe)
i = np.append(np.where(y), n - 1) # must include last element position
Expand Down