diff --git a/HISTORY.rst b/HISTORY.rst index ec7dbd312..4d84de91a 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -5,6 +5,7 @@ History 0.14.x ------ * Fix in `subset.wrap_lons_and_split_at_greenwich` to preserve multi-region dataframes. +* Improve the memory use of `indices.growing_season_length`. 0.13.x (2020-01-10) diff --git a/tests/test_indices.py b/tests/test_indices.py index b0a1161d5..0c4297d11 100644 --- a/tests/test_indices.py +++ b/tests/test_indices.py @@ -334,48 +334,32 @@ def test_simple(self, tas_series): class TestGrowingSeasonLength: - def test_simple(self, tas_series): + @pytest.mark.parametrize( + "d1,d2,expected", + [ + ("1950-01-01", "1951-01-01", 0), # No growing season + ("2000-01-01", "2000-12-31", 365), # All year growing season + ("2000-07-10", "2001-01-01", np.nan), # End happens before start + ("2000-06-15", "2001-01-01", 199), # No end + ("2000-06-15", "2000-07-15", 31), # Normal case + ], + ) + def test_simple(self, tas_series, d1, d2, expected): # test for different growing length - # generate 5 years of data - a = np.zeros(366 * 2 + 365 * 3) - tas = tas_series(a, start="2000/1/1") - - # 2000 : no growing season - - # 2001 : growing season all year - d1 = "27-12-2000" - d2 = "31-12-2001" - buffer = tas.sel(time=slice(d1, d2)) - tas = tas.where(~tas.time.isin(buffer.time), 280) - - # 2002 : growing season in June only - d1 = "6-1-2002" - d2 = "6-10-2002" - buffer = tas.sel(time=slice(d1, d2)) - tas = tas.where(~tas.time.isin(buffer.time), 280) - # - # comment: - # correct answer should be 10 (i.e. there are 10 days - # with tas > 5 degC) but current definition imposes end - # of growing season to be equal or later than July 1st. - - # growing season in Aug only - d1 = "8-1-2003" - d2 = "8-10-2003" - buffer = tas.sel(time=slice(d1, d2)) - tas = tas.where(~tas.time.isin(buffer.time), 280) - - # growing season from June to end of July - d1 = "6-1-2004" - d2 = "7-31-2004" - buffer = tas.sel(time=slice(d1, d2)) - tas = tas.where(~tas.time.isin(buffer.time), 280) - + # generate a year of data + tas = tas_series(np.zeros(365), start="2000/1/1") + warm_period = tas.sel(time=slice(d1, d2)) + tas = tas.where(~tas.time.isin(warm_period.time), 280) gsl = xci.growing_season_length(tas) - target = [0, 365, 25, 10, 61] - - np.testing.assert_array_equal(gsl, target) + np.testing.assert_array_equal(gsl, expected) + + def test_southhemisphere(self, tas_series): + tas = tas_series(np.zeros(2 * 365), start="2000/1/1") + warm_period = tas.sel(time=slice("2000-11-01", "2001-03-01")) + tas = tas.where(~tas.time.isin(warm_period.time), 280) + gsl = xci.growing_season_length(tas, mid_date="01-01", freq="AS-Jul") + np.testing.assert_array_equal(gsl.sel(time="2000-07-01"), 121) class TestHeatingDegreeDays: diff --git a/xclim/atmos/_temperature.py b/xclim/atmos/_temperature.py index 9fadd0b61..0129412f8 100644 --- a/xclim/atmos/_temperature.py +++ b/xclim/atmos/_temperature.py @@ -420,8 +420,7 @@ def compute(*args, **kwds): description="{freq} number of days between the first occurrence of at least " "six consecutive days with mean daily temperature over {thresh} and " "the first occurrence of at least {window} consecutive days with " - "mean daily temperature below {thresh} after July 1st in the northern " - "hemisphere and January 1st in the southern hemisphere", + "mean daily temperature below {thresh} after {mid_date}.", cell_methods="", compute=indices.growing_season_length, ) diff --git a/xclim/indices/_threshold.py b/xclim/indices/_threshold.py index 486ee8412..d950da287 100644 --- a/xclim/indices/_threshold.py +++ b/xclim/indices/_threshold.py @@ -1,5 +1,7 @@ import numpy as np import xarray +import datetime +from typing import Union from xclim import run_length as rl from xclim import utils @@ -304,15 +306,21 @@ def growing_degree_days( @declare_units("days", tas="[temperature]", thresh="[temperature]") def growing_season_length( - tas: xarray.DataArray, thresh: str = "5.0 degC", window: int = 6, freq: str = "YS" + tas: xarray.DataArray, + thresh: str = "5.0 degC", + window: int = 6, + mid_date: str = "07-01", + freq: str = "YS", ): r"""Growing season length. The number of days between the first occurrence of at least six consecutive days with mean daily temperature over 5℃ and the first occurrence of at least six consecutive days with - mean daily temperature below 5℃ after July 1st in the northern - hemisphere and January 1st in the southern hemisphere. + mean daily temperature below 5℃ after a certain date. (Usually + July 1st in the northern hemisphere and January 1st in the southern hemisphere.) + + WARNING: The default values are only valid for the northern hemisphere. Parameters --------- @@ -322,6 +330,8 @@ def growing_season_length( Threshold temperature on which to base evaluation [℃] or [K]. Default: '5.0 degC'. window : int Minimum number of days with temperature above threshold to mark the beginning and end of growing season. + mid_date : str + Date of the year after which to look for the end of the season. Should have the format '%m-%d'. freq : str Resampling frequency; Defaults to "YS". @@ -344,50 +354,45 @@ def growing_season_length( .. math:: TG_{ij} < 5 ℃ - """ - # i = xr.DataArray(np.arange(tas.time.size), dims='time') - # ind = xr.broadcast(i, tas)[0] - # - # c = ((tas > thresh) * 1).rolling(time=window).sum() - # i1 = ind.where(c == window).resample(time=freq).min(dim='time') - # - # # Resample sets the time to T00:00. - # i11 = i1.reindex_like(c, method='ffill') - # - # # TODO: Adjust for southern hemisphere - # - # #i2 = ind.where(c == 0).where(tas.time.dt.month >= 7) - # # add check to make sure indice of end of growing season is after growing season start - # i2 = ind.where((c==0) & (ind > i11)).where(tas.time.dt.month >= 7) - # - # d = i2 - i11 - # - # # take min value (first occurence after july) - # gsl = d.resample(time=freq).min(dim='time') - # - # # turn nan into 0 - # gsl = xr.where(np.isnan(gsl), 0, gsl) - - # compute growth season length on resampled data + Examples + -------- + If working in the Southern Hemisphere, one can use: + >>> gsl = growing_season_length(tas, mid_date='01-01', freq='AS-Jul') + """ thresh = utils.convert_units_to(thresh, tas) - c = ((tas > thresh) * 1).rolling(time=window).sum(allow_lazy=True, skipna=False) - - def compute_gsl(c): - nt = c.time.size - i = xarray.DataArray(np.arange(nt), dims="time").chunk({"time": 1}) - ind = xarray.broadcast(i, c)[0].chunk(c.chunks) - i1 = ind.where(c == window).min(dim="time") - i1 = xarray.where(np.isnan(i1), nt, i1) - i11 = i1.reindex_like(c, method="ffill") - i2 = ind.where((c == 0) & (ind > i11)).where(c.time.dt.month >= 7) - i2 = xarray.where(np.isnan(i2), nt, i2) - d = (i2 - i1).min(dim="time") - return d - - gsl = c.resample(time=freq).apply(compute_gsl) - + mid_doy = datetime.datetime.strptime(mid_date, "%m-%d").timetuple().tm_yday + + def compute_gsl(yrdata): + if ( + yrdata.chunks is not None + and len(yrdata.chunks[yrdata.dims.index("time")]) > 1 + ): + yrdata = yrdata.chunk({"time": -1}) + mid_idx = np.where(yrdata.time.dt.dayofyear == mid_doy)[0] + if ( + mid_idx.size == 0 + ): # The mid date is not in the group. Happens at boundaries. + allNans = xarray.full_like(yrdata.isel(time=0), np.nan) + allNans.attrs = {} + return allNans + end = rl.first_run( + yrdata.where(yrdata.time >= yrdata.time[mid_idx][0]) < thresh, + window, + "time", + ) + beg = rl.first_run(yrdata > thresh, window, "time") + sl = end - beg + sl = xarray.where( + beg.isnull() & end.notnull(), 0, sl + ) # If everything is under thresh + sl = xarray.where( + beg.notnull() & end.isnull(), yrdata.time.size - beg, sl + ) # If gs is not ended at end of year + return sl.where(sl >= 0) # When end happens before beg. + + gsl = tas.resample(time=freq).map(compute_gsl) return gsl diff --git a/xclim/run_length.py b/xclim/run_length.py index 96811898e..0824dc57d 100644 --- a/xclim/run_length.py +++ b/xclim/run_length.py @@ -262,7 +262,8 @@ def rle_1d( if n == 0: e = "run length array empty" warn(e) - return None, None, None + # Returning None makes some other 1d func below fail. + return np.array(np.nan), 0, np.array(np.nan) y = np.array(ia[1:] != ia[:-1]) # pairwise unequal (string safe) i = np.append(np.where(y), n - 1) # must include last element position