From 69d33fca3574bc923a3bfc5984e34bf24e6f3867 Mon Sep 17 00:00:00 2001 From: Sebastian Hahn Date: Tue, 16 Jul 2019 12:50:08 +0200 Subject: [PATCH] Add timezone support for temporal matching and tests --- pytesmo/temporal_matching.py | 12 ++++++++--- tests/test_temporal_matching.py | 37 +++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 3 deletions(-) diff --git a/pytesmo/temporal_matching.py b/pytesmo/temporal_matching.py index 3716f665..a5e66bfc 100644 --- a/pytesmo/temporal_matching.py +++ b/pytesmo/temporal_matching.py @@ -69,17 +69,18 @@ def df_temp_merge(df_reference, df_other, return_index=False, right_index=True, direction=direction, tolerance=tolerance) - df[dist_str] = df[ind_str] - df.index - dist_df = df[dist_str].values / np.timedelta64(1, 'D') + df[dist_str] = (df[ind_str].values - + df.index.values) / np.timedelta64(1, 'D') if duplicate_nan: unq, unq_idx = np.unique(df[ind_str].values, return_index=True) unq_idx = np.concatenate([unq_idx, np.array([len(df)])]) + dist = df[dist_str].values no_dup = [] for j in np.arange(unq_idx.size-1): m = np.argmin(np.abs( - dist_df[unq_idx[j]:unq_idx[j+1]])) + unq_idx[j] + dist[unq_idx[j]:unq_idx[j+1]])) + unq_idx[j] no_dup.append(m) duplicates = np.ones(len(df), dtype=np.bool) @@ -153,6 +154,7 @@ def df_match(reference, *args, **kwds): if type(arg) is pd.Series: arg = pd.DataFrame(arg) comp_step = arg.index.values - reference.index.values[0] + values = np.arange(comp_step.size) # setup kdtree which must get 2D input try: @@ -264,6 +266,10 @@ def matching(reference, *args, **kwargs): for match in matched_datasets: match = match.drop(['distance', 'index'], axis=1) + + if matched_data.index.tz is not None: + match.index = match.index.tz_localize('utc') + matched_data = matched_data.join(match) return matched_data.dropna() diff --git a/tests/test_temporal_matching.py b/tests/test_temporal_matching.py index e067c7ec..a514fceb 100644 --- a/tests/test_temporal_matching.py +++ b/tests/test_temporal_matching.py @@ -207,3 +207,40 @@ def test_matching_series(): np.nan, np.nan, np.nan]), matched.matched_data) assert len(matched) == 10 + + +def test_matching_tz(): + """ + test matching function with pd.Series as input and timezone information + """ + ref_tz = 'Europe/London' + ref_index = pd.date_range("2007-01-01", "2007-01-10", tz=ref_tz) + + data = np.arange(10.) + data[3] = np.nan + ref_ser = pd.Series(data, index=ref_index) + + match_tz = 'US/Pacific' + match_index = pd.date_range("2007-01-01 09:00:00", + "2007-01-05 09:00:00", tz=match_tz) + + match_ser = pd.Series(np.arange(len(match_index)), + index=match_index, name='matched_data') + + matched = tmatching.matching(ref_ser, match_ser) + + nptest.assert_allclose(np.array([0, 1, 3, 4]), matched.matched_data) + assert len(matched) == 4 + + matched = tmatching.df_temp_merge(ref_ser, match_ser) + + nptest.assert_allclose(np.array([0, 0, 1, 2, 3, 4, 4, 4, 4, 4]), + matched.matched_data) + assert len(matched) == 10 + + matched = tmatching.df_temp_merge(ref_ser, match_ser, duplicate_nan=True) + + nptest.assert_allclose(np.array([np.nan, 0, 1, 2, 3, 4, np.nan, np.nan, + np.nan, np.nan]), + matched.matched_data) + assert len(matched) == 10