From 69d33fca3574bc923a3bfc5984e34bf24e6f3867 Mon Sep 17 00:00:00 2001
From: Sebastian Hahn <sebastian.hahn@geo.tuwien.ac.at>
Date: Tue, 16 Jul 2019 12:50:08 +0200
Subject: [PATCH] Add timezone support for temporal matching and tests

---
 pytesmo/temporal_matching.py    | 12 ++++++++---
 tests/test_temporal_matching.py | 37 +++++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/pytesmo/temporal_matching.py b/pytesmo/temporal_matching.py
index 3716f665..a5e66bfc 100644
--- a/pytesmo/temporal_matching.py
+++ b/pytesmo/temporal_matching.py
@@ -69,17 +69,18 @@ def df_temp_merge(df_reference, df_other, return_index=False,
                            right_index=True, direction=direction,
                            tolerance=tolerance)
 
-        df[dist_str] = df[ind_str] - df.index
-        dist_df = df[dist_str].values / np.timedelta64(1, 'D')
+        df[dist_str] = (df[ind_str].values -
+                        df.index.values) / np.timedelta64(1, 'D')
 
         if duplicate_nan:
             unq, unq_idx = np.unique(df[ind_str].values, return_index=True)
             unq_idx = np.concatenate([unq_idx, np.array([len(df)])])
+            dist = df[dist_str].values
 
             no_dup = []
             for j in np.arange(unq_idx.size-1):
                 m = np.argmin(np.abs(
-                    dist_df[unq_idx[j]:unq_idx[j+1]])) + unq_idx[j]
+                    dist[unq_idx[j]:unq_idx[j+1]])) + unq_idx[j]
                 no_dup.append(m)
 
             duplicates = np.ones(len(df), dtype=np.bool)
@@ -153,6 +154,7 @@ def df_match(reference, *args, **kwds):
         if type(arg) is pd.Series:
             arg = pd.DataFrame(arg)
         comp_step = arg.index.values - reference.index.values[0]
+
         values = np.arange(comp_step.size)
         # setup kdtree which must get 2D input
         try:
@@ -264,6 +266,10 @@ def matching(reference, *args, **kwargs):
 
     for match in matched_datasets:
         match = match.drop(['distance', 'index'], axis=1)
+
+        if matched_data.index.tz is not None:
+            match.index = match.index.tz_localize('utc')
+
         matched_data = matched_data.join(match)
 
     return matched_data.dropna()
diff --git a/tests/test_temporal_matching.py b/tests/test_temporal_matching.py
index e067c7ec..a514fceb 100644
--- a/tests/test_temporal_matching.py
+++ b/tests/test_temporal_matching.py
@@ -207,3 +207,40 @@ def test_matching_series():
                                      np.nan, np.nan, np.nan]),
                            matched.matched_data)
     assert len(matched) == 10
+
+
+def test_matching_tz():
+    """
+    Test matching/df_temp_merge with pd.Series in two different timezones.
+    """
+    ref_tz = 'Europe/London'
+    ref_index = pd.date_range("2007-01-01", "2007-01-10", tz=ref_tz)
+
+    data = np.arange(10.)
+    data[3] = np.nan  # NaN gap in the reference data
+    ref_ser = pd.Series(data, index=ref_index)
+
+    match_tz = 'US/Pacific'  # different zone than ref_tz
+    match_index = pd.date_range("2007-01-01 09:00:00",
+                                "2007-01-05 09:00:00", tz=match_tz)
+
+    match_ser = pd.Series(np.arange(len(match_index)),
+                          index=match_index, name='matched_data')
+
+    matched = tmatching.matching(ref_ser, match_ser)
+    # NOTE(review): value 2 presumably pairs with the NaN ref and is dropped
+    nptest.assert_allclose(np.array([0, 1, 3, 4]), matched.matched_data)
+    assert len(matched) == 4
+
+    matched = tmatching.df_temp_merge(ref_ser, match_ser)
+    # expects one row per reference timestamp, so matched values repeat
+    nptest.assert_allclose(np.array([0, 0, 1, 2, 3, 4, 4, 4, 4, 4]),
+                           matched.matched_data)
+    assert len(matched) == 10
+
+    matched = tmatching.df_temp_merge(ref_ser, match_ser, duplicate_nan=True)
+    # with duplicate_nan=True all but one of each repeated match become NaN
+    nptest.assert_allclose(np.array([np.nan, 0, 1, 2, 3, 4, np.nan, np.nan,
+                                     np.nan, np.nan]),
+                           matched.matched_data)
+    assert len(matched) == 10