Skip to content

Commit

Permalink
Add timezone support for temporal matching and tests
Browse files Browse the repository at this point in the history
  • Loading branch information
sebhahn committed Jul 16, 2019
1 parent 3f3bc86 commit 69d33fc
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 3 deletions.
12 changes: 9 additions & 3 deletions pytesmo/temporal_matching.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,17 +69,18 @@ def df_temp_merge(df_reference, df_other, return_index=False,
right_index=True, direction=direction,
tolerance=tolerance)

df[dist_str] = df[ind_str] - df.index
dist_df = df[dist_str].values / np.timedelta64(1, 'D')
df[dist_str] = (df[ind_str].values -
df.index.values) / np.timedelta64(1, 'D')

if duplicate_nan:
unq, unq_idx = np.unique(df[ind_str].values, return_index=True)
unq_idx = np.concatenate([unq_idx, np.array([len(df)])])
dist = df[dist_str].values

no_dup = []
for j in np.arange(unq_idx.size-1):
m = np.argmin(np.abs(
dist_df[unq_idx[j]:unq_idx[j+1]])) + unq_idx[j]
dist[unq_idx[j]:unq_idx[j+1]])) + unq_idx[j]
no_dup.append(m)

duplicates = np.ones(len(df), dtype=np.bool)
Expand Down Expand Up @@ -153,6 +154,7 @@ def df_match(reference, *args, **kwds):
if type(arg) is pd.Series:
arg = pd.DataFrame(arg)
comp_step = arg.index.values - reference.index.values[0]

values = np.arange(comp_step.size)
# setup kdtree which must get 2D input
try:
Expand Down Expand Up @@ -264,6 +266,10 @@ def matching(reference, *args, **kwargs):

for match in matched_datasets:
match = match.drop(['distance', 'index'], axis=1)

if matched_data.index.tz is not None:
match.index = match.index.tz_localize('utc')

matched_data = matched_data.join(match)

return matched_data.dropna()
37 changes: 37 additions & 0 deletions tests/test_temporal_matching.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,3 +207,40 @@ def test_matching_series():
np.nan, np.nan, np.nan]),
matched.matched_data)
assert len(matched) == 10


def test_matching_tz():
    """
    Temporal matching of two pd.Series whose indices carry different
    time zones.

    The reference series has a daily index localized to 'Europe/London'
    and contains one NaN value; the series to be matched has a daily
    index at 09:00 localized to 'US/Pacific'. The test exercises
    ``tmatching.matching`` as well as ``tmatching.df_temp_merge`` with
    and without ``duplicate_nan``.
    """
    # reference series: 2007-01-01 .. 2007-01-10, value at position 3 is NaN
    reference_values = np.arange(10.)
    reference_values[3] = np.nan
    reference = pd.Series(
        reference_values,
        index=pd.date_range("2007-01-01", "2007-01-10", tz='Europe/London'))

    # series to be matched: 2007-01-01 09:00 .. 2007-01-05 09:00
    other_index = pd.date_range("2007-01-01 09:00:00",
                                "2007-01-05 09:00:00", tz='US/Pacific')
    other = pd.Series(np.arange(len(other_index)),
                      index=other_index, name='matched_data')

    # matching(): four rows are expected in the result
    result = tmatching.matching(reference, other)
    expected_matching = np.array([0, 1, 3, 4])
    nptest.assert_allclose(expected_matching, result.matched_data)
    assert len(result) == 4

    # df_temp_merge(): one row per reference time stamp, matched values
    # may occur more than once
    result = tmatching.df_temp_merge(reference, other)
    expected_merge = np.array([0, 0, 1, 2, 3, 4, 4, 4, 4, 4])
    nptest.assert_allclose(expected_merge, result.matched_data)
    assert len(result) == 10

    # df_temp_merge(duplicate_nan=True): repeated matches are expected to
    # show up as NaN
    result = tmatching.df_temp_merge(reference, other, duplicate_nan=True)
    expected_no_dup = np.array([np.nan, 0, 1, 2, 3, 4,
                                np.nan, np.nan, np.nan, np.nan])
    nptest.assert_allclose(expected_no_dup, result.matched_data)
    assert len(result) == 10

0 comments on commit 69d33fc

Please sign in to comment.