From 2778c2fcfebff972e7e4558b32f3c3a6c5ae0699 Mon Sep 17 00:00:00 2001 From: Alan Velasco Date: Sat, 15 Jul 2017 16:28:23 -0500 Subject: [PATCH] Support non unique period indexes on join and merge operations (#16949) * Support non unique period indexes on join and merge operations * Add frame assertion on tests and release notes * Explicitly use dtype int64 on arange --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/core/indexes/base.py | 4 ++-- pandas/tests/reshape/test_join.py | 12 ++++++++++++ pandas/tests/reshape/test_merge.py | 12 ++++++++++++ 4 files changed, 27 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 34095d55b8cc9..11d3e4cf964aa 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -184,7 +184,7 @@ Sparse Reshaping ^^^^^^^^^ - +- Joining/Merging with a non unique ``PeriodIndex`` raised a TypeError (:issue:`16871`) Numeric diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index e1053c1610175..bbbc19b36964d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3119,14 +3119,14 @@ def _join_multi(self, other, how, return_indexers=True): def _join_non_unique(self, other, how='left', return_indexers=False): from pandas.core.reshape.merge import _get_join_indexers - left_idx, right_idx = _get_join_indexers([self.values], + left_idx, right_idx = _get_join_indexers([self._values], [other._values], how=how, sort=True) left_idx = _ensure_platform_int(left_idx) right_idx = _ensure_platform_int(right_idx) - join_index = np.asarray(self.values.take(left_idx)) + join_index = np.asarray(self._values.take(left_idx)) mask = left_idx == -1 np.putmask(join_index, mask, other._values.take(right_idx)) diff --git a/pandas/tests/reshape/test_join.py b/pandas/tests/reshape/test_join.py index e25661fb65271..e4894307918c6 100644 --- a/pandas/tests/reshape/test_join.py +++ b/pandas/tests/reshape/test_join.py @@ -550,6 +550,18 @@ def test_join_mixed_non_unique_index(self): index=[1, 2, 2, 'a']) tm.assert_frame_equal(result, expected) + def test_join_non_unique_period_index(self): + # GH #16871 + index = pd.period_range('2016-01-01', periods=16, freq='M') + df = DataFrame([i for i in range(len(index))], + index=index, columns=['pnum']) + df2 = concat([df, df]) + result = df.join(df2, how='inner', rsuffix='_df2') + expected = DataFrame( + np.tile(np.arange(16, dtype=np.int64).repeat(2).reshape(-1, 1), 2), + columns=['pnum', 'pnum_df2'], index=df2.sort_index().index) + tm.assert_frame_equal(result, expected) + def test_mixed_type_join_with_suffix(self): # GH #916 df = DataFrame(np.random.randn(20, 6), diff --git a/pandas/tests/reshape/test_merge.py b/pandas/tests/reshape/test_merge.py index 4ac376a9752cb..919675188576e 100644 --- a/pandas/tests/reshape/test_merge.py +++ b/pandas/tests/reshape/test_merge.py @@ -585,6 +585,18 @@ def test_merge_on_datetime64tz(self): assert result['value_x'].dtype == 'datetime64[ns, US/Eastern]' assert result['value_y'].dtype == 'datetime64[ns, US/Eastern]' + def test_merge_non_unique_period_index(self): + # GH #16871 + index = pd.period_range('2016-01-01', periods=16, freq='M') + df = DataFrame([i for i in range(len(index))], + index=index, columns=['pnum']) + df2 = concat([df, df]) + result = df.merge(df2, left_index=True, right_index=True, how='inner') + expected = DataFrame( + np.tile(np.arange(16, dtype=np.int64).repeat(2).reshape(-1, 1), 2), + columns=['pnum_x', 'pnum_y'], index=df2.sort_index().index) + tm.assert_frame_equal(result, expected) + def test_merge_on_periods(self): left = pd.DataFrame({'key': pd.period_range('20151010', periods=2, freq='D'),