diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 6ddf6029b99bb..34ff73082627a 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -185,7 +185,7 @@ Sparse Reshaping ^^^^^^^^^ - +- Joining/Merging with a non-unique ``PeriodIndex`` raised a ``TypeError`` (:issue:`16871`) Numeric diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index e1053c1610175..bbbc19b36964d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3119,14 +3119,14 @@ def _join_multi(self, other, how, return_indexers=True): def _join_non_unique(self, other, how='left', return_indexers=False): from pandas.core.reshape.merge import _get_join_indexers - left_idx, right_idx = _get_join_indexers([self.values], + left_idx, right_idx = _get_join_indexers([self._values], [other._values], how=how, sort=True) left_idx = _ensure_platform_int(left_idx) right_idx = _ensure_platform_int(right_idx) - join_index = np.asarray(self.values.take(left_idx)) + join_index = np.asarray(self._values.take(left_idx)) mask = left_idx == -1 np.putmask(join_index, mask, other._values.take(right_idx)) diff --git a/pandas/tests/reshape/test_join.py b/pandas/tests/reshape/test_join.py index e25661fb65271..e4894307918c6 100644 --- a/pandas/tests/reshape/test_join.py +++ b/pandas/tests/reshape/test_join.py @@ -550,6 +550,18 @@ def test_join_mixed_non_unique_index(self): index=[1, 2, 2, 'a']) tm.assert_frame_equal(result, expected) + def test_join_non_unique_period_index(self): + # GH #16871 + index = pd.period_range('2016-01-01', periods=16, freq='M') + df = DataFrame([i for i in range(len(index))], + index=index, columns=['pnum']) + df2 = concat([df, df]) + result = df.join(df2, how='inner', rsuffix='_df2') + expected = DataFrame( + np.tile(np.arange(16, dtype=np.int64).repeat(2).reshape(-1, 1), 2), + columns=['pnum', 'pnum_df2'], index=df2.sort_index().index) + tm.assert_frame_equal(result, expected) + def 
test_mixed_type_join_with_suffix(self): # GH #916 df = DataFrame(np.random.randn(20, 6), diff --git a/pandas/tests/reshape/test_merge.py b/pandas/tests/reshape/test_merge.py index 4ac376a9752cb..919675188576e 100644 --- a/pandas/tests/reshape/test_merge.py +++ b/pandas/tests/reshape/test_merge.py @@ -585,6 +585,18 @@ def test_merge_on_datetime64tz(self): assert result['value_x'].dtype == 'datetime64[ns, US/Eastern]' assert result['value_y'].dtype == 'datetime64[ns, US/Eastern]' + def test_merge_non_unique_period_index(self): + # GH #16871 + index = pd.period_range('2016-01-01', periods=16, freq='M') + df = DataFrame([i for i in range(len(index))], + index=index, columns=['pnum']) + df2 = concat([df, df]) + result = df.merge(df2, left_index=True, right_index=True, how='inner') + expected = DataFrame( + np.tile(np.arange(16, dtype=np.int64).repeat(2).reshape(-1, 1), 2), + columns=['pnum_x', 'pnum_y'], index=df2.sort_index().index) + tm.assert_frame_equal(result, expected) + def test_merge_on_periods(self): left = pd.DataFrame({'key': pd.period_range('20151010', periods=2, freq='D'),