Skip to content

Commit

Permalink
Revert "BUG-24212 fix usage of Index.take in pd.merge"
Browse files Browse the repository at this point in the history
  • Loading branch information
TomAugspurger committed Jan 24, 2019
1 parent 94d989e commit 634030b
Show file tree
Hide file tree
Showing 3 changed files with 3 additions and 40 deletions.
1 change: 0 additions & 1 deletion doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1826,7 +1826,6 @@ Reshaping
- Bug in :func:`DataFrame.unstack` where a ``ValueError`` was raised when unstacking timezone aware values (:issue:`18338`)
- Bug in :func:`DataFrame.stack` where timezone aware values were converted to timezone naive values (:issue:`19420`)
- Bug in :func:`merge_asof` where a ``TypeError`` was raised when ``by_col`` were timezone aware values (:issue:`21184`)
- Bug in :func:`merge` when merging by index name would sometimes result in an incorrectly numbered index (:issue:`24212`)
- Bug where an incorrect shape was shown in the error raised during ``DataFrame`` construction. (:issue:`20742`)

.. _whatsnew_0240.bug_fixes.sparse:
Expand Down
41 changes: 2 additions & 39 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -757,19 +757,13 @@ def _get_join_info(self):

if self.right_index:
if len(self.left) > 0:
join_index = self._create_join_index(self.left.index,
self.right.index,
left_indexer,
how='right')
join_index = self.left.index.take(left_indexer)
else:
join_index = self.right.index.take(right_indexer)
left_indexer = np.array([-1] * len(join_index))
elif self.left_index:
if len(self.right) > 0:
join_index = self._create_join_index(self.right.index,
self.left.index,
right_indexer,
how='left')
join_index = self.right.index.take(right_indexer)
else:
join_index = self.left.index.take(left_indexer)
right_indexer = np.array([-1] * len(join_index))
Expand All @@ -780,37 +774,6 @@ def _get_join_info(self):
join_index = join_index.astype(object)
return join_index, left_indexer, right_indexer

def _create_join_index(self, index, other_index, indexer, how='left',
                       other_indexer=None):
    """
    Create a join index by rearranging one index to match another.

    Parameters
    ----------
    index : Index
        Index being rearranged.
    other_index : Index
        Index used to supply values not found in ``index``.
    indexer : ndarray of int
        How to rearrange ``index``; ``-1`` marks an output row that has
        no counterpart in ``index``.
    how : str, default 'left'
        Replacement is only necessary if ``indexer`` is based on
        ``other_index``, i.e. when ``self.how`` equals this value or
        ``'outer'``.
    other_indexer : ndarray of int, optional
        How to rearrange ``other_index`` so that it lines up row-for-row
        with ``indexer``. When omitted, ``other_index`` is used as-is and
        must therefore already have the same length as ``indexer``.

    Returns
    -------
    Index
        The rearranged index, with missing positions filled from
        ``other_index``.
    """
    join_index = index.take(indexer)

    # Values only need patching when the result is driven by other_index;
    # MultiIndex is deliberately left untouched.
    if self.how not in (how, 'outer') or isinstance(other_index, MultiIndex):
        return join_index

    # if final index requires values in other_index but not target
    # index, indexer may hold missing (-1) values, causing Index.take
    # to take the final value in target index
    mask = indexer == -1
    if not np.any(mask):
        return join_index

    # The mask has one entry per *output* row, so other_index has to be
    # brought into output-row order before it can be masked. Indexing the
    # raw other_index only works when it happens to share that length.
    if other_indexer is None:
        other_values = other_index.to_numpy()
    else:
        other_values = other_index.take(other_indexer).to_numpy()

    # Copy before writing: to_numpy() is allowed to return a view of the
    # index's own data, and an Index must not be mutated in place.
    join_values = join_index.to_numpy(copy=True)
    join_values[mask] = other_values[mask]
    return Index(join_values, dtype=join_index.dtype, name=join_index.name)

def _get_merge_keys(self):
"""
Note: has side effects (copy/delete key columns)
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/reshape/merge/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -940,6 +940,7 @@ def test_merge_two_empty_df_no_division_error(self):
merge(a, a, on=('a', 'b'))

@pytest.mark.parametrize('how', ['left', 'outer'])
@pytest.mark.xfail(reason="GH-24897")
def test_merge_on_index_with_more_values(self, how):
# GH 24212
# pd.merge gets [-1, -1, 0, 1] as right_indexer, ensure that -1 is
Expand Down

0 comments on commit 634030b

Please sign in to comment.