Skip to content

Commit

Permalink
BUG: DataFrame.merge(suffixes=) does not respect None (pandas-dev#24819)
Browse files Browse the repository at this point in the history
  • Loading branch information
charlesdong1991 authored and Pingviinituutti committed Feb 28, 2019
1 parent 6dc29c5 commit 8dea933
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 10 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ Groupby/Resample/Rolling
Reshaping
^^^^^^^^^

- Bug in :func:`pandas.merge` adds a string of ``None`` if ``None`` is assigned in suffixes instead of remain the column name as-is (:issue:`24782`).
- Bug in :func:`merge` when merging by index name would sometimes result in an incorrectly numbered index (:issue:`24212`)
- :func:`to_records` now accepts dtypes to its `column_dtypes` parameter (:issue:`24895`)
-
Expand Down
26 changes: 19 additions & 7 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1971,16 +1971,28 @@ def items_overlap_with_suffix(left, lsuffix, right, rsuffix):
raise ValueError('columns overlap but no suffix specified: '
'{rename}'.format(rename=to_rename))

def lrenamer(x):
if x in to_rename:
return '{x}{lsuffix}'.format(x=x, lsuffix=lsuffix)
return x
def renamer(x, suffix):
"""Rename the left and right indices.
If there is overlap, and suffix is not None, add
suffix, otherwise, leave it as-is.
Parameters
----------
x : original column name
suffix : str or None
def rrenamer(x):
if x in to_rename:
return '{x}{rsuffix}'.format(x=x, rsuffix=rsuffix)
Returns
-------
x : renamed column name
"""
if x in to_rename and suffix is not None:
return '{x}{suffix}'.format(x=x, suffix=suffix)
return x

lrenamer = partial(renamer, suffix=lsuffix)
rrenamer = partial(renamer, suffix=rsuffix)

return (_transform_index(left, lrenamer),
_transform_index(right, rrenamer))

Expand Down
12 changes: 9 additions & 3 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,9 +159,15 @@ def merge_ordered(left, right, on=None,
left DataFrame
fill_method : {'ffill', None}, default None
Interpolation method for data
suffixes : 2-length sequence (tuple, list, ...)
Suffix to apply to overlapping column names in the left and right
side, respectively
suffixes : Sequence, default is ("_x", "_y")
A length-2 sequence where each element is optionally a string
indicating the suffix to add to overlapping column names in
`left` and `right` respectively. Pass a value of `None` instead
of a string to indicate that the column name from `left` or
`right` should be left as-is, with no suffix. At least one of the
values must not be None.
.. versionchanged:: 0.25.0
how : {'left', 'right', 'outer', 'inner'}, default 'outer'
* left: use only keys from left frame (SQL: left outer join)
* right: use only keys from right frame (SQL: right outer join)
Expand Down
62 changes: 62 additions & 0 deletions pandas/tests/reshape/merge/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -1526,3 +1526,65 @@ def test_merge_series(on, left_on, right_on, left_index, right_index, nm):
with pytest.raises(ValueError, match=msg):
result = pd.merge(a, b, on=on, left_on=left_on, right_on=right_on,
left_index=left_index, right_index=right_index)


@pytest.mark.parametrize("col1, col2, kwargs, expected_cols", [
(0, 0, dict(suffixes=("", "_dup")), ["0", "0_dup"]),
(0, 0, dict(suffixes=(None, "_dup")), [0, "0_dup"]),
(0, 0, dict(suffixes=("_x", "_y")), ["0_x", "0_y"]),
("a", 0, dict(suffixes=(None, "_y")), ["a", 0]),
(0.0, 0.0, dict(suffixes=("_x", None)), ["0.0_x", 0.0]),
("b", "b", dict(suffixes=(None, "_y")), ["b", "b_y"]),
("a", "a", dict(suffixes=("_x", None)), ["a_x", "a"]),
("a", "b", dict(suffixes=("_x", None)), ["a", "b"]),
("a", "a", dict(suffixes=[None, "_x"]), ["a", "a_x"]),
(0, 0, dict(suffixes=["_a", None]), ["0_a", 0]),
("a", "a", dict(), ["a_x", "a_y"]),
(0, 0, dict(), ["0_x", "0_y"])
])
def test_merge_suffix(col1, col2, kwargs, expected_cols):
# issue: 24782
a = pd.DataFrame({col1: [1, 2, 3]})
b = pd.DataFrame({col2: [4, 5, 6]})

expected = pd.DataFrame([[1, 4], [2, 5], [3, 6]],
columns=expected_cols)

result = a.merge(b, left_index=True, right_index=True, **kwargs)
tm.assert_frame_equal(result, expected)

result = pd.merge(a, b, left_index=True, right_index=True, **kwargs)
tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("col1, col2, suffixes", [
("a", "a", [None, None]),
("a", "a", (None, None)),
("a", "a", ("", None)),
(0, 0, [None, None]),
(0, 0, (None, ""))
])
def test_merge_suffix_error(col1, col2, suffixes):
# issue: 24782
a = pd.DataFrame({col1: [1, 2, 3]})
b = pd.DataFrame({col2: [3, 4, 5]})

# TODO: might reconsider current raise behaviour, see issue 24782
msg = "columns overlap but no suffix specified"
with pytest.raises(ValueError, match=msg):
pd.merge(a, b, left_index=True, right_index=True, suffixes=suffixes)


@pytest.mark.parametrize("col1, col2, suffixes", [
("a", "a", None),
(0, 0, None)
])
def test_merge_suffix_none_error(col1, col2, suffixes):
# issue: 24782
a = pd.DataFrame({col1: [1, 2, 3]})
b = pd.DataFrame({col2: [3, 4, 5]})

# TODO: might reconsider current raise behaviour, see GH24782
msg = "iterable"
with pytest.raises(TypeError, match=msg):
pd.merge(a, b, left_index=True, right_index=True, suffixes=suffixes)

0 comments on commit 8dea933

Please sign in to comment.