diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index beebe06e7477e..97adf2aa39710 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -24,6 +24,7 @@ is_numeric_dtype, is_integer, is_int_or_datetime_dtype, + is_datetimelike, is_dtype_equal, is_bool, is_list_like, @@ -877,7 +878,7 @@ def _get_merge_keys(self): return left_keys, right_keys, join_names def _maybe_coerce_merge_keys(self): - # we have valid mergee's but we may have to further + # we have valid mergees but we may have to further # coerce these if they are originally incompatible types # # for example if these are categorical, but are not dtype_equal @@ -894,6 +895,13 @@ def _maybe_coerce_merge_keys(self): if is_categorical_dtype(lk) and is_categorical_dtype(rk): if lk.is_dtype_equal(rk): continue + + # if we are dates with differing categories + # then allow them to proceed because + # coercing to object below results in integers. + if is_datetimelike(lk.categories) and is_datetimelike(rk.categories): + continue + elif is_categorical_dtype(lk) or is_categorical_dtype(rk): pass @@ -904,7 +912,7 @@ def _maybe_coerce_merge_keys(self): # kinds to proceed, eg. int64 and int8 # further if we are object, but we infer to # the same, then proceed - if (is_numeric_dtype(lk) and is_numeric_dtype(rk)): + if is_numeric_dtype(lk) and is_numeric_dtype(rk): if lk.dtype.kind == rk.dtype.kind: continue diff --git a/pandas/tests/reshape/test_merge.py b/pandas/tests/reshape/test_merge.py index 919675188576e..d21a8419b384c 100644 --- a/pandas/tests/reshape/test_merge.py +++ b/pandas/tests/reshape/test_merge.py @@ -1,7 +1,7 @@ # pylint: disable=E1103 import pytest -from datetime import datetime +from datetime import datetime, date from numpy.random import randn from numpy import nan import numpy as np @@ -1515,6 +1515,27 @@ def test_self_join_multiple_categories(self): assert_frame_equal(result, df) + def test_dtype_on_categorical_dates(self): + # GH 16900 + # dates should not be coerced to ints + + df = pd.DataFrame( + [[date(2001, 1, 1), 1.1], + [date(2001, 1, 2), 1.3]], + columns=['date', 'num2'] + ) + df['date'] = df['date'].astype('category') + + df2 = pd.DataFrame( + [[date(2001, 1, 1), 1.3], + [date(2001, 1, 3), 1.4]], + columns=['date', 'num4'] + ) + df2['date'] = df2['date'].astype('category') + + result = pd.merge(df, df2, how='outer', on=['date']) + assert result['date'].dtype == 'category' + @pytest.fixture def left_df():