Skip to content

Commit

Permalink
BUG: do not cast ints to floats if inputs of crosstab are not aligned
Browse files Browse the repository at this point in the history
  • Loading branch information
toobaz committed Jul 18, 2017
1 parent fcb0263 commit c70b22c
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 11 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -204,3 +204,4 @@ Categorical
Other
^^^^^
- Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`)
- Bug in :func:`crosstab` where non-aligned series of integers were cast to float (:issue:`17005`)
29 changes: 18 additions & 11 deletions pandas/core/reshape/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from pandas import Series, DataFrame, MultiIndex, Index
from pandas.core.groupby import Grouper
from pandas.core.reshape.util import cartesian_product
from pandas.core.index import _get_combined_index
from pandas.compat import range, lrange, zip
from pandas import compat
import pandas.core.common as com
Expand Down Expand Up @@ -493,6 +494,13 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None,
rownames = _get_names(index, rownames, prefix='row')
colnames = _get_names(columns, colnames, prefix='col')

obs_idxes = [obj.index for objs in (index, columns) for obj in objs
if hasattr(obj, 'index')]
if obs_idxes:
common_idx = _get_combined_index(obs_idxes, intersect=True)
else:
common_idx = None

data = {}
data.update(zip(rownames, index))
data.update(zip(colnames, columns))
Expand All @@ -503,20 +511,19 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None,
if values is not None and aggfunc is None:
raise ValueError("values cannot be used without an aggfunc.")

df = DataFrame(data, index=common_idx)
if values is None:
df = DataFrame(data)
df['__dummy__'] = 0
table = df.pivot_table('__dummy__', index=rownames, columns=colnames,
aggfunc=len, margins=margins,
margins_name=margins_name, dropna=dropna)
table = table.fillna(0).astype(np.int64)

aggfunc = len
else:
data['__dummy__'] = values
df = DataFrame(data)
table = df.pivot_table('__dummy__', index=rownames, columns=colnames,
aggfunc=aggfunc, margins=margins,
margins_name=margins_name, dropna=dropna)
df['__dummy__'] = values

table = df.pivot_table('__dummy__', index=rownames, columns=colnames,
aggfunc=aggfunc, margins=margins,
margins_name=margins_name, dropna=dropna)

if values is None:
table = table.fillna(0).astype(np.int64)

# Post-process
if normalize is not False:
Expand Down
16 changes: 16 additions & 0 deletions pandas/tests/reshape/test_pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -1058,6 +1058,22 @@ def test_crosstab_ndarray(self):
assert result.index.name == 'row_0'
assert result.columns.name == 'col_0'

def test_crosstab_non_aligned(self):
# GH 17005
# Regression test for crosstab inputs whose indexes are not aligned.
# `a` and `b` are Series that share only the labels 'a', 'b' and 'c';
# `b` carries two extra labels ('d', 'f'). `c` is a plain ndarray with
# the same three values `b` holds at the shared labels.
a = pd.Series([0, 1, 1], index=['a', 'b', 'c'])
b = pd.Series([3, 4, 3, 4, 3], index=['a', 'b', 'c', 'd', 'f'])
c = np.array([3, 4, 3])

# Counts over the three shared observations, i.e. the (row, col) pairs
# (0, 3), (1, 4) and (1, 3). The frame is built from integer literals.
expected = pd.DataFrame([[1, 0], [1, 1]],
index=Index([0, 1], name='row_0'),
columns=Index([3, 4], name='col_0'))

# Series against a Series with a longer, non-aligned index.
result = crosstab(a, b)
tm.assert_frame_equal(result, expected)

# Series against an ndarray, which has no index of its own.
result = crosstab(a, c)
tm.assert_frame_equal(result, expected)

def test_crosstab_margins(self):
a = np.random.randint(0, 7, size=100)
b = np.random.randint(0, 3, size=100)
Expand Down

0 comments on commit c70b22c

Please sign in to comment.