From d0268e719f899789f9606beb4592a17d27086b4c Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Wed, 31 Aug 2022 23:04:02 +0200
Subject: [PATCH] BUG: reindex using wrong fill value when indexing cols and index for uint dtypes (#48185)

---
 doc/source/whatsnew/v1.6.0.rst             |  2 +-
 pandas/core/array_algos/take.py            | 10 +++++++---
 pandas/tests/frame/methods/test_reindex.py | 10 ++++++++++
 3 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst
index 6b7ae767d79d4..67e65cfc26764 100644
--- a/doc/source/whatsnew/v1.6.0.rst
+++ b/doc/source/whatsnew/v1.6.0.rst
@@ -151,7 +151,7 @@ Interval
 
 Indexing
 ^^^^^^^^
--
+- Bug in :meth:`DataFrame.reindex` filling with wrong values when indexing columns and index for ``uint`` dtypes (:issue:`48184`)
 -
 
 Missing
diff --git a/pandas/core/array_algos/take.py b/pandas/core/array_algos/take.py
index 188725f003f1e..f82aeb6df5e32 100644
--- a/pandas/core/array_algos/take.py
+++ b/pandas/core/array_algos/take.py
@@ -546,9 +546,13 @@ def _take_2d_multi_object(
             out[:, col_mask] = fill_value
     for i in range(len(row_idx)):
         u_ = row_idx[i]
-        for j in range(len(col_idx)):
-            v = col_idx[j]
-            out[i, j] = arr[u_, v]
+
+        if u_ != -1:
+            for j in range(len(col_idx)):
+                v = col_idx[j]
+
+                if v != -1:
+                    out[i, j] = arr[u_, v]
 
 
 def _take_preprocess_indexer_and_fill_value(
diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py
index 8575e7895ae5a..daa60be085fd8 100644
--- a/pandas/tests/frame/methods/test_reindex.py
+++ b/pandas/tests/frame/methods/test_reindex.py
@@ -772,6 +772,16 @@ def test_reindex_fill_value(self):
         expected = df.reindex(range(15)).fillna(0)
         tm.assert_frame_equal(result, expected)
 
+    def test_reindex_uint_dtypes_fill_value(self, any_unsigned_int_numpy_dtype):
+        # GH#48184
+        df = DataFrame({"a": [1, 2], "b": [1, 2]}, dtype=any_unsigned_int_numpy_dtype)
+        result = df.reindex(columns=list("abcd"), index=[0, 1, 2, 3], fill_value=10)
+        expected = DataFrame(
+            {"a": [1, 2, 10, 10], "b": [1, 2, 10, 10], "c": 10, "d": 10},
+            dtype=any_unsigned_int_numpy_dtype,
+        )
+        tm.assert_frame_equal(result, expected)
+
     def test_reindex_dups(self):
 
         # GH4746, reindex on duplicate index error messages