Patch summary (text before the first "diff --git" line is not part of any hunk):
  * pandas/core/sparse/array.py: fillna() now replaces NaNs held in sp_values
    in both branches, instead of only when the fill value is not null.
  * pandas/tests/sparse/test_frame.py: adds test_from_scipy_object_fillna.

diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py
index ef3600266c037..35de45a2b936c 100644
--- a/pandas/core/sparse/array.py
+++ b/pandas/core/sparse/array.py
@@ -595,12 +595,13 @@ def fillna(self, value, downcast=None):
         if issubclass(self.dtype.type, np.floating):
             value = float(value)
 
+        new_values = self.sp_values.copy()
+        new_values[isnull(new_values)] = value
+
         if self._null_fill_value:
-            return self._simple_new(self.sp_values, self.sp_index,
+            return self._simple_new(new_values, self.sp_index,
                                     fill_value=value)
         else:
-            new_values = self.sp_values.copy()
-            new_values[isnull(new_values)] = value
             return self._simple_new(new_values, self.sp_index,
                                     fill_value=self.fill_value)
 
diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py
index 6b54dca8e93d5..fe7bd4a66baec 100644
--- a/pandas/tests/sparse/test_frame.py
+++ b/pandas/tests/sparse/test_frame.py
@@ -1245,6 +1245,53 @@ def test_from_to_scipy_object(spmatrix, fill_value):
     assert sdf.to_coo().dtype == res_dtype
 
 
+def test_from_scipy_object_fillna(spmatrix):
+    columns = list('cd')
+    index = list('ab')
+    tm.skip_if_no_package('scipy', max_version='0.19.0')
+
+    # Explicitly convert one zero to np.nan
+    arr = np.array([[2.0, 0.0], [np.nan, 1.0]])
+    try:
+        spm = spmatrix(arr)
+        assert spm.dtype == arr.dtype
+    except (TypeError, AssertionError):
+        # If conversion to sparse fails for this spmatrix type and arr.dtype,
+        # then the combination is not currently supported in NumPy, so we
+        # can just skip testing it thoroughly
+        return
+
+    sdf = pd.SparseDataFrame(spm, index=index, columns=columns).fillna(-1.0)
+
+    # Returning frame should fill all nan values with -1.0
+    expected = pd.SparseDataFrame({"c": {"a": 2.0, "b": np.nan},
+                                   "d": {"a": np.nan, "b": 1.0}}).fillna(-1.0)
+    expected_bsr = pd.SparseDataFrame({"c": {"a": 2.0, "b": np.nan},
+                                       "d": {"a": 0.0, "b": 1.0}}).fillna(-1.0)
+
+    import sys
+    from scipy.sparse.bsr import bsr_matrix
+    from scipy.sparse.dia import dia_matrix
+    from scipy.sparse.dok import dok_matrix
+    if spmatrix == bsr_matrix:
+        # A SparseDataFrame from a bsr matrix does not fill 0s
+        # Therefore, only the explicit nan value needs to be filled with -1
+        tm.assert_frame_equal(sdf.to_dense(), expected_bsr.to_dense())
+    elif spmatrix == dia_matrix:
+        # the dia matrix has a bug of a different nature,
+        # so is currently passed in this test suite
+        pass
+    elif spmatrix == dok_matrix and sys.version_info.major == 2:
+        # the dok matrix in python2 has a bug of a different nature,
+        # so is currently passed in this test suite
+        pass
+    else:
+        # The internal representations can differ.
+        # This test is here to ensure that all nan values are filled,
+        # regardless of origin.
+        tm.assert_frame_equal(sdf.to_dense(), expected.to_dense())
+
+
 class TestSparseDataFrameArithmetic(tm.TestCase):
 
     def test_numeric_op_scalar(self):