DEPR: Deprecate convert_float (pandas-dev#41176)

TLouf · Jun 1, 2021 · b18982c · b18982c
1 parent ae3577d
commit b18982c
Show file tree

Hide file tree

Showing 5 changed files with 57 additions and 36 deletions.
diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
@@ -3684,15 +3684,6 @@ one can pass an :class:`~pandas.io.excel.ExcelWriter`.
        df1.to_excel(writer, sheet_name="Sheet1")
        df2.to_excel(writer, sheet_name="Sheet2")
 
-.. note::
-
-    Wringing a little more performance out of ``read_excel``
-    Internally, Excel stores all numeric data as floats. Because this can
-    produce unexpected behavior when reading in data, pandas defaults to trying
-    to convert integers to floats if it doesn't lose information (``1.0 -->
-    1``).  You can pass ``convert_float=False`` to disable this behavior, which
-    may give a slight performance improvement.
-
 .. _io.excel_writing_buffer:
 
 Writing Excel files to memory

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
@@ -676,6 +676,7 @@ Deprecations
 - The ``inplace`` parameter of :meth:`Categorical.remove_categories`, :meth:`Categorical.add_categories`, :meth:`Categorical.reorder_categories`, :meth:`Categorical.rename_categories`, :meth:`Categorical.set_categories` is deprecated and will be removed in a future version (:issue:`37643`)
 - Deprecated :func:`merge` producing duplicated columns through the ``suffixes`` keyword  and already existing columns (:issue:`22818`)
 - Deprecated setting :attr:`Categorical._codes`, create a new :class:`Categorical` with the desired codes instead (:issue:`40606`)
+- Deprecated the ``convert_float`` optional argument in :func:`read_excel` and :meth:`ExcelFile.parse` (:issue:`41127`)
 - Deprecated behavior of :meth:`DatetimeIndex.union` with mixed timezones; in a future version both will be cast to UTC instead of object dtype (:issue:`39328`)
 - Deprecated using ``usecols`` with out of bounds indices for ``read_csv`` with ``engine="c"`` (:issue:`25623`)
 - Deprecated passing arguments as positional in :meth:`DataFrame.clip` and :meth:`Series.clip` (other than ``"upper"`` and ``"lower"``) (:issue:`41485`)

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
@@ -2,7 +2,6 @@
 
 import abc
 import datetime
-import inspect
 from io import BytesIO
 import os
 from textwrap import fill
@@ -33,6 +32,7 @@
     deprecate_nonkeyword_arguments,
     doc,
 )
+from pandas.util._exceptions import find_stack_level
 
 from pandas.core.dtypes.common import (
     is_bool,
@@ -245,6 +245,10 @@
     Convert integral floats to int (i.e., 1.0 --> 1). If False, all numeric
     data will be read in as floats: Excel stores all numbers as floats
     internally.
+
+    .. deprecated:: 1.3.0
+        convert_float will be removed in a future version
+
 mangle_dupe_cols : bool, default True
     Duplicate columns will be specified as 'X', 'X.1', ...'X.N', rather than
     'X'...'X'. Passing in False will cause data to be overwritten if there
@@ -355,7 +359,7 @@ def read_excel(
     thousands=None,
     comment=None,
     skipfooter=0,
-    convert_float=True,
+    convert_float=None,
     mangle_dupe_cols=True,
     storage_options: StorageOptions = None,
 ):
@@ -489,11 +493,21 @@ def parse(
         thousands=None,
         comment=None,
         skipfooter=0,
-        convert_float=True,
+        convert_float=None,
         mangle_dupe_cols=True,
         **kwds,
     ):
 
+        if convert_float is None:
+            convert_float = True
+        else:
+            stacklevel = find_stack_level()
+            warnings.warn(
+                "convert_float is deprecated and will be removed in a future version",
+                FutureWarning,
+                stacklevel=stacklevel,
+            )
+
         validate_header_arg(header)
 
         ret_dict = False
@@ -1206,16 +1220,7 @@ def __init__(
                     f"only the xls format is supported. Install openpyxl instead."
                 )
             elif ext and ext != "xls":
-                caller = inspect.stack()[1]
-                if (
-                    caller.filename.endswith(
-                        os.path.join("pandas", "io", "excel", "_base.py")
-                    )
-                    and caller.function == "read_excel"
-                ):
-                    stacklevel = 4
-                else:
-                    stacklevel = 2
+                stacklevel = find_stack_level()
                 warnings.warn(
                     f"Your version of xlrd is {xlrd_version}. In xlrd >= 2.0, "
                     f"only the xls format is supported. Install "
@@ -1251,7 +1256,7 @@ def parse(
         thousands=None,
         comment=None,
         skipfooter=0,
-        convert_float=True,
+        convert_float=None,
         mangle_dupe_cols=True,
         **kwds,
     ):

diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py
@@ -434,9 +434,17 @@ def test_reader_special_dtypes(self, request, read_ext):
         float_expected = expected.copy()
         float_expected["IntCol"] = float_expected["IntCol"].astype(float)
         float_expected.loc[float_expected.index[1], "Str2Col"] = 3.0
-        actual = pd.read_excel(
-            basename + read_ext, sheet_name="Sheet1", convert_float=False
-        )
+        with tm.assert_produces_warning(
+            FutureWarning,
+            match="convert_float is deprecated",
+            raise_on_extra_warnings=False,
+        ):
+            # raise_on_extra_warnings because xlrd raises a PendingDeprecationWarning
+            # on database job Linux_py37_IO (ci/deps/actions-37-db.yaml)
+            # See GH#41176
+            actual = pd.read_excel(
+                basename + read_ext, sheet_name="Sheet1", convert_float=False
+            )
         tm.assert_frame_equal(actual, float_expected)
 
         # check setting Index (assuming xls and xlsx are the same here)
@@ -456,12 +464,20 @@ def test_reader_special_dtypes(self, request, read_ext):
 
         no_convert_float = float_expected.copy()
         no_convert_float["StrCol"] = no_convert_float["StrCol"].apply(str)
-        actual = pd.read_excel(
-            basename + read_ext,
-            sheet_name="Sheet1",
-            convert_float=False,
-            converters={"StrCol": str},
-        )
+        with tm.assert_produces_warning(
+            FutureWarning,
+            match="convert_float is deprecated",
+            raise_on_extra_warnings=False,
+        ):
+            # raise_on_extra_warnings because xlrd raises a PendingDeprecationWarning
+            # on database job Linux_py37_IO (ci/deps/actions-37-db.yaml)
+            # See GH#41176
+            actual = pd.read_excel(
+                basename + read_ext,
+                sheet_name="Sheet1",
+                convert_float=False,
+                converters={"StrCol": str},
+            )
         tm.assert_frame_equal(actual, no_convert_float)
 
     # GH8212 - support for converters and missing values

diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py
@@ -474,9 +474,12 @@ def test_int_types(self, np_type, path):
         float_frame = df.astype(float)
         float_frame.columns = float_frame.columns.astype(float)
         float_frame.index = float_frame.index.astype(float)
-        recons = pd.read_excel(
-            path, sheet_name="test1", convert_float=False, index_col=0
-        )
+        with tm.assert_produces_warning(
+            FutureWarning, match="convert_float is deprecated"
+        ):
+            recons = pd.read_excel(
+                path, sheet_name="test1", convert_float=False, index_col=0
+            )
         tm.assert_frame_equal(recons, float_frame)
 
     @pytest.mark.parametrize("np_type", [np.float16, np.float32, np.float64])
@@ -1293,7 +1296,12 @@ def test_merged_cell_custom_objects(self, merge_cells, path):
         )
         expected = DataFrame(np.ones((2, 2)), columns=mi)
         expected.to_excel(path)
-        result = pd.read_excel(path, header=[0, 1], index_col=0, convert_float=False)
+        with tm.assert_produces_warning(
+            FutureWarning, match="convert_float is deprecated"
+        ):
+            result = pd.read_excel(
+                path, header=[0, 1], index_col=0, convert_float=False
+            )
         # need to convert PeriodIndexes to standard Indexes for assert equal
         expected.columns = expected.columns.set_levels(
             [[str(i) for i in mi.levels[0]], [str(i) for i in mi.levels[1]]],