diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 953046a3a2a10..8ca23c68657a1 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -3684,15 +3684,6 @@ one can pass an :class:`~pandas.io.excel.ExcelWriter`. df1.to_excel(writer, sheet_name="Sheet1") df2.to_excel(writer, sheet_name="Sheet2") -.. note:: - - Wringing a little more performance out of ``read_excel`` - Internally, Excel stores all numeric data as floats. Because this can - produce unexpected behavior when reading in data, pandas defaults to trying - to convert integers to floats if it doesn't lose information (``1.0 --> - 1``). You can pass ``convert_float=False`` to disable this behavior, which - may give a slight performance improvement. - .. _io.excel_writing_buffer: Writing Excel files to memory diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index db96cbf0f1219..a4236b5d2017e 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -676,6 +676,7 @@ Deprecations - The ``inplace`` parameter of :meth:`Categorical.remove_categories`, :meth:`Categorical.add_categories`, :meth:`Categorical.reorder_categories`, :meth:`Categorical.rename_categories`, :meth:`Categorical.set_categories` is deprecated and will be removed in a future version (:issue:`37643`) - Deprecated :func:`merge` producing duplicated columns through the ``suffixes`` keyword and already existing columns (:issue:`22818`) - Deprecated setting :attr:`Categorical._codes`, create a new :class:`Categorical` with the desired codes instead (:issue:`40606`) +- Deprecated the ``convert_float`` optional argument in :func:`read_excel` and :meth:`ExcelFile.parse` (:issue:`41127`) - Deprecated behavior of :meth:`DatetimeIndex.union` with mixed timezones; in a future version both will be cast to UTC instead of object dtype (:issue:`39328`) - Deprecated using ``usecols`` with out of bounds indices for ``read_csv`` with ``engine="c"`` 
(:issue:`25623`) - Deprecated passing arguments as positional in :meth:`DataFrame.clip` and :meth:`Series.clip` (other than ``"upper"`` and ``"lower"``) (:issue:`41485`) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 9b8e40a977545..42ca68376452d 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -2,7 +2,6 @@ import abc import datetime -import inspect from io import BytesIO import os from textwrap import fill @@ -33,6 +32,7 @@ deprecate_nonkeyword_arguments, doc, ) +from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( is_bool, @@ -245,6 +245,10 @@ Convert integral floats to int (i.e., 1.0 --> 1). If False, all numeric data will be read in as floats: Excel stores all numbers as floats internally. + + .. deprecated:: 1.3.0 + convert_float will be removed in a future version + mangle_dupe_cols : bool, default True Duplicate columns will be specified as 'X', 'X.1', ...'X.N', rather than 'X'...'X'. Passing in False will cause data to be overwritten if there @@ -355,7 +359,7 @@ def read_excel( thousands=None, comment=None, skipfooter=0, - convert_float=True, + convert_float=None, mangle_dupe_cols=True, storage_options: StorageOptions = None, ): @@ -489,11 +493,21 @@ def parse( thousands=None, comment=None, skipfooter=0, - convert_float=True, + convert_float=None, mangle_dupe_cols=True, **kwds, ): + if convert_float is None: + convert_float = True + else: + stacklevel = find_stack_level() + warnings.warn( + "convert_float is deprecated and will be removed in a future version", + FutureWarning, + stacklevel=stacklevel, + ) + validate_header_arg(header) ret_dict = False @@ -1206,16 +1220,7 @@ def __init__( f"only the xls format is supported. Install openpyxl instead." 
) elif ext and ext != "xls": - caller = inspect.stack()[1] - if ( - caller.filename.endswith( - os.path.join("pandas", "io", "excel", "_base.py") - ) - and caller.function == "read_excel" - ): - stacklevel = 4 - else: - stacklevel = 2 + stacklevel = find_stack_level() warnings.warn( f"Your version of xlrd is {xlrd_version}. In xlrd >= 2.0, " f"only the xls format is supported. Install " @@ -1251,7 +1256,7 @@ def parse( thousands=None, comment=None, skipfooter=0, - convert_float=True, + convert_float=None, mangle_dupe_cols=True, **kwds, ): diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index aec638a0d8612..a9e4f52ce0c28 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -434,9 +434,17 @@ def test_reader_special_dtypes(self, request, read_ext): float_expected = expected.copy() float_expected["IntCol"] = float_expected["IntCol"].astype(float) float_expected.loc[float_expected.index[1], "Str2Col"] = 3.0 - actual = pd.read_excel( - basename + read_ext, sheet_name="Sheet1", convert_float=False - ) + with tm.assert_produces_warning( + FutureWarning, + match="convert_float is deprecated", + raise_on_extra_warnings=False, + ): + # raise_on_extra_warnings=False: xlrd raises a PendingDeprecationWarning + # on database job Linux_py37_IO (ci/deps/actions-37-db.yaml) + # See GH#41176 + actual = pd.read_excel( + basename + read_ext, sheet_name="Sheet1", convert_float=False + ) tm.assert_frame_equal(actual, float_expected) # check setting Index (assuming xls and xlsx are the same here) @@ -456,12 +464,20 @@ def test_reader_special_dtypes(self, request, read_ext): no_convert_float = float_expected.copy() no_convert_float["StrCol"] = no_convert_float["StrCol"].apply(str) - actual = pd.read_excel( - basename + read_ext, - sheet_name="Sheet1", - convert_float=False, - converters={"StrCol": str}, - ) + with tm.assert_produces_warning( + FutureWarning, + match="convert_float is deprecated", + 
raise_on_extra_warnings=False, + ): + # raise_on_extra_warnings=False: xlrd raises a PendingDeprecationWarning + # on database job Linux_py37_IO (ci/deps/actions-37-db.yaml) + # See GH#41176 + actual = pd.read_excel( + basename + read_ext, + sheet_name="Sheet1", + convert_float=False, + converters={"StrCol": str}, + ) tm.assert_frame_equal(actual, no_convert_float) # GH8212 - support for converters and missing values diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 67a78f2b1de76..77837bea3e48a 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -474,9 +474,12 @@ def test_int_types(self, np_type, path): float_frame = df.astype(float) float_frame.columns = float_frame.columns.astype(float) float_frame.index = float_frame.index.astype(float) - recons = pd.read_excel( - path, sheet_name="test1", convert_float=False, index_col=0 - ) + with tm.assert_produces_warning( + FutureWarning, match="convert_float is deprecated" + ): + recons = pd.read_excel( + path, sheet_name="test1", convert_float=False, index_col=0 + ) tm.assert_frame_equal(recons, float_frame) @pytest.mark.parametrize("np_type", [np.float16, np.float32, np.float64]) @@ -1293,7 +1296,12 @@ def test_merged_cell_custom_objects(self, merge_cells, path): ) expected = DataFrame(np.ones((2, 2)), columns=mi) expected.to_excel(path) - result = pd.read_excel(path, header=[0, 1], index_col=0, convert_float=False) + with tm.assert_produces_warning( + FutureWarning, match="convert_float is deprecated" + ): + result = pd.read_excel( + path, header=[0, 1], index_col=0, convert_float=False + ) # need to convert PeriodIndexes to standard Indexes for assert equal expected.columns = expected.columns.set_levels( [[str(i) for i in mi.levels[0]], [str(i) for i in mi.levels[1]]],