Skip to content

Commit

Permalink
DEPR: Deprecate convert_float (pandas-dev#41176)
Browse files Browse the repository at this point in the history
  • Loading branch information
ahawryluk authored and TLouf committed Jun 1, 2021
1 parent ae3577d commit b18982c
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 36 deletions.
9 changes: 0 additions & 9 deletions doc/source/user_guide/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3684,15 +3684,6 @@ one can pass an :class:`~pandas.io.excel.ExcelWriter`.
df1.to_excel(writer, sheet_name="Sheet1")
df2.to_excel(writer, sheet_name="Sheet2")
.. note::

Wringing a little more performance out of ``read_excel``
Internally, Excel stores all numeric data as floats. Because this can
produce unexpected behavior when reading in data, pandas defaults to trying
to convert integral floats to integers if it doesn't lose information (``1.0 -->
1``). You can pass ``convert_float=False`` to disable this behavior, which
may give a slight performance improvement.

.. _io.excel_writing_buffer:

Writing Excel files to memory
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -676,6 +676,7 @@ Deprecations
- The ``inplace`` parameter of :meth:`Categorical.remove_categories`, :meth:`Categorical.add_categories`, :meth:`Categorical.reorder_categories`, :meth:`Categorical.rename_categories`, :meth:`Categorical.set_categories` is deprecated and will be removed in a future version (:issue:`37643`)
- Deprecated :func:`merge` producing duplicated columns through the ``suffixes`` keyword and already existing columns (:issue:`22818`)
- Deprecated setting :attr:`Categorical._codes`, create a new :class:`Categorical` with the desired codes instead (:issue:`40606`)
- Deprecated the ``convert_float`` optional argument in :func:`read_excel` and :meth:`ExcelFile.parse` (:issue:`41127`)
- Deprecated behavior of :meth:`DatetimeIndex.union` with mixed timezones; in a future version both will be cast to UTC instead of object dtype (:issue:`39328`)
- Deprecated using ``usecols`` with out of bounds indices for ``read_csv`` with ``engine="c"`` (:issue:`25623`)
- Deprecated passing arguments as positional in :meth:`DataFrame.clip` and :meth:`Series.clip` (other than ``"upper"`` and ``"lower"``) (:issue:`41485`)
Expand Down
33 changes: 19 additions & 14 deletions pandas/io/excel/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import abc
import datetime
import inspect
from io import BytesIO
import os
from textwrap import fill
Expand Down Expand Up @@ -33,6 +32,7 @@
deprecate_nonkeyword_arguments,
doc,
)
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.common import (
is_bool,
Expand Down Expand Up @@ -245,6 +245,10 @@
Convert integral floats to int (i.e., 1.0 --> 1). If False, all numeric
data will be read in as floats: Excel stores all numbers as floats
internally.
.. deprecated:: 1.3.0
convert_float will be removed in a future version
mangle_dupe_cols : bool, default True
Duplicate columns will be specified as 'X', 'X.1', ...'X.N', rather than
'X'...'X'. Passing in False will cause data to be overwritten if there
Expand Down Expand Up @@ -355,7 +359,7 @@ def read_excel(
thousands=None,
comment=None,
skipfooter=0,
convert_float=True,
convert_float=None,
mangle_dupe_cols=True,
storage_options: StorageOptions = None,
):
Expand Down Expand Up @@ -489,11 +493,21 @@ def parse(
thousands=None,
comment=None,
skipfooter=0,
convert_float=True,
convert_float=None,
mangle_dupe_cols=True,
**kwds,
):

if convert_float is None:
convert_float = True
else:
stacklevel = find_stack_level()
warnings.warn(
"convert_float is deprecated and will be removed in a future version",
FutureWarning,
stacklevel=stacklevel,
)

validate_header_arg(header)

ret_dict = False
Expand Down Expand Up @@ -1206,16 +1220,7 @@ def __init__(
f"only the xls format is supported. Install openpyxl instead."
)
elif ext and ext != "xls":
caller = inspect.stack()[1]
if (
caller.filename.endswith(
os.path.join("pandas", "io", "excel", "_base.py")
)
and caller.function == "read_excel"
):
stacklevel = 4
else:
stacklevel = 2
stacklevel = find_stack_level()
warnings.warn(
f"Your version of xlrd is {xlrd_version}. In xlrd >= 2.0, "
f"only the xls format is supported. Install "
Expand Down Expand Up @@ -1251,7 +1256,7 @@ def parse(
thousands=None,
comment=None,
skipfooter=0,
convert_float=True,
convert_float=None,
mangle_dupe_cols=True,
**kwds,
):
Expand Down
34 changes: 25 additions & 9 deletions pandas/tests/io/excel/test_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,9 +434,17 @@ def test_reader_special_dtypes(self, request, read_ext):
float_expected = expected.copy()
float_expected["IntCol"] = float_expected["IntCol"].astype(float)
float_expected.loc[float_expected.index[1], "Str2Col"] = 3.0
actual = pd.read_excel(
basename + read_ext, sheet_name="Sheet1", convert_float=False
)
with tm.assert_produces_warning(
FutureWarning,
match="convert_float is deprecated",
raise_on_extra_warnings=False,
):
# raise_on_extra_warnings=False because xlrd raises a
# PendingDeprecationWarning on the database job Linux_py37_IO
# (ci/deps/actions-37-db.yaml). See GH#41176
actual = pd.read_excel(
basename + read_ext, sheet_name="Sheet1", convert_float=False
)
tm.assert_frame_equal(actual, float_expected)

# check setting Index (assuming xls and xlsx are the same here)
Expand All @@ -456,12 +464,20 @@ def test_reader_special_dtypes(self, request, read_ext):

no_convert_float = float_expected.copy()
no_convert_float["StrCol"] = no_convert_float["StrCol"].apply(str)
actual = pd.read_excel(
basename + read_ext,
sheet_name="Sheet1",
convert_float=False,
converters={"StrCol": str},
)
with tm.assert_produces_warning(
FutureWarning,
match="convert_float is deprecated",
raise_on_extra_warnings=False,
):
# raise_on_extra_warnings=False because xlrd raises a
# PendingDeprecationWarning on the database job Linux_py37_IO
# (ci/deps/actions-37-db.yaml). See GH#41176
actual = pd.read_excel(
basename + read_ext,
sheet_name="Sheet1",
convert_float=False,
converters={"StrCol": str},
)
tm.assert_frame_equal(actual, no_convert_float)

# GH8212 - support for converters and missing values
Expand Down
16 changes: 12 additions & 4 deletions pandas/tests/io/excel/test_writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -474,9 +474,12 @@ def test_int_types(self, np_type, path):
float_frame = df.astype(float)
float_frame.columns = float_frame.columns.astype(float)
float_frame.index = float_frame.index.astype(float)
recons = pd.read_excel(
path, sheet_name="test1", convert_float=False, index_col=0
)
with tm.assert_produces_warning(
FutureWarning, match="convert_float is deprecated"
):
recons = pd.read_excel(
path, sheet_name="test1", convert_float=False, index_col=0
)
tm.assert_frame_equal(recons, float_frame)

@pytest.mark.parametrize("np_type", [np.float16, np.float32, np.float64])
Expand Down Expand Up @@ -1293,7 +1296,12 @@ def test_merged_cell_custom_objects(self, merge_cells, path):
)
expected = DataFrame(np.ones((2, 2)), columns=mi)
expected.to_excel(path)
result = pd.read_excel(path, header=[0, 1], index_col=0, convert_float=False)
with tm.assert_produces_warning(
FutureWarning, match="convert_float is deprecated"
):
result = pd.read_excel(
path, header=[0, 1], index_col=0, convert_float=False
)
# need to convert PeriodIndexes to standard Indexes for assert equal
expected.columns = expected.columns.set_levels(
[[str(i) for i in mi.levels[0]], [str(i) for i in mi.levels[1]]],
Expand Down

0 comments on commit b18982c

Please sign in to comment.