Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG/DEPR: loc.__setitem__ incorrectly accepting positional slices #31840

Merged
merged 24 commits into from
Mar 8, 2020
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
a076e6a
REF: remove iloc case from _convert_slice_indexer
jbrockmendel Feb 7, 2020
dad937e
Merge branch 'master' of https://github.com/pandas-dev/pandas into po…
jbrockmendel Feb 8, 2020
ccd7ad8
REF: Move Loc-only methods to Loc (#31589)
jbrockmendel Feb 8, 2020
9d6514d
Merge branch 'master' of https://github.com/pandas-dev/pandas into po…
jbrockmendel Feb 9, 2020
b4c63ee
Merge branch 'master' of https://github.com/pandas-dev/pandas into po…
jbrockmendel Feb 9, 2020
6b7ba98
Fix incorrect internal usage of loc, xref #31810
jbrockmendel Feb 9, 2020
1970398
BUG: loc.__setitem__ incorrectly accepting positional slices
jbrockmendel Feb 10, 2020
02ece46
Merge branch 'master' of https://github.com/pandas-dev/pandas into po…
jbrockmendel Feb 10, 2020
7ee2edd
update tests
jbrockmendel Feb 11, 2020
6a0ebb4
Merge branch 'master' of https://github.com/pandas-dev/pandas into po…
jbrockmendel Feb 12, 2020
5f550bf
Merge branch 'master' of https://github.com/pandas-dev/pandas into po…
jbrockmendel Feb 21, 2020
a1a7778
Merge branch 'master' of https://github.com/pandas-dev/pandas into po…
jbrockmendel Feb 22, 2020
aa5a6a1
deprecate instead of raise
jbrockmendel Feb 22, 2020
87f424d
whatsnew
jbrockmendel Feb 22, 2020
f84f837
Merge branch 'master' of https://github.com/pandas-dev/pandas into po…
jbrockmendel Feb 23, 2020
8d2a19a
Merge branch 'master' of https://github.com/pandas-dev/pandas into po…
jbrockmendel Feb 24, 2020
75f2db6
update test
jbrockmendel Feb 24, 2020
2933f2c
Update pandas/core/indexes/base.py
jbrockmendel Mar 2, 2020
acb87aa
Merge branch 'master' of https://github.com/pandas-dev/pandas into po…
jbrockmendel Mar 2, 2020
1589b27
Merge branch 'post-31786' of github.com:jbrockmendel/pandas into post…
jbrockmendel Mar 2, 2020
46402f8
update message
jbrockmendel Mar 2, 2020
32cfe70
Merge branch 'master' of https://github.com/pandas-dev/pandas into po…
jbrockmendel Mar 5, 2020
a14aa8d
optimize stacklevel for DataFrame
jbrockmendel Mar 5, 2020
e9a89a5
Merge branch 'master' of https://github.com/pandas-dev/pandas into po…
jbrockmendel Mar 5, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ Deprecations
~~~~~~~~~~~~
- Lookups on a :class:`Series` with a single-item list containing a slice (e.g. ``ser[[slice(0, 4)]]``) are deprecated, will raise in a future version. Either convert the list to tuple, or pass the slice directly instead (:issue:`31333`)
- :meth:`DataFrame.mean` and :meth:`DataFrame.median` with ``numeric_only=None`` will include datetime64 and datetime64tz columns in a future version (:issue:`29941`)
-
- Setting values with ``.loc`` using a positional slice is deprecated and will raise in a future version. Use ``.iloc`` instead (:issue:`31840`)
-

.. ---------------------------------------------------------------------------
Expand Down
10 changes: 10 additions & 0 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3177,8 +3177,18 @@ def is_int(v):
pass

if com.is_null_slice(key):
# It doesn't matter if we are positional or label based
indexer = key
elif is_positional:
if kind == "loc":
# GH#16121, GH#24612, GH#31810
warnings.warn(
"Slicing .loc with a positional slice is not supported, "
jbrockmendel marked this conversation as resolved.
Show resolved Hide resolved
"and will raise TypeError in a future version. "
"Use .iloc instead.",
FutureWarning,
stacklevel=5,
)
indexer = key
else:
indexer = self.slice_indexer(start, stop, step, kind=kind)
Expand Down
8 changes: 4 additions & 4 deletions pandas/tests/frame/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ def float_frame_with_na():
"""
df = DataFrame(tm.getSeriesData())
# set some NAs
df.loc[5:10] = np.nan
df.loc[15:20, -2:] = np.nan
df.iloc[5:10] = np.nan
df.iloc[15:20, -2:] = np.nan
return df


Expand Down Expand Up @@ -67,8 +67,8 @@ def bool_frame_with_na():
df = DataFrame(tm.getSeriesData()) > 0
df = df.astype(object)
# set some NAs
df.loc[5:10] = np.nan
df.loc[15:20, -2:] = np.nan
df.iloc[5:10] = np.nan
df.iloc[15:20, -2:] = np.nan
return df


Expand Down
10 changes: 5 additions & 5 deletions pandas/tests/frame/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1205,7 +1205,7 @@ def test_setitem_frame_mixed(self, float_string_frame):
piece = DataFrame(
[[1.0, 2.0], [3.0, 4.0]], index=f.index[0:2], columns=["A", "B"]
)
key = (slice(None, 2), ["A", "B"])
key = (f.index[slice(None, 2)], ["A", "B"])
f.loc[key] = piece
tm.assert_almost_equal(f.loc[f.index[0:2], ["A", "B"]].values, piece.values)

Expand All @@ -1216,7 +1216,7 @@ def test_setitem_frame_mixed(self, float_string_frame):
index=list(f.index[0:2]) + ["foo", "bar"],
columns=["A", "B"],
)
key = (slice(None, 2), ["A", "B"])
key = (f.index[slice(None, 2)], ["A", "B"])
f.loc[key] = piece
tm.assert_almost_equal(
f.loc[f.index[0:2:], ["A", "B"]].values, piece.values[0:2]
Expand All @@ -1226,15 +1226,15 @@ def test_setitem_frame_mixed(self, float_string_frame):
f = float_string_frame.copy()
piece = f.loc[f.index[:2], ["A"]]
piece.index = f.index[-2:]
key = (slice(-2, None), ["A", "B"])
key = (f.index[slice(-2, None)], ["A", "B"])
f.loc[key] = piece
piece["B"] = np.nan
tm.assert_almost_equal(f.loc[f.index[-2:], ["A", "B"]].values, piece.values)

# ndarray
f = float_string_frame.copy()
piece = float_string_frame.loc[f.index[:2], ["A", "B"]]
key = (slice(-2, None), ["A", "B"])
key = (f.index[slice(-2, None)], ["A", "B"])
f.loc[key] = piece.values
tm.assert_almost_equal(f.loc[f.index[-2:], ["A", "B"]].values, piece.values)

Expand Down Expand Up @@ -1869,7 +1869,7 @@ def test_setitem_datetimelike_with_inference(self):
df = DataFrame(index=date_range("20130101", periods=4))
df["A"] = np.array([1 * one_hour] * 4, dtype="m8[ns]")
df.loc[:, "B"] = np.array([2 * one_hour] * 4, dtype="m8[ns]")
df.loc[:3, "C"] = np.array([3 * one_hour] * 3, dtype="m8[ns]")
df.loc[df.index[:3], "C"] = np.array([3 * one_hour] * 3, dtype="m8[ns]")
df.loc[:, "D"] = np.array([4 * one_hour] * 4, dtype="m8[ns]")
df.loc[df.index[:3], "E"] = np.array([5 * one_hour] * 3, dtype="m8[ns]")
df["F"] = np.timedelta64("NaT")
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/frame/methods/test_asof.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class TestFrameAsof:
def test_basic(self, date_range_frame):
df = date_range_frame
N = 50
df.loc[15:30, "A"] = np.nan
df.loc[df.index[15:30], "A"] = np.nan
dates = date_range("1/1/1990", periods=N * 3, freq="25s")

result = df.asof(dates)
Expand All @@ -41,7 +41,7 @@ def test_basic(self, date_range_frame):
def test_subset(self, date_range_frame):
N = 10
df = date_range_frame.iloc[:N].copy()
df.loc[4:8, "A"] = np.nan
df.loc[df.index[4:8], "A"] = np.nan
dates = date_range("1/1/1990", periods=N * 3, freq="25s")

# with a subset of A should be the same
Expand Down Expand Up @@ -149,7 +149,7 @@ def test_is_copy(self, date_range_frame):
# doesn't track the parent dataframe / doesn't give SettingWithCopy warnings
df = date_range_frame
N = 50
df.loc[15:30, "A"] = np.nan
df.loc[df.index[15:30], "A"] = np.nan
dates = date_range("1/1/1990", periods=N * 3, freq="25s")

result = df.asof(dates)
Expand Down
8 changes: 4 additions & 4 deletions pandas/tests/frame/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -913,8 +913,8 @@ def test_sum_bools(self):

def test_idxmin(self, float_frame, int_frame):
frame = float_frame
frame.loc[5:10] = np.nan
frame.loc[15:20, -2:] = np.nan
frame.iloc[5:10] = np.nan
frame.iloc[15:20, -2:] = np.nan
for skipna in [True, False]:
for axis in [0, 1]:
for df in [frame, int_frame]:
Expand All @@ -928,8 +928,8 @@ def test_idxmin(self, float_frame, int_frame):

def test_idxmax(self, float_frame, int_frame):
frame = float_frame
frame.loc[5:10] = np.nan
frame.loc[15:20, -2:] = np.nan
frame.iloc[5:10] = np.nan
frame.iloc[15:20, -2:] = np.nan
for skipna in [True, False]:
for axis in [0, 1]:
for df in [frame, int_frame]:
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,7 @@ def test_apply_yield_list(self, float_frame):
tm.assert_frame_equal(result, float_frame)

def test_apply_reduce_Series(self, float_frame):
float_frame.loc[::2, "A"] = np.nan
float_frame["A"].iloc[::2] = np.nan
expected = float_frame.mean(1)
result = float_frame.apply(np.mean, axis=1)
tm.assert_series_equal(result, expected)
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/test_block_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,7 @@ def test_convert_objects(self, float_string_frame):
length = len(float_string_frame)
float_string_frame["J"] = "1."
float_string_frame["K"] = "1"
float_string_frame.loc[0:5, ["J", "K"]] = "garbled"
float_string_frame.loc[float_string_frame.index[0:5], ["J", "K"]] = "garbled"
converted = float_string_frame._convert(datetime=True, numeric=True)
assert converted["H"].dtype == "float64"
assert converted["I"].dtype == "int64"
Expand Down
24 changes: 12 additions & 12 deletions pandas/tests/frame/test_cumulative.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ def test_cumsum_corner(self):
result = dm.cumsum() # noqa

def test_cumsum(self, datetime_frame):
datetime_frame.loc[5:10, 0] = np.nan
datetime_frame.loc[10:15, 1] = np.nan
datetime_frame.loc[15:, 2] = np.nan
datetime_frame.iloc[5:10, 0] = np.nan
datetime_frame.iloc[10:15, 1] = np.nan
datetime_frame.iloc[15:, 2] = np.nan

# axis = 0
cumsum = datetime_frame.cumsum()
Expand All @@ -46,9 +46,9 @@ def test_cumsum(self, datetime_frame):
assert np.shape(cumsum_xs) == np.shape(datetime_frame)

def test_cumprod(self, datetime_frame):
datetime_frame.loc[5:10, 0] = np.nan
datetime_frame.loc[10:15, 1] = np.nan
datetime_frame.loc[15:, 2] = np.nan
datetime_frame.iloc[5:10, 0] = np.nan
datetime_frame.iloc[10:15, 1] = np.nan
datetime_frame.iloc[15:, 2] = np.nan

# axis = 0
cumprod = datetime_frame.cumprod()
Expand Down Expand Up @@ -80,9 +80,9 @@ def test_cumprod(self, datetime_frame):
strict=False,
)
def test_cummin(self, datetime_frame):
datetime_frame.loc[5:10, 0] = np.nan
datetime_frame.loc[10:15, 1] = np.nan
datetime_frame.loc[15:, 2] = np.nan
datetime_frame.iloc[5:10, 0] = np.nan
datetime_frame.iloc[10:15, 1] = np.nan
datetime_frame.iloc[15:, 2] = np.nan

# axis = 0
cummin = datetime_frame.cummin()
Expand All @@ -108,9 +108,9 @@ def test_cummin(self, datetime_frame):
strict=False,
)
def test_cummax(self, datetime_frame):
datetime_frame.loc[5:10, 0] = np.nan
datetime_frame.loc[10:15, 1] = np.nan
datetime_frame.loc[15:, 2] = np.nan
datetime_frame.iloc[5:10, 0] = np.nan
datetime_frame.iloc[10:15, 1] = np.nan
datetime_frame.iloc[15:, 2] = np.nan

# axis = 0
cummax = datetime_frame.cummax()
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/test_to_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -761,7 +761,7 @@ def create_cols(name):
)

# add in some nans
df_float.loc[30:50, 1:3] = np.nan
df_float.iloc[30:50, 1:3] = np.nan

# ## this is a bug in read_csv right now ####
# df_dt.loc[30:50,1:3] = np.nan
Expand Down
40 changes: 40 additions & 0 deletions pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -863,6 +863,7 @@ def test_loc_setitem_empty_append_raises(self):

data = [1, 2]
df = DataFrame(columns=["x", "y"])
df.index = df.index.astype(np.int64)
msg = (
r"None of \[Int64Index\(\[0, 1\], dtype='int64'\)\] "
r"are in the \[index\]"
Expand Down Expand Up @@ -975,3 +976,42 @@ def test_loc_mixed_int_float():

result = ser.loc[1]
assert result == 0


def test_loc_with_positional_slice_deprecation():
# GH#31840
ser = pd.Series(range(4), index=["A", "B", "C", "D"])

with tm.assert_produces_warning(FutureWarning):
ser.loc[:3] = 2

expected = pd.Series([2, 2, 2, 3], index=["A", "B", "C", "D"])
tm.assert_series_equal(ser, expected)


def test_loc_slice_disallows_positional():
# GH#16121, GH#24612, GH#31810
dti = pd.date_range("2016-01-01", periods=3)
df = pd.DataFrame(np.random.random((3, 2)), index=dti)

ser = df[0]

msg = (
"cannot do slice indexing on DatetimeIndex with these "
r"indexers \[1\] of type int"
)

for obj in [df, ser]:
with pytest.raises(TypeError, match=msg):
obj.loc[1:3]

with tm.assert_produces_warning(FutureWarning):
# GH#31840 deprecated incorrect behavior
obj.loc[1:3] = 1

with pytest.raises(TypeError, match=msg):
df.loc[1:3, 1]

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would optimise the stacklevel so it works correctly for dataframe, instead of series.
I would think doing this with dataframe will be more common (it's also the case that is more difficult to solve). At least in our tests it was mainly with dataframes

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated stacklevel

# GH#31840 deprecated incorrect behavior
df.loc[1:3, 1] = 2
34 changes: 17 additions & 17 deletions pandas/tests/io/pytables/test_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ def test_repr(self, setup_path):
df["timestamp2"] = Timestamp("20010103")
df["datetime1"] = datetime.datetime(2001, 1, 2, 0, 0)
df["datetime2"] = datetime.datetime(2001, 1, 3, 0, 0)
df.loc[3:6, ["obj1"]] = np.nan
df.loc[df.index[3:6], ["obj1"]] = np.nan
df = df._consolidate()._convert(datetime=True)

with catch_warnings(record=True):
Expand Down Expand Up @@ -846,7 +846,7 @@ def test_put_mixed_type(self, setup_path):
df["timestamp2"] = Timestamp("20010103")
df["datetime1"] = datetime.datetime(2001, 1, 2, 0, 0)
df["datetime2"] = datetime.datetime(2001, 1, 3, 0, 0)
df.loc[3:6, ["obj1"]] = np.nan
df.loc[df.index[3:6], ["obj1"]] = np.nan
df = df._consolidate()._convert(datetime=True)

with ensure_clean_store(setup_path) as store:
Expand Down Expand Up @@ -1372,11 +1372,11 @@ def check_col(key, name, size):
_maybe_remove(store, "df")
df = tm.makeTimeDataFrame()
df["string"] = "foo"
df.loc[1:4, "string"] = np.nan
df.loc[df.index[1:4], "string"] = np.nan
df["string2"] = "bar"
df.loc[4:8, "string2"] = np.nan
df.loc[df.index[4:8], "string2"] = np.nan
df["string3"] = "bah"
df.loc[1:, "string3"] = np.nan
df.loc[df.index[1:], "string3"] = np.nan
store.append("df", df)
result = store.select("df")
tm.assert_frame_equal(result, df)
Expand Down Expand Up @@ -1492,8 +1492,8 @@ def test_append_with_data_columns(self, setup_path):
# data column selection with a string data_column
df_new = df.copy()
df_new["string"] = "foo"
df_new.loc[1:4, "string"] = np.nan
df_new.loc[5:6, "string"] = "bar"
df_new.loc[df_new.index[1:4], "string"] = np.nan
df_new.loc[df_new.index[5:6], "string"] = "bar"
_maybe_remove(store, "df")
store.append("df", df_new, data_columns=["string"])
result = store.select("df", "string='foo'")
Expand Down Expand Up @@ -1574,12 +1574,12 @@ def check_col(key, name, size):
# doc example
df_dc = df.copy()
df_dc["string"] = "foo"
df_dc.loc[4:6, "string"] = np.nan
df_dc.loc[7:9, "string"] = "bar"
df_dc.loc[df_dc.index[4:6], "string"] = np.nan
df_dc.loc[df_dc.index[7:9], "string"] = "bar"
df_dc["string2"] = "cool"
df_dc["datetime"] = Timestamp("20010102")
df_dc = df_dc._convert(datetime=True)
df_dc.loc[3:5, ["A", "B", "datetime"]] = np.nan
df_dc.loc[df_dc.index[3:5], ["A", "B", "datetime"]] = np.nan

_maybe_remove(store, "df_dc")
store.append(
Expand All @@ -1602,8 +1602,8 @@ def check_col(key, name, size):
np.random.randn(8, 3), index=index, columns=["A", "B", "C"]
)
df_dc["string"] = "foo"
df_dc.loc[4:6, "string"] = np.nan
df_dc.loc[7:9, "string"] = "bar"
df_dc.loc[df_dc.index[4:6], "string"] = np.nan
df_dc.loc[df_dc.index[7:9], "string"] = "bar"
df_dc.loc[:, ["B", "C"]] = df_dc.loc[:, ["B", "C"]].abs()
df_dc["string2"] = "cool"

Expand Down Expand Up @@ -2024,7 +2024,7 @@ def test_table_mixed_dtypes(self, setup_path):
df["timestamp2"] = Timestamp("20010103")
df["datetime1"] = datetime.datetime(2001, 1, 2, 0, 0)
df["datetime2"] = datetime.datetime(2001, 1, 3, 0, 0)
df.loc[3:6, ["obj1"]] = np.nan
df.loc[df.index[3:6], ["obj1"]] = np.nan
df = df._consolidate()._convert(datetime=True)

with ensure_clean_store(setup_path) as store:
Expand Down Expand Up @@ -2200,7 +2200,7 @@ def test_invalid_terms(self, setup_path):

df = tm.makeTimeDataFrame()
df["string"] = "foo"
df.loc[0:4, "string"] = "bar"
df.loc[df.index[0:4], "string"] = "bar"

store.put("df", df, format="table")

Expand Down Expand Up @@ -3343,7 +3343,7 @@ def test_string_select(self, setup_path):

# test string ==/!=
df["x"] = "none"
df.loc[2:7, "x"] = ""
df.loc[df.index[2:7], "x"] = ""

store.append("df", df, data_columns=["x"])

Expand All @@ -3365,7 +3365,7 @@ def test_string_select(self, setup_path):

# int ==/!=
df["int"] = 1
df.loc[2:7, "int"] = 2
df.loc[df.index[2:7], "int"] = 2

store.append("df3", df, data_columns=["int"])

Expand Down Expand Up @@ -3419,7 +3419,7 @@ def test_read_column(self, setup_path):
# a data column with NaNs, result excludes the NaNs
df3 = df.copy()
df3["string"] = "foo"
df3.loc[4:6, "string"] = np.nan
df3.loc[df3.index[4:6], "string"] = np.nan
store.append("df3", df3, data_columns=["string"])
result = store.select_column("df3", "string")
tm.assert_almost_equal(result.values, df3["string"].values)
Expand Down
Loading