Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

REF: move shift logic from BlockManager to DataFrame #40536

Merged
merged 6 commits into from
Mar 23, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/user_guide/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5240,6 +5240,7 @@ Write to a feather file.
Read from a feather file.

.. ipython:: python
:okwarning:

result = pd.read_feather("example.feather")
result
Expand Down Expand Up @@ -5323,6 +5324,7 @@ Write to a parquet file.
Read from a parquet file.

.. ipython:: python
:okwarning:

result = pd.read_parquet("example_fp.parquet", engine="fastparquet")
result = pd.read_parquet("example_pa.parquet", engine="pyarrow")
Expand Down
51 changes: 34 additions & 17 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5080,28 +5080,45 @@ def shift(
axis = self._get_axis_number(axis)

ncols = len(self.columns)
if axis == 1 and periods != 0 and fill_value is lib.no_default and ncols > 0:
# We will infer fill_value to match the closest column

# Use a column that we know is valid for our column's dtype GH#38434
label = self.columns[0]
if (
axis == 1
and periods != 0
and ncols > 0
and (fill_value is lib.no_default or len(self._mgr.arrays) > 1)
):
# Exclude single-array-with-fill_value case so we issue a FutureWarning
# if an integer is passed with datetimelike dtype GH#31971
from pandas import concat

# tail: the data that is still in our shifted DataFrame
if periods > 0:
result = self.iloc[:, :-periods]
for col in range(min(ncols, abs(periods))):
# TODO(EA2D): doing this in a loop unnecessary with 2D EAs
# Define filler inside loop so we get a copy
filler = self.iloc[:, 0].shift(len(self))
result.insert(0, label, filler, allow_duplicates=True)
tail = self.iloc[:, :-periods]
else:
result = self.iloc[:, -periods:]
for col in range(min(ncols, abs(periods))):
# Define filler inside loop so we get a copy
filler = self.iloc[:, -1].shift(len(self))
result.insert(
len(result.columns), label, filler, allow_duplicates=True
)
tail = self.iloc[:, -periods:]
# pin a simple Index to avoid costly casting
tail.columns = range(len(tail.columns))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

are we guaranteed to copy here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

copy what?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh. tail is not necessarily a copy of the data in self, but we do make a copy in the concat call below, so the result we return is always a copy

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

right that's the problem you are actually modifying the input here (e.g. you are setting columns).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tail is never the same object as self, just may be a view on it

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

got it ok


if fill_value is not lib.no_default:
# GH#35488
# TODO(EA2D): with 2D EAs we could construct other directly
ser = Series(fill_value, index=self.index)
else:
# We infer fill_value to match the closest column
if periods > 0:
ser = self.iloc[:, 0].shift(len(self))
else:
ser = self.iloc[:, -1].shift(len(self))

width = min(abs(periods), ncols)
other = concat([ser] * width, axis=1)

if periods > 0:
result = concat([other, tail], axis=1)
else:
result = concat([tail, other], axis=1)

result = cast(DataFrame, result)
result.columns = self.columns.copy()
return result

Expand Down
19 changes: 0 additions & 19 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -621,25 +621,6 @@ def shift(self, periods: int, axis: int, fill_value) -> BlockManager:
if fill_value is lib.no_default:
fill_value = None

if axis == 0 and self.ndim == 2 and self.nblocks > 1:
# GH#35488 we need to watch out for multi-block cases
# We only get here with fill_value not-lib.no_default
ncols = self.shape[0]
if periods > 0:
indexer = [-1] * periods + list(range(ncols - periods))
else:
nper = abs(periods)
indexer = list(range(nper, ncols)) + [-1] * nper
result = self.reindex_indexer(
self.items,
indexer,
axis=0,
fill_value=fill_value,
allow_dups=True,
consolidate=False,
)
return result

return self.apply("shift", periods=periods, axis=axis, fill_value=fill_value)

def fillna(self, value, limit, inplace: bool, downcast) -> BlockManager:
Expand Down
9 changes: 1 addition & 8 deletions pandas/tests/apply/test_frame_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,8 @@ def test_transform_ufunc(axis, float_frame, frame_or_series):


@pytest.mark.parametrize("op", frame_transform_kernels)
def test_transform_groupby_kernel(axis, float_frame, op, using_array_manager, request):
def test_transform_groupby_kernel(axis, float_frame, op, request):
# GH 35964
if using_array_manager and op == "pct_change" and axis in (1, "columns"):
# TODO(ArrayManager) shift with axis=1
request.node.add_marker(
pytest.mark.xfail(
reason="shift axis=1 not yet implemented for ArrayManager"
)
)

args = [0.0] if op == "fillna" else []
if axis == 0 or axis == "index":
Expand Down