Skip to content

Commit

Permalink
Merge branch 'main' into issue#57111_6
Browse files Browse the repository at this point in the history
  • Loading branch information
jordan-d-murphy authored Feb 4, 2024
2 parents 0008b0e + 1bb4839 commit 135d20e
Show file tree
Hide file tree
Showing 61 changed files with 389 additions and 1,239 deletions.
19 changes: 1 addition & 18 deletions .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,22 +57,6 @@ jobs:
# Also install zh_CN (its encoding is gb2312) but do not activate it.
# It will be temporarily activated during tests with locale.setlocale
extra_loc: "zh_CN"
- name: "Copy-on-Write 3.9"
env_file: actions-39.yaml
pattern: "not slow and not network and not single_cpu"
pandas_copy_on_write: "1"
- name: "Copy-on-Write 3.10"
env_file: actions-310.yaml
pattern: "not slow and not network and not single_cpu"
pandas_copy_on_write: "1"
- name: "Copy-on-Write 3.11"
env_file: actions-311.yaml
pattern: "not slow and not network and not single_cpu"
pandas_copy_on_write: "1"
- name: "Copy-on-Write 3.12"
env_file: actions-312.yaml
pattern: "not slow and not network and not single_cpu"
pandas_copy_on_write: "1"
- name: "Pypy"
env_file: actions-pypy-39.yaml
pattern: "not slow and not network and not single_cpu"
Expand Down Expand Up @@ -101,7 +85,6 @@ jobs:
PATTERN: ${{ matrix.pattern }}
LANG: ${{ matrix.lang || 'C.UTF-8' }}
LC_ALL: ${{ matrix.lc_all || '' }}
PANDAS_COPY_ON_WRITE: ${{ matrix.pandas_copy_on_write || '0' }}
PANDAS_CI: ${{ matrix.pandas_ci || '1' }}
TEST_ARGS: ${{ matrix.test_args || '' }}
PYTEST_WORKERS: ${{ matrix.pytest_workers || 'auto' }}
Expand All @@ -111,7 +94,7 @@ jobs:
QT_QPA_PLATFORM: offscreen
concurrency:
# https://github.community/t/concurrecy-not-work-for-push/183068/7
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}-${{ matrix.pandas_copy_on_write || '' }}
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}
cancel-in-progress: true

services:
Expand Down
1 change: 0 additions & 1 deletion ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then

MSG='Partially validate docstrings (PR02)' ; echo $MSG
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=PR02 --ignore_functions \
pandas.io.formats.style.Styler.to_excel\
pandas.CategoricalIndex.rename_categories\
pandas.CategoricalIndex.reorder_categories\
pandas.CategoricalIndex.add_categories\
Expand Down
2 changes: 1 addition & 1 deletion doc/source/user_guide/merging.rst
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ a :class:`MultiIndex`) associate specific keys with each original :class:`DataFr
p.plot(frames, result, labels=["df1", "df2", "df3"], vertical=True)
plt.close("all");
The ``keys`` argument cane override the column names
The ``keys`` argument can override the column names
when creating a new :class:`DataFrame` based on existing :class:`Series`.

.. ipython:: python
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ Fixed regressions
- Fixed regression in :meth:`DataFrame.loc` raising ``IndexError`` for non-unique, masked dtype indexes where result has more than 10,000 rows (:issue:`57027`)
- Fixed regression in :meth:`DataFrame.sort_index` not producing a stable sort for an index with duplicates (:issue:`57151`)
- Fixed regression in :meth:`DataFrame.to_dict` with ``orient='list'`` and datetime or timedelta types returning integers (:issue:`54824`)
- Fixed regression in :meth:`DataFrame.to_json` converting nullable integers to floats (:issue:`57224`)
- Fixed regression in :meth:`DataFrameGroupBy.idxmin`, :meth:`DataFrameGroupBy.idxmax`, :meth:`SeriesGroupBy.idxmin`, :meth:`SeriesGroupBy.idxmax` ignoring the ``skipna`` argument (:issue:`57040`)
- Fixed regression in :meth:`DataFrameGroupBy.idxmin`, :meth:`DataFrameGroupBy.idxmax`, :meth:`SeriesGroupBy.idxmin`, :meth:`SeriesGroupBy.idxmax` where values containing the minimum or maximum value for the dtype could produce incorrect results (:issue:`57040`)
- Fixed regression in :meth:`Index.join` raising ``TypeError`` when joining an empty index to a non-empty index containing mixed dtype values (:issue:`57048`)
Expand Down
5 changes: 0 additions & 5 deletions pandas/_config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
"option_context",
"options",
"using_copy_on_write",
"warn_copy_on_write",
]
from pandas._config import config
from pandas._config import dates # pyright: ignore[reportUnusedImport] # noqa: F401
Expand All @@ -35,10 +34,6 @@ def using_copy_on_write() -> bool:
return True


def warn_copy_on_write() -> bool:
return False


def using_nullable_dtypes() -> bool:
_mode_options = _global_config["mode"]
return _mode_options["nullable_dtypes"]
Expand Down
8 changes: 0 additions & 8 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1966,14 +1966,6 @@ def using_copy_on_write() -> bool:
return True


@pytest.fixture
def warn_copy_on_write() -> bool:
"""
Fixture to check if Copy-on-Write is in warning mode.
"""
return False


@pytest.fixture
def using_infer_string() -> bool:
"""
Expand Down
5 changes: 5 additions & 0 deletions pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1364,6 +1364,11 @@ def _to_timedeltaarray(self) -> TimedeltaArray:
np_array = np_array.astype(np_dtype)
return TimedeltaArray._simple_new(np_array, dtype=np_dtype)

def _values_for_json(self) -> np.ndarray:
if is_numeric_dtype(self.dtype):
return np.asarray(self, dtype=object)
return super()._values_for_json()

@doc(ExtensionArray.to_numpy)
def to_numpy(
self,
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,9 @@ def __abs__(self) -> Self:

# ------------------------------------------------------------------

def _values_for_json(self) -> np.ndarray:
return np.asarray(self, dtype=object)

def to_numpy(
self,
dtype: npt.DTypeLike | None = None,
Expand Down
15 changes: 1 addition & 14 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@
from pandas._config import (
get_option,
using_copy_on_write,
warn_copy_on_write,
)

from pandas._libs import (
Expand All @@ -64,7 +63,6 @@
_chained_assignment_method_msg,
_chained_assignment_msg,
_chained_assignment_warning_method_msg,
_chained_assignment_warning_msg,
)
from pandas.util._decorators import (
Appender,
Expand Down Expand Up @@ -4199,17 +4197,6 @@ def __setitem__(self, key, value) -> None:
warnings.warn(
_chained_assignment_msg, ChainedAssignmentError, stacklevel=2
)
elif not PYPY and not using_copy_on_write():
if sys.getrefcount(self) <= 3 and (
warn_copy_on_write()
or (
not warn_copy_on_write()
and any(b.refs.has_reference() for b in self._mgr.blocks)
)
):
warnings.warn(
_chained_assignment_warning_msg, FutureWarning, stacklevel=2
)

key = com.apply_if_callable(key, self)

Expand Down Expand Up @@ -4550,7 +4537,7 @@ def _clear_item_cache(self) -> None:

def _get_item_cache(self, item: Hashable) -> Series:
"""Return the cached item, item represents a label indexer."""
if using_copy_on_write() or warn_copy_on_write():
if using_copy_on_write():
loc = self.columns.get_loc(item)
return self._ixs(loc, axis=1)

Expand Down
124 changes: 11 additions & 113 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
from pandas._config import (
config,
using_copy_on_write,
warn_copy_on_write,
)

from pandas._libs import lib
Expand Down Expand Up @@ -105,7 +104,6 @@
from pandas.errors.cow import (
_chained_assignment_method_msg,
_chained_assignment_warning_method_msg,
_check_cacher,
)
from pandas.util._decorators import (
deprecate_nonkeyword_arguments,
Expand Down Expand Up @@ -220,6 +218,8 @@
from pandas.core.indexers.objects import BaseIndexer
from pandas.core.resample import Resampler

import textwrap

# goal is to be able to define the docs close to function, while still being
# able to share
_shared_docs = {**_shared_docs}
Expand Down Expand Up @@ -2225,6 +2225,12 @@ def _repr_data_resource_(self):
klass="object",
storage_options=_shared_docs["storage_options"],
storage_options_versionadded="1.2.0",
extra_parameters=textwrap.dedent(
"""\
engine_kwargs : dict, optional
Arbitrary keyword arguments passed to excel engine.
"""
),
)
def to_excel(
self,
Expand Down Expand Up @@ -2300,9 +2306,7 @@ def to_excel(
{storage_options}
.. versionadded:: {storage_options_versionadded}
engine_kwargs : dict, optional
Arbitrary keyword arguments passed to excel engine.
{extra_parameters}
See Also
--------
to_csv : Write DataFrame to a comma-separated values (csv) file.
Expand Down Expand Up @@ -4386,7 +4390,7 @@ def _check_setitem_copy(self, t: str = "setting", force: bool_t = False) -> None
df.iloc[0:5]['group'] = 'a'
"""
if using_copy_on_write() or warn_copy_on_write():
if using_copy_on_write():
return

# return early if the check is not needed
Expand Down Expand Up @@ -7235,22 +7239,6 @@ def fillna(
ChainedAssignmentError,
stacklevel=2,
)
elif (
not PYPY
and not using_copy_on_write()
and self._is_view_after_cow_rules()
):
ctr = sys.getrefcount(self)
ref_count = REF_COUNT
if isinstance(self, ABCSeries) and _check_cacher(self):
# see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221
ref_count += 1
if ctr <= ref_count:
warnings.warn(
_chained_assignment_warning_method_msg,
FutureWarning,
stacklevel=2,
)

value, method = validate_fillna_kwargs(value, method)
if method is not None:
Expand Down Expand Up @@ -7538,22 +7526,6 @@ def ffill(
ChainedAssignmentError,
stacklevel=2,
)
elif (
not PYPY
and not using_copy_on_write()
and self._is_view_after_cow_rules()
):
ctr = sys.getrefcount(self)
ref_count = REF_COUNT
if isinstance(self, ABCSeries) and _check_cacher(self):
# see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221
ref_count += 1
if ctr <= ref_count:
warnings.warn(
_chained_assignment_warning_method_msg,
FutureWarning,
stacklevel=2,
)

return self._pad_or_backfill(
"ffill",
Expand Down Expand Up @@ -7742,22 +7714,6 @@ def bfill(
ChainedAssignmentError,
stacklevel=2,
)
elif (
not PYPY
and not using_copy_on_write()
and self._is_view_after_cow_rules()
):
ctr = sys.getrefcount(self)
ref_count = REF_COUNT
if isinstance(self, ABCSeries) and _check_cacher(self):
# see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221
ref_count += 1
if ctr <= ref_count:
warnings.warn(
_chained_assignment_warning_method_msg,
FutureWarning,
stacklevel=2,
)

return self._pad_or_backfill(
"bfill",
Expand Down Expand Up @@ -7913,26 +7869,6 @@ def replace(
ChainedAssignmentError,
stacklevel=2,
)
elif (
not PYPY
and not using_copy_on_write()
and self._is_view_after_cow_rules()
):
ctr = sys.getrefcount(self)
ref_count = REF_COUNT
if isinstance(self, ABCSeries) and _check_cacher(self):
# in non-CoW mode, chained Series access will populate the
# `_item_cache` which results in an increased ref count not below
# the threshold, while we still need to warn. We detect this case
# of a Series derived from a DataFrame through the presence of
# checking the `_cacher`
ref_count += 1
if ctr <= ref_count:
warnings.warn(
_chained_assignment_warning_method_msg,
FutureWarning,
stacklevel=2,
)

if not is_bool(regex) and to_replace is not None:
raise ValueError("'to_replace' must be 'None' if 'regex' is not a bool")
Expand Down Expand Up @@ -8363,22 +8299,6 @@ def interpolate(
ChainedAssignmentError,
stacklevel=2,
)
elif (
not PYPY
and not using_copy_on_write()
and self._is_view_after_cow_rules()
):
ctr = sys.getrefcount(self)
ref_count = REF_COUNT
if isinstance(self, ABCSeries) and _check_cacher(self):
# see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221
ref_count += 1
if ctr <= ref_count:
warnings.warn(
_chained_assignment_warning_method_msg,
FutureWarning,
stacklevel=2,
)

axis = self._get_axis_number(axis)

Expand Down Expand Up @@ -10548,7 +10468,6 @@ def _where(
inplace: bool_t = False,
axis: Axis | None = None,
level=None,
warn: bool_t = True,
):
"""
Equivalent to public method `where`, except that `other` is not
Expand Down Expand Up @@ -10679,7 +10598,7 @@ def _where(
# we may have different type blocks come out of putmask, so
# reconstruct the block manager

new_data = self._mgr.putmask(mask=cond, new=other, align=align, warn=warn)
new_data = self._mgr.putmask(mask=cond, new=other, align=align)
result = self._constructor_from_mgr(new_data, axes=new_data.axes)
return self._update_inplace(result)

Expand Down Expand Up @@ -12545,29 +12464,8 @@ def _inplace_method(self, other, op) -> Self:
"""
Wrap arithmetic method to operate inplace.
"""
warn = True
if not PYPY and warn_copy_on_write():
if sys.getrefcount(self) <= REF_COUNT + 2:
# we are probably in an inplace setitem context (e.g. df['a'] += 1)
warn = False

result = op(self, other)

if (
self.ndim == 1
and result._indexed_same(self)
and result.dtype == self.dtype
and not using_copy_on_write()
and not (warn_copy_on_write() and not warn)
):
# GH#36498 this inplace op can _actually_ be inplace.
# Item "BlockManager" of "Union[BlockManager, SingleBlockManager]" has
# no attribute "setitem_inplace"
self._mgr.setitem_inplace( # type: ignore[union-attr]
slice(None), result._values, warn=warn
)
return self

# Delete cacher
self._reset_cacher()

Expand Down
Loading

0 comments on commit 135d20e

Please sign in to comment.