Skip to content

Commit

Permalink
Merge branch 'main' into issue#57111_6
Browse files Browse the repository at this point in the history
  • Loading branch information
jordan-d-murphy authored Feb 4, 2024
2 parents 0008b0e + 1bb4839 commit 135d20e
Show file tree
Hide file tree
Showing 61 changed files with 389 additions and 1,239 deletions.
19 changes: 1 addition & 18 deletions .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,22 +57,6 @@ jobs:
# Also install zh_CN (its encoding is gb2312) but do not activate it.
# It will be temporarily activated during tests with locale.setlocale
extra_loc: "zh_CN"
- name: "Copy-on-Write 3.9"
env_file: actions-39.yaml
pattern: "not slow and not network and not single_cpu"
pandas_copy_on_write: "1"
- name: "Copy-on-Write 3.10"
env_file: actions-310.yaml
pattern: "not slow and not network and not single_cpu"
pandas_copy_on_write: "1"
- name: "Copy-on-Write 3.11"
env_file: actions-311.yaml
pattern: "not slow and not network and not single_cpu"
pandas_copy_on_write: "1"
- name: "Copy-on-Write 3.12"
env_file: actions-312.yaml
pattern: "not slow and not network and not single_cpu"
pandas_copy_on_write: "1"
- name: "Pypy"
env_file: actions-pypy-39.yaml
pattern: "not slow and not network and not single_cpu"
Expand Down Expand Up @@ -101,7 +85,6 @@ jobs:
PATTERN: ${{ matrix.pattern }}
LANG: ${{ matrix.lang || 'C.UTF-8' }}
LC_ALL: ${{ matrix.lc_all || '' }}
PANDAS_COPY_ON_WRITE: ${{ matrix.pandas_copy_on_write || '0' }}
PANDAS_CI: ${{ matrix.pandas_ci || '1' }}
TEST_ARGS: ${{ matrix.test_args || '' }}
PYTEST_WORKERS: ${{ matrix.pytest_workers || 'auto' }}
Expand All @@ -111,7 +94,7 @@ jobs:
QT_QPA_PLATFORM: offscreen
concurrency:
# https://github.community/t/concurrecy-not-work-for-push/183068/7
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}-${{ matrix.pandas_copy_on_write || '' }}
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}
cancel-in-progress: true

services:
Expand Down
1 change: 0 additions & 1 deletion ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then

MSG='Partially validate docstrings (PR02)' ; echo $MSG
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=PR02 --ignore_functions \
pandas.io.formats.style.Styler.to_excel\
pandas.CategoricalIndex.rename_categories\
pandas.CategoricalIndex.reorder_categories\
pandas.CategoricalIndex.add_categories\
Expand Down
2 changes: 1 addition & 1 deletion doc/source/user_guide/merging.rst
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ a :class:`MultiIndex`) associate specific keys with each original :class:`DataFr
p.plot(frames, result, labels=["df1", "df2", "df3"], vertical=True)
plt.close("all");
The ``keys`` argument cane override the column names
The ``keys`` argument can override the column names
when creating a new :class:`DataFrame` based on existing :class:`Series`.

.. ipython:: python
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ Fixed regressions
- Fixed regression in :meth:`DataFrame.loc` raising ``IndexError`` for non-unique, masked dtype indexes where result has more than 10,000 rows (:issue:`57027`)
- Fixed regression in :meth:`DataFrame.sort_index` not producing a stable sort for an index with duplicates (:issue:`57151`)
- Fixed regression in :meth:`DataFrame.to_dict` with ``orient='list'`` and datetime or timedelta types returning integers (:issue:`54824`)
- Fixed regression in :meth:`DataFrame.to_json` converting nullable integers to floats (:issue:`57224`)
- Fixed regression in :meth:`DataFrameGroupBy.idxmin`, :meth:`DataFrameGroupBy.idxmax`, :meth:`SeriesGroupBy.idxmin`, :meth:`SeriesGroupBy.idxmax` ignoring the ``skipna`` argument (:issue:`57040`)
- Fixed regression in :meth:`DataFrameGroupBy.idxmin`, :meth:`DataFrameGroupBy.idxmax`, :meth:`SeriesGroupBy.idxmin`, :meth:`SeriesGroupBy.idxmax` where values containing the minimum or maximum value for the dtype could produce incorrect results (:issue:`57040`)
- Fixed regression in :meth:`Index.join` raising ``TypeError`` when joining an empty index to a non-empty index containing mixed dtype values (:issue:`57048`)
Expand Down
5 changes: 0 additions & 5 deletions pandas/_config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
"option_context",
"options",
"using_copy_on_write",
"warn_copy_on_write",
]
from pandas._config import config
from pandas._config import dates # pyright: ignore[reportUnusedImport] # noqa: F401
Expand All @@ -35,10 +34,6 @@ def using_copy_on_write() -> bool:
return True


def warn_copy_on_write() -> bool:
return False


def using_nullable_dtypes() -> bool:
_mode_options = _global_config["mode"]
return _mode_options["nullable_dtypes"]
Expand Down
8 changes: 0 additions & 8 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1966,14 +1966,6 @@ def using_copy_on_write() -> bool:
return True


@pytest.fixture
def warn_copy_on_write() -> bool:
"""
Fixture to check if Copy-on-Write is in warning mode.
"""
return False


@pytest.fixture
def using_infer_string() -> bool:
"""
Expand Down
5 changes: 5 additions & 0 deletions pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1364,6 +1364,11 @@ def _to_timedeltaarray(self) -> TimedeltaArray:
np_array = np_array.astype(np_dtype)
return TimedeltaArray._simple_new(np_array, dtype=np_dtype)

def _values_for_json(self) -> np.ndarray:
if is_numeric_dtype(self.dtype):
return np.asarray(self, dtype=object)
return super()._values_for_json()

@doc(ExtensionArray.to_numpy)
def to_numpy(
self,
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,9 @@ def __abs__(self) -> Self:

# ------------------------------------------------------------------

def _values_for_json(self) -> np.ndarray:
return np.asarray(self, dtype=object)

def to_numpy(
self,
dtype: npt.DTypeLike | None = None,
Expand Down
15 changes: 1 addition & 14 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@
from pandas._config import (
get_option,
using_copy_on_write,
warn_copy_on_write,
)

from pandas._libs import (
Expand All @@ -64,7 +63,6 @@
_chained_assignment_method_msg,
_chained_assignment_msg,
_chained_assignment_warning_method_msg,
_chained_assignment_warning_msg,
)
from pandas.util._decorators import (
Appender,
Expand Down Expand Up @@ -4199,17 +4197,6 @@ def __setitem__(self, key, value) -> None:
warnings.warn(
_chained_assignment_msg, ChainedAssignmentError, stacklevel=2
)
elif not PYPY and not using_copy_on_write():
if sys.getrefcount(self) <= 3 and (
warn_copy_on_write()
or (
not warn_copy_on_write()
and any(b.refs.has_reference() for b in self._mgr.blocks)
)
):
warnings.warn(
_chained_assignment_warning_msg, FutureWarning, stacklevel=2
)

key = com.apply_if_callable(key, self)

Expand Down Expand Up @@ -4550,7 +4537,7 @@ def _clear_item_cache(self) -> None:

def _get_item_cache(self, item: Hashable) -> Series:
"""Return the cached item, item represents a label indexer."""
if using_copy_on_write() or warn_copy_on_write():
if using_copy_on_write():
loc = self.columns.get_loc(item)
return self._ixs(loc, axis=1)

Expand Down
124 changes: 11 additions & 113 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
from pandas._config import (
config,
using_copy_on_write,
warn_copy_on_write,
)

from pandas._libs import lib
Expand Down Expand Up @@ -105,7 +104,6 @@
from pandas.errors.cow import (
_chained_assignment_method_msg,
_chained_assignment_warning_method_msg,
_check_cacher,
)
from pandas.util._decorators import (
deprecate_nonkeyword_arguments,
Expand Down Expand Up @@ -220,6 +218,8 @@
from pandas.core.indexers.objects import BaseIndexer
from pandas.core.resample import Resampler

import textwrap

# goal is to be able to define the docs close to function, while still being
# able to share
_shared_docs = {**_shared_docs}
Expand Down Expand Up @@ -2225,6 +2225,12 @@ def _repr_data_resource_(self):
klass="object",
storage_options=_shared_docs["storage_options"],
storage_options_versionadded="1.2.0",
extra_parameters=textwrap.dedent(
"""\
engine_kwargs : dict, optional
Arbitrary keyword arguments passed to excel engine.
"""
),
)
def to_excel(
self,
Expand Down Expand Up @@ -2300,9 +2306,7 @@ def to_excel(
{storage_options}
.. versionadded:: {storage_options_versionadded}
engine_kwargs : dict, optional
Arbitrary keyword arguments passed to excel engine.
{extra_parameters}
See Also
--------
to_csv : Write DataFrame to a comma-separated values (csv) file.
Expand Down Expand Up @@ -4386,7 +4390,7 @@ def _check_setitem_copy(self, t: str = "setting", force: bool_t = False) -> None
df.iloc[0:5]['group'] = 'a'
"""
if using_copy_on_write() or warn_copy_on_write():
if using_copy_on_write():
return

# return early if the check is not needed
Expand Down Expand Up @@ -7235,22 +7239,6 @@ def fillna(
ChainedAssignmentError,
stacklevel=2,
)
elif (
not PYPY
and not using_copy_on_write()
and self._is_view_after_cow_rules()
):
ctr = sys.getrefcount(self)
ref_count = REF_COUNT
if isinstance(self, ABCSeries) and _check_cacher(self):
# see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221
ref_count += 1
if ctr <= ref_count:
warnings.warn(
_chained_assignment_warning_method_msg,
FutureWarning,
stacklevel=2,
)

value, method = validate_fillna_kwargs(value, method)
if method is not None:
Expand Down Expand Up @@ -7538,22 +7526,6 @@ def ffill(
ChainedAssignmentError,
stacklevel=2,
)
elif (
not PYPY
and not using_copy_on_write()
and self._is_view_after_cow_rules()
):
ctr = sys.getrefcount(self)
ref_count = REF_COUNT
if isinstance(self, ABCSeries) and _check_cacher(self):
# see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221
ref_count += 1
if ctr <= ref_count:
warnings.warn(
_chained_assignment_warning_method_msg,
FutureWarning,
stacklevel=2,
)

return self._pad_or_backfill(
"ffill",
Expand Down Expand Up @@ -7742,22 +7714,6 @@ def bfill(
ChainedAssignmentError,
stacklevel=2,
)
elif (
not PYPY
and not using_copy_on_write()
and self._is_view_after_cow_rules()
):
ctr = sys.getrefcount(self)
ref_count = REF_COUNT
if isinstance(self, ABCSeries) and _check_cacher(self):
# see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221
ref_count += 1
if ctr <= ref_count:
warnings.warn(
_chained_assignment_warning_method_msg,
FutureWarning,
stacklevel=2,
)

return self._pad_or_backfill(
"bfill",
Expand Down Expand Up @@ -7913,26 +7869,6 @@ def replace(
ChainedAssignmentError,
stacklevel=2,
)
elif (
not PYPY
and not using_copy_on_write()
and self._is_view_after_cow_rules()
):
ctr = sys.getrefcount(self)
ref_count = REF_COUNT
if isinstance(self, ABCSeries) and _check_cacher(self):
# in non-CoW mode, chained Series access will populate the
# `_item_cache` which results in an increased ref count not below
# the threshold, while we still need to warn. We detect this case
# of a Series derived from a DataFrame through the presence of
# checking the `_cacher`
ref_count += 1
if ctr <= ref_count:
warnings.warn(
_chained_assignment_warning_method_msg,
FutureWarning,
stacklevel=2,
)

if not is_bool(regex) and to_replace is not None:
raise ValueError("'to_replace' must be 'None' if 'regex' is not a bool")
Expand Down Expand Up @@ -8363,22 +8299,6 @@ def interpolate(
ChainedAssignmentError,
stacklevel=2,
)
elif (
not PYPY
and not using_copy_on_write()
and self._is_view_after_cow_rules()
):
ctr = sys.getrefcount(self)
ref_count = REF_COUNT
if isinstance(self, ABCSeries) and _check_cacher(self):
# see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221
ref_count += 1
if ctr <= ref_count:
warnings.warn(
_chained_assignment_warning_method_msg,
FutureWarning,
stacklevel=2,
)

axis = self._get_axis_number(axis)

Expand Down Expand Up @@ -10548,7 +10468,6 @@ def _where(
inplace: bool_t = False,
axis: Axis | None = None,
level=None,
warn: bool_t = True,
):
"""
Equivalent to public method `where`, except that `other` is not
Expand Down Expand Up @@ -10679,7 +10598,7 @@ def _where(
# we may have different type blocks come out of putmask, so
# reconstruct the block manager

new_data = self._mgr.putmask(mask=cond, new=other, align=align, warn=warn)
new_data = self._mgr.putmask(mask=cond, new=other, align=align)
result = self._constructor_from_mgr(new_data, axes=new_data.axes)
return self._update_inplace(result)

Expand Down Expand Up @@ -12545,29 +12464,8 @@ def _inplace_method(self, other, op) -> Self:
"""
Wrap arithmetic method to operate inplace.
"""
warn = True
if not PYPY and warn_copy_on_write():
if sys.getrefcount(self) <= REF_COUNT + 2:
# we are probably in an inplace setitem context (e.g. df['a'] += 1)
warn = False

result = op(self, other)

if (
self.ndim == 1
and result._indexed_same(self)
and result.dtype == self.dtype
and not using_copy_on_write()
and not (warn_copy_on_write() and not warn)
):
# GH#36498 this inplace op can _actually_ be inplace.
# Item "BlockManager" of "Union[BlockManager, SingleBlockManager]" has
# no attribute "setitem_inplace"
self._mgr.setitem_inplace( # type: ignore[union-attr]
slice(None), result._values, warn=warn
)
return self

# Delete cacher
self._reset_cacher()

Expand Down
Loading

0 comments on commit 135d20e

Please sign in to comment.