From c5611e87aca9a676e1010957135c3ad022a78b15 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 2 Jan 2025 20:49:06 +0100
Subject: [PATCH] Backport PR #60454: String dtype: coerce missing values in
 indexers for string dtype Index

---
 pandas/_libs/index.pyx                       | 10 +-----
 pandas/tests/frame/indexing/test_indexing.py |  3 --
 pandas/tests/indexes/string/test_indexing.py | 33 ++++++++++----------
 pandas/tests/reshape/test_pivot.py           | 12 +++----
 4 files changed, 22 insertions(+), 36 deletions(-)

diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
index 365cc7c3cecfc..8bb839dee436d 100644
--- a/pandas/_libs/index.pyx
+++ b/pandas/_libs/index.pyx
@@ -536,23 +536,15 @@ cdef class StringObjectEngine(ObjectEngine):
 
     cdef:
         object na_value
-        bint uses_na
 
     def __init__(self, ndarray values, na_value):
         super().__init__(values)
         self.na_value = na_value
-        self.uses_na = na_value is C_NA
-
-    cdef bint _checknull(self, object val):
-        if self.uses_na:
-            return val is C_NA
-        else:
-            return util.is_nan(val)
 
     cdef _check_type(self, object val):
         if isinstance(val, str):
             return val
-        elif self._checknull(val):
+        elif checknull(val):
             return self.na_value
         else:
             raise KeyError(val)
diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
index c0ab51a484cdf..aa81257965696 100644
--- a/pandas/tests/frame/indexing/test_indexing.py
+++ b/pandas/tests/frame/indexing/test_indexing.py
@@ -9,8 +9,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 from pandas._libs import iNaT
 from pandas.errors import (
     InvalidIndexError,
@@ -517,7 +515,6 @@ def test_setitem_ambig(self, using_infer_string):
         else:
             assert dm[2].dtype == np.object_
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
     def test_setitem_None(self, float_frame):
         # GH #766
         float_frame[None] = float_frame["A"]
diff --git a/pandas/tests/indexes/string/test_indexing.py b/pandas/tests/indexes/string/test_indexing.py
index d1a278af337b7..648ee47ddc34c 100644
--- a/pandas/tests/indexes/string/test_indexing.py
+++ b/pandas/tests/indexes/string/test_indexing.py
@@ -13,6 +13,15 @@ def _isnan(val):
         return False
 
 
+def _equivalent_na(dtype, null):
+    """Return True when ``null`` is the same NA flavour as ``dtype.na_value``."""
+    na_value = dtype.na_value
+    if na_value is pd.NA and null is pd.NA:
+        return True
+    # otherwise both must be NaN-like (as judged by _isnan) to be equivalent
+    return bool(_isnan(na_value) and _isnan(null))
+
+
 class TestGetLoc:
     def test_get_loc(self, any_string_dtype):
         index = Index(["a", "b", "c"], dtype=any_string_dtype)
@@ -41,14 +50,7 @@ def test_get_loc_non_missing(self, any_string_dtype, nulls_fixture):
 
     def test_get_loc_missing(self, any_string_dtype, nulls_fixture):
         index = Index(["a", "b", nulls_fixture], dtype=any_string_dtype)
-        if any_string_dtype == "string" and (
-            (any_string_dtype.na_value is pd.NA and nulls_fixture is not pd.NA)
-            or (_isnan(any_string_dtype.na_value) and not _isnan(nulls_fixture))
-        ):
-            with pytest.raises(KeyError):
-                index.get_loc(nulls_fixture)
-        else:
-            assert index.get_loc(nulls_fixture) == 2
+        assert index.get_loc(nulls_fixture) == 2
 
 
 class TestGetIndexer:
@@ -93,9 +95,8 @@ def test_get_indexer_missing(self, any_string_dtype, null, using_infer_string):
         result = index.get_indexer(["a", null, "c"])
         if using_infer_string:
             expected = np.array([0, 2, -1], dtype=np.intp)
-        elif any_string_dtype == "string" and (
-            (any_string_dtype.na_value is pd.NA and null is not pd.NA)
-            or (_isnan(any_string_dtype.na_value) and not _isnan(null))
+        elif any_string_dtype == "string" and not _equivalent_na(
+            any_string_dtype, null
         ):
             expected = np.array([0, -1, -1], dtype=np.intp)
         else:
@@ -115,9 +116,8 @@ def test_get_indexer_non_unique_nas(
         if using_infer_string:
             expected_indexer = np.array([0, 2], dtype=np.intp)
             expected_missing = np.array([], dtype=np.intp)
-        elif any_string_dtype == "string" and (
-            (any_string_dtype.na_value is pd.NA and null is not pd.NA)
-            or (_isnan(any_string_dtype.na_value) and not _isnan(null))
+        elif any_string_dtype == "string" and not _equivalent_na(
+            any_string_dtype, null
         ):
             expected_indexer = np.array([0, -1], dtype=np.intp)
             expected_missing = np.array([1], dtype=np.intp)
@@ -133,9 +133,8 @@ def test_get_indexer_non_unique_nas(
 
         if using_infer_string:
             expected_indexer = np.array([0, 1, 3], dtype=np.intp)
-        elif any_string_dtype == "string" and (
-            (any_string_dtype.na_value is pd.NA and null is not pd.NA)
-            or (_isnan(any_string_dtype.na_value) and not _isnan(null))
+        elif any_string_dtype == "string" and not _equivalent_na(
+            any_string_dtype, null
         ):
             pass
         else:
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
index 75268ccee1d8c..519564a96aa7e 100644
--- a/pandas/tests/reshape/test_pivot.py
+++ b/pandas/tests/reshape/test_pivot.py
@@ -2619,6 +2619,8 @@ def test_pivot_columns_not_given(self):
         with pytest.raises(TypeError, match="missing 1 required keyword-only argument"):
             df.pivot()  # pylint: disable=missing-kwoa
 
+    # This still fails because columns=None gets passed down to unstack as
+    # level=None, but by that point None has already been converted to NaN.
     @pytest.mark.xfail(
         using_string_dtype(), reason="TODO(infer_string) None is cast to NaN"
     )
@@ -2637,10 +2639,7 @@ def test_pivot_columns_is_none(self):
         expected = DataFrame({1: 3}, index=Index([2], name="b"))
         tm.assert_frame_equal(result, expected)
 
-    @pytest.mark.xfail(
-        using_string_dtype(), reason="TODO(infer_string) None is cast to NaN"
-    )
-    def test_pivot_index_is_none(self):
+    def test_pivot_index_is_none(self, using_infer_string):
         # GH#48293
         df = DataFrame({None: [1], "b": 2, "c": 3})
 
@@ -2651,11 +2650,10 @@ def test_pivot_index_is_none(self):
 
         result = df.pivot(columns="b", index=None, values="c")
         expected = DataFrame(3, index=[1], columns=Index([2], name="b"))
+        if using_infer_string:
+            expected.index.name = np.nan
         tm.assert_frame_equal(result, expected)
 
-    @pytest.mark.xfail(
-        using_string_dtype(), reason="TODO(infer_string) None is cast to NaN"
-    )
     def test_pivot_values_is_none(self):
         # GH#48293
         df = DataFrame({None: [1], "b": 2, "c": 3})