From 0542c4be99d4129acc51256d23e82f963af4eca4 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 21 Dec 2020 07:50:01 -0800
Subject: [PATCH 1/5] REF: simplify Index.__new__

---
 pandas/core/indexes/base.py    | 99 ++++++++++++++++++++++++----------
 pandas/core/indexes/numeric.py | 15 ++++--
 2 files changed, 84 insertions(+), 30 deletions(-)

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 8d48a6277d412..369e5f87d33ca 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -66,6 +66,12 @@
     validate_all_hashable,
 )
 from pandas.core.dtypes.concat import concat_compat
+from pandas.core.dtypes.dtypes import (
+    CategoricalDtype,
+    DatetimeTZDtype,
+    IntervalDtype,
+    PeriodDtype,
+)
 from pandas.core.dtypes.generic import (
     ABCDatetimeIndex,
     ABCMultiIndex,
@@ -332,11 +338,6 @@ def __new__(
         # index-like
         elif isinstance(data, (np.ndarray, Index, ABCSeries)):
             # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423
-            from pandas.core.indexes.numeric import (
-                Float64Index,
-                Int64Index,
-                UInt64Index,
-            )
 
             if dtype is not None:
                 # we need to avoid having numpy coerce
@@ -347,38 +348,27 @@ def __new__(
                 data = _maybe_cast_with_dtype(data, dtype, copy)
                 dtype = data.dtype  # TODO: maybe not for object?
 
-            # maybe coerce to a sub-class
-            if is_signed_integer_dtype(data.dtype):
-                return Int64Index(data, copy=copy, dtype=dtype, name=name)
-            elif is_unsigned_integer_dtype(data.dtype):
-                return UInt64Index(data, copy=copy, dtype=dtype, name=name)
-            elif is_float_dtype(data.dtype):
-                return Float64Index(data, copy=copy, dtype=dtype, name=name)
+            if data.dtype.kind in ["i", "u", "f"]:
+                # maybe coerce to a sub-class
+                klass = cls._dtype_to_subclass(data.dtype)
+                arr = klass._ensure_array(data, dtype, copy)
+                return klass._simple_new(arr, name=name)
+
             elif issubclass(data.dtype.type, bool) or is_bool_dtype(data):
                 subarr = data.astype("object")
             else:
                 subarr = com.asarray_tuplesafe(data, dtype=object)
 
-            # asarray_tuplesafe does not always copy underlying data,
-            # so need to make sure that this happens
-            if copy:
-                subarr = subarr.copy()
-
             if dtype is None:
                 new_data, new_dtype = _maybe_cast_data_without_dtype(subarr)
-                if new_dtype is not None:
-                    return cls(
-                        new_data, dtype=new_dtype, copy=False, name=name, **kwargs
-                    )
+                return cls(new_data, dtype=new_dtype, copy=copy, name=name, **kwargs)
 
+            subarr = cls._ensure_array(subarr, dtype, copy)
             if kwargs:
                 raise TypeError(f"Unexpected keyword arguments {repr(set(kwargs))}")
-            if subarr.ndim > 1:
-                # GH#13601, GH#20285, GH#27125
-                raise ValueError("Index data must be 1-dimensional")
             return cls._simple_new(subarr, name)
 
-        elif data is None or is_scalar(data):
+        elif is_scalar(data):
             raise cls._scalar_data_error(data)
         elif hasattr(data, "__array__"):
             return Index(np.asarray(data), dtype=dtype, copy=copy, name=name, **kwargs)
@@ -400,6 +390,60 @@ def __new__(
             subarr = com.asarray_tuplesafe(data, dtype=object)
             return Index(subarr, dtype=dtype, copy=copy, name=name, **kwargs)
 
+    @classmethod
+    def _ensure_array(cls, data, dtype, copy: bool):
+        """
+        Ensure we have a valid array to pass to _simple_new.
+        """
+        if data.ndim > 1:
+            # GH#13601, GH#20285, GH#27125
+            raise ValueError("Index data must be 1-dimensional")
+        if copy:
+            # asarray_tuplesafe does not always copy underlying data,
+            #  so need to make sure that this happens
+            data = data.copy()
+        return data
+
+    @classmethod
+    def _dtype_to_subclass(cls, dtype: DtypeObj):
+        # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423
+
+        if isinstance(dtype, DatetimeTZDtype) or dtype == np.dtype("M8[ns]"):
+            from pandas import DatetimeIndex
+
+            return DatetimeIndex
+        if dtype == "m8[ns]":
+            from pandas import TimedeltaIndex
+
+            return TimedeltaIndex
+        if isinstance(dtype, CategoricalDtype):
+            from pandas import CategoricalIndex
+
+            return CategoricalIndex
+        if isinstance(dtype, IntervalDtype):
+            from pandas import IntervalIndex
+
+            return IntervalIndex
+        if isinstance(dtype, PeriodDtype):
+            from pandas import PeriodIndex
+
+            return PeriodIndex
+
+        if is_float_dtype(dtype):
+            from pandas import Float64Index
+
+            return Float64Index
+        if is_unsigned_integer_dtype(dtype):
+            from pandas import UInt64Index
+
+            return UInt64Index
+        if is_signed_integer_dtype(dtype):
+            from pandas import Int64Index
+
+            return Int64Index
+
+        raise NotImplementedError(dtype)
+
     """
     NOTE for new Index creation:
 
@@ -6048,6 +6092,7 @@ def _maybe_cast_data_without_dtype(subarr):
         TimedeltaArray,
     )
 
+    assert subarr.dtype == object, subarr.dtype
     inferred = lib.infer_dtype(subarr, skipna=False)
 
     if inferred == "integer":
@@ -6057,11 +6102,11 @@ def _maybe_cast_data_without_dtype(subarr):
         except ValueError:
             pass
 
-        return subarr, object
+        return subarr, np.dtype(object)
 
     elif inferred in ["floating", "mixed-integer-float", "integer-na"]:
         # TODO: Returns IntegerArray for integer-na case in the future
-        return subarr, np.float64
+        return subarr, np.dtype(np.float64)
 
     elif inferred == "interval":
         try:
diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py
index 91d27d9922aa5..2c2888e1c6f72 100644
--- a/pandas/core/indexes/numeric.py
+++ b/pandas/core/indexes/numeric.py
@@ -46,11 +46,20 @@ class NumericIndex(Index):
     _can_hold_strings = False
 
     def __new__(cls, data=None, dtype=None, copy=False, name=None):
-        cls._validate_dtype(dtype)
         name = maybe_extract_name(name, data, cls)
 
-        # Coerce to ndarray if not already ndarray or Index
+        subarr = cls._ensure_array(data, dtype, copy)
+        return cls._simple_new(subarr, name=name)
+
+    @classmethod
+    def _ensure_array(cls, data, dtype, copy: bool):
+        """
+        Ensure we have a valid array to pass to _simple_new.
+        """
+        cls._validate_dtype(dtype)
+
         if not isinstance(data, (np.ndarray, Index)):
+            # Coerce to ndarray if not already ndarray or Index
             if is_scalar(data):
                 raise cls._scalar_data_error(data)
 
@@ -74,7 +83,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None):
             raise ValueError("Index data must be 1-dimensional")
 
         subarr = np.asarray(subarr)
-        return cls._simple_new(subarr, name=name)
+        return subarr
 
     @classmethod
     def _validate_dtype(cls, dtype: Dtype) -> None:

From c897c6021fcd558948d885ff906be4130c0fd3d9 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 21 Dec 2020 08:37:06 -0800
Subject: [PATCH 2/5] REF: Index.__new__ use helpers

---
 pandas/core/indexes/base.py | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 369e5f87d33ca..0e81bee296275 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -337,7 +337,6 @@ def __new__(
 
         # index-like
         elif isinstance(data, (np.ndarray, Index, ABCSeries)):
-            # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423
 
             if dtype is not None:
                 # we need to avoid having numpy coerce
@@ -350,29 +349,28 @@ def __new__(
 
             if data.dtype.kind in ["i", "u", "f"]:
                 # maybe coerce to a sub-class
-                klass = cls._dtype_to_subclass(data.dtype)
-                arr = klass._ensure_array(data, dtype, copy)
-                return klass._simple_new(arr, name=name)
-
-            elif issubclass(data.dtype.type, bool) or is_bool_dtype(data):
-                subarr = data.astype("object")
+                arr = data
             else:
-                subarr = com.asarray_tuplesafe(data, dtype=object)
+                arr = com.asarray_tuplesafe(data, dtype=object)
 
-            if dtype is None:
-                new_data, new_dtype = _maybe_cast_data_without_dtype(subarr)
-                return cls(new_data, dtype=new_dtype, copy=copy, name=name, **kwargs)
+                if dtype is None:
+                    new_data, new_dtype = _maybe_cast_data_without_dtype(arr)
+                    return cls(
+                        new_data, dtype=new_dtype, copy=copy, name=name, **kwargs
+                    )
 
-            subarr = cls._ensure_array(subarr, dtype, copy)
+            klass = cls._dtype_to_subclass(arr.dtype)
+            arr = klass._ensure_array(arr, dtype, copy)
             if kwargs:
                 raise TypeError(f"Unexpected keyword arguments {repr(set(kwargs))}")
-            return cls._simple_new(subarr, name)
+            return klass._simple_new(arr, name)
 
         elif is_scalar(data):
             raise cls._scalar_data_error(data)
         elif hasattr(data, "__array__"):
             return Index(np.asarray(data), dtype=dtype, copy=copy, name=name, **kwargs)
         else:
+
             if tupleize_cols and is_list_like(data):
                 # GH21470: convert iterable to list before determining if empty
                 if is_iterator(data):
@@ -442,6 +440,10 @@ def _dtype_to_subclass(cls, dtype: DtypeObj):
 
             return Int64Index
 
+        if dtype == object:
+            # NB: assuming away MultiIndex
+            return Index
+
         raise NotImplementedError(dtype)
 
     """

From 7e5b6b9385f5ce03ca1fc5904a86c34f837ed35a Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 21 Dec 2020 10:33:22 -0800
Subject: [PATCH 3/5] don't return dtype from _maybe_cast_data_without_dtype

---
 pandas/core/indexes/base.py | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 0e81bee296275..7320842f7e460 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -354,7 +354,8 @@ def __new__(
                 arr = com.asarray_tuplesafe(data, dtype=object)
 
                 if dtype is None:
-                    new_data, new_dtype = _maybe_cast_data_without_dtype(arr)
+                    new_data = _maybe_cast_data_without_dtype(arr)
+                    new_dtype = new_data.dtype
                     return cls(
                         new_data, dtype=new_dtype, copy=copy, name=name, **kwargs
                     )
@@ -6100,20 +6101,21 @@ def _maybe_cast_data_without_dtype(subarr):
     if inferred == "integer":
         try:
             data = _try_convert_to_int_array(subarr, False, None)
-            return data, data.dtype
+            return data
         except ValueError:
             pass
 
-        return subarr, np.dtype(object)
+        return subarr
 
     elif inferred in ["floating", "mixed-integer-float", "integer-na"]:
         # TODO: Returns IntegerArray for integer-na case in the future
-        return subarr, np.dtype(np.float64)
+        data = np.asarray(subarr).astype(np.float64)
+        return data
 
     elif inferred == "interval":
         try:
             data = IntervalArray._from_sequence(subarr, copy=False)
-            return data, data.dtype
+            return data
         except ValueError:
             # GH27172: mixed closed Intervals --> object dtype
             pass
@@ -6124,7 +6126,7 @@ def _maybe_cast_data_without_dtype(subarr):
         if inferred.startswith("datetime"):
             try:
                 data = DatetimeArray._from_sequence(subarr, copy=False)
-                return data, data.dtype
+                return data
             except (ValueError, OutOfBoundsDatetime):
                 # GH 27011
                 # If we have mixed timezones, just send it
@@ -6133,15 +6135,15 @@ def _maybe_cast_data_without_dtype(subarr):
 
         elif inferred.startswith("timedelta"):
             data = TimedeltaArray._from_sequence(subarr, copy=False)
-            return data, data.dtype
+            return data
         elif inferred == "period":
             try:
                 data = PeriodArray._from_sequence(subarr)
-                return data, data.dtype
+                return data
             except IncompatibleFrequency:
                 pass
 
-    return subarr, subarr.dtype
+    return subarr
 
 
 def _try_convert_to_int_array(

From 86c28e6e616d1a9cf33a3f1a6451adc3aa315cb9 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Thu, 24 Dec 2020 10:18:10 -0800
Subject: [PATCH 4/5] if -> elif in _dtype_to_subclass; drop copy in Block.putmask

---
 pandas/core/indexes/base.py     | 16 ++++++++--------
 pandas/core/internals/blocks.py |  7 ++-----
 2 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 5066a0080181c..535f603cf694c 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -411,37 +411,37 @@ def _dtype_to_subclass(cls, dtype: DtypeObj):
             from pandas import DatetimeIndex
 
             return DatetimeIndex
-        if dtype == "m8[ns]":
+        elif dtype == "m8[ns]":
             from pandas import TimedeltaIndex
 
             return TimedeltaIndex
-        if isinstance(dtype, CategoricalDtype):
+        elif isinstance(dtype, CategoricalDtype):
             from pandas import CategoricalIndex
 
             return CategoricalIndex
-        if isinstance(dtype, IntervalDtype):
+        elif isinstance(dtype, IntervalDtype):
             from pandas import IntervalIndex
 
             return IntervalIndex
-        if isinstance(dtype, PeriodDtype):
+        elif isinstance(dtype, PeriodDtype):
             from pandas import PeriodIndex
 
             return PeriodIndex
 
-        if is_float_dtype(dtype):
+        elif is_float_dtype(dtype):
             from pandas import Float64Index
 
             return Float64Index
-        if is_unsigned_integer_dtype(dtype):
+        elif is_unsigned_integer_dtype(dtype):
             from pandas import UInt64Index
 
             return UInt64Index
-        if is_signed_integer_dtype(dtype):
+        elif is_signed_integer_dtype(dtype):
             from pandas import Int64Index
 
             return Int64Index
 
-        if dtype == object:
+        elif dtype == object:
             # NB: assuming away MultiIndex
             return Index
 
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index a3744519e9c2b..138a19779b831 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -1063,15 +1063,12 @@ def putmask(self, mask, new, axis: int = 0) -> List["Block"]:
             # We only get here for non-Extension Blocks, so _try_coerce_args
             #  is only relevant for DatetimeBlock and TimedeltaBlock
             if self.dtype.kind in ["m", "M"]:
-                blk = self
-                if not inplace:
-                    blk = self.copy()
-                arr = blk.array_values()
+                arr = self.array_values()
                 arr = cast("NDArrayBackedExtensionArray", arr)
                 if transpose:
                     arr = arr.T
                 arr.putmask(mask, new)
-                return [blk]
+                return [self]
 
             if lib.is_scalar(new):
                 new = convert_scalar_for_putitemlike(new, self.values.dtype)

From 8dc0330f9240b3862c4676bb506052ab62cc2531 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Tue, 29 Dec 2020 08:00:55 -0800
Subject: [PATCH 5/5] copy=False in astype

---
 pandas/core/indexes/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 62ca8673fdaed..802f605e37f42 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -6173,7 +6173,7 @@ def _maybe_cast_data_without_dtype(subarr):
 
     elif inferred in ["floating", "mixed-integer-float", "integer-na"]:
         # TODO: Returns IntegerArray for integer-na case in the future
-        data = np.asarray(subarr).astype(np.float64)
+        data = np.asarray(subarr).astype(np.float64, copy=False)
         return data
 
     elif inferred == "interval":