Skip to content

Commit

Permalink
Merge branch 'main' into issue#57111_5
Browse files Browse the repository at this point in the history
  • Loading branch information
jordan-d-murphy authored Feb 3, 2024
2 parents 73466a0 + 94d575a commit 69fae17
Show file tree
Hide file tree
Showing 23 changed files with 121 additions and 60 deletions.
2 changes: 1 addition & 1 deletion doc/source/user_guide/style.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1280,7 +1280,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"You can create \"heatmaps\" with the `background_gradient` and `text_gradient` methods. These require matplotlib, and we'll use [Seaborn](http://seaborn.pydata.org/) to get a nice colormap."
"You can create \"heatmaps\" with the `background_gradient` and `text_gradient` methods. These require matplotlib, and we'll use [Seaborn](https://seaborn.pydata.org/) to get a nice colormap."
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ Plotting
Groupby/resample/rolling
^^^^^^^^^^^^^^^^^^^^^^^^
- Bug in :meth:`.DataFrameGroupBy.quantile` when ``interpolation="nearest"`` is inconsistent with :meth:`DataFrame.quantile` (:issue:`47942`)
-
- Bug in :meth:`DataFrame.ewm` and :meth:`Series.ewm` when passed ``times`` and aggregation functions other than mean (:issue:`51695`)

Reshaping
^^^^^^^^^
Expand Down
3 changes: 3 additions & 0 deletions pandas/_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -529,3 +529,6 @@ def closed(self) -> bool:
Callable[[HashableT], bool],
None,
]

# maintain the subtype of any hashable sequence
SequenceT = TypeVar("SequenceT", bound=Sequence[Hashable])
12 changes: 12 additions & 0 deletions pandas/core/window/ewm.py
Original file line number Diff line number Diff line change
Expand Up @@ -587,6 +587,8 @@ def sum(
):
if not self.adjust:
raise NotImplementedError("sum is not implemented with adjust=False")
if self.times is not None:
raise NotImplementedError("sum is not implemented with times")
if maybe_use_numba(engine):
if self.method == "single":
func = generate_numba_ewm_func
Expand Down Expand Up @@ -658,6 +660,8 @@ def std(self, bias: bool = False, numeric_only: bool = False):
raise NotImplementedError(
f"{type(self).__name__}.std does not implement numeric_only"
)
if self.times is not None:
raise NotImplementedError("std is not implemented with times")
return zsqrt(self.var(bias=bias, numeric_only=numeric_only))

@doc(
Expand Down Expand Up @@ -691,6 +695,8 @@ def std(self, bias: bool = False, numeric_only: bool = False):
agg_method="var",
)
def var(self, bias: bool = False, numeric_only: bool = False):
if self.times is not None:
raise NotImplementedError("var is not implemented with times")
window_func = window_aggregations.ewmcov
wfunc = partial(
window_func,
Expand Down Expand Up @@ -753,6 +759,9 @@ def cov(
bias: bool = False,
numeric_only: bool = False,
):
if self.times is not None:
raise NotImplementedError("cov is not implemented with times")

from pandas import Series

self._validate_numeric_only("cov", numeric_only)
Expand Down Expand Up @@ -837,6 +846,9 @@ def corr(
pairwise: bool | None = None,
numeric_only: bool = False,
):
if self.times is not None:
raise NotImplementedError("corr is not implemented with times")

from pandas import Series

self._validate_numeric_only("corr", numeric_only)
Expand Down
3 changes: 2 additions & 1 deletion pandas/io/excel/_openpyxl.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
if TYPE_CHECKING:
from openpyxl import Workbook
from openpyxl.descriptors.serialisable import Serialisable
from openpyxl.styles import Fill

from pandas._typing import (
ExcelWriterIfSheetExists,
Expand Down Expand Up @@ -244,7 +245,7 @@ def _convert_to_stop(cls, stop_seq):
return map(cls._convert_to_color, stop_seq)

@classmethod
def _convert_to_fill(cls, fill_dict: dict[str, Any]):
def _convert_to_fill(cls, fill_dict: dict[str, Any]) -> Fill:
"""
Convert ``fill_dict`` to an openpyxl v2 Fill object.
Expand Down
4 changes: 3 additions & 1 deletion pandas/io/formats/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,9 @@ def build_border(
for side in ["top", "right", "bottom", "left"]
}

def _border_style(self, style: str | None, width: str | None, color: str | None):
def _border_style(
self, style: str | None, width: str | None, color: str | None
) -> str | None:
# convert styles and widths to openxml, one of:
# 'dashDot'
# 'dashDotDot'
Expand Down
6 changes: 4 additions & 2 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1346,7 +1346,9 @@ def get_result_as_array(self) -> np.ndarray:
the parameters given at initialisation, as a numpy array
"""

def format_with_na_rep(values: ArrayLike, formatter: Callable, na_rep: str):
def format_with_na_rep(
values: ArrayLike, formatter: Callable, na_rep: str
) -> np.ndarray:
mask = isna(values)
formatted = np.array(
[
Expand All @@ -1358,7 +1360,7 @@ def format_with_na_rep(values: ArrayLike, formatter: Callable, na_rep: str):

def format_complex_with_na_rep(
values: ArrayLike, formatter: Callable, na_rep: str
):
) -> np.ndarray:
real_values = np.real(values).ravel() # type: ignore[arg-type]
imag_values = np.imag(values).ravel() # type: ignore[arg-type]
real_mask, imag_mask = isna(real_values), isna(imag_values)
Expand Down
8 changes: 4 additions & 4 deletions pandas/io/formats/info.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@ def dtype_counts(self) -> Mapping[str, int]:

@property
@abstractmethod
def non_null_counts(self) -> Sequence[int]:
def non_null_counts(self) -> list[int] | Series:
"""Sequence of non-null counts for all columns or column (if series)."""

@property
Expand Down Expand Up @@ -486,7 +486,7 @@ def col_count(self) -> int:
return len(self.ids)

@property
def non_null_counts(self) -> Sequence[int]:
def non_null_counts(self) -> Series:
"""Sequence of non-null counts for all columns or column (if series)."""
return self.data.count()

Expand Down Expand Up @@ -546,7 +546,7 @@ def render(
printer.to_buffer(buf)

@property
def non_null_counts(self) -> Sequence[int]:
def non_null_counts(self) -> list[int]:
return [self.data.count()]

@property
Expand Down Expand Up @@ -750,7 +750,7 @@ def memory_usage_string(self) -> str:
return self.info.memory_usage_string

@property
def non_null_counts(self) -> Sequence[int]:
def non_null_counts(self) -> list[int] | Series:
return self.info.non_null_counts

def add_object_type_line(self) -> None:
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/formats/style.py
Original file line number Diff line number Diff line change
Expand Up @@ -3828,7 +3828,7 @@ def _background_gradient(
vmax: float | None = None,
gmap: Sequence | np.ndarray | DataFrame | Series | None = None,
text_only: bool = False,
):
) -> list[str] | DataFrame:
"""
Color background in a range according to the data or a gradient map
"""
Expand Down
4 changes: 3 additions & 1 deletion pandas/io/formats/style_render.py
Original file line number Diff line number Diff line change
Expand Up @@ -2030,7 +2030,9 @@ def _class_styles(self):
}
]

def _pseudo_css(self, uuid: str, name: str, row: int, col: int, text: str):
def _pseudo_css(
self, uuid: str, name: str, row: int, col: int, text: str
) -> list[CSSDict]:
"""
For every table data-cell that has a valid tooltip (not None, NaN or
empty string) must create two pseudo CSS entries for the specific
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -469,7 +469,7 @@ def row_is_all_th(row):

def _expand_colspan_rowspan(
self, rows, section: Literal["header", "footer", "body"]
):
) -> list[list]:
"""
Given a list of <tr>s, return a list of text rows.
Expand Down
27 changes: 25 additions & 2 deletions pandas/io/json/_normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
TYPE_CHECKING,
Any,
DefaultDict,
overload,
)

import numpy as np
Expand Down Expand Up @@ -42,13 +43,35 @@ def convert_to_line_delimits(s: str) -> str:
return convert_json_to_lines(s)


@overload
def nested_to_record(
ds,
ds: dict,
prefix: str = ...,
sep: str = ...,
level: int = ...,
max_level: int | None = ...,
) -> dict[str, Any]:
...


@overload
def nested_to_record(
ds: list[dict],
prefix: str = ...,
sep: str = ...,
level: int = ...,
max_level: int | None = ...,
) -> list[dict[str, Any]]:
...


def nested_to_record(
ds: dict | list[dict],
prefix: str = "",
sep: str = ".",
level: int = 0,
max_level: int | None = None,
):
) -> dict[str, Any] | list[dict[str, Any]]:
"""
A simplified json_normalize
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def validate_dataframe(df: DataFrame) -> None:
if not isinstance(df, DataFrame):
raise ValueError("to_parquet only supports IO with DataFrames")

def write(self, df: DataFrame, path, compression, **kwargs):
def write(self, df: DataFrame, path, compression, **kwargs) -> None:
raise AbstractMethodError(self)

def read(self, path, columns=None, **kwargs) -> DataFrame:
Expand Down
36 changes: 20 additions & 16 deletions pandas/io/parsers/base_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,6 @@

if TYPE_CHECKING:
from collections.abc import (
Hashable,
Iterable,
Mapping,
Sequence,
Expand All @@ -94,7 +93,10 @@
ArrayLike,
DtypeArg,
DtypeObj,
Hashable,
HashableT,
Scalar,
SequenceT,
)


Expand Down Expand Up @@ -350,13 +352,13 @@ def extract(r):
@final
def _maybe_make_multi_index_columns(
self,
columns: Sequence[Hashable],
columns: SequenceT,
col_names: Sequence[Hashable] | None = None,
) -> Sequence[Hashable] | MultiIndex:
) -> SequenceT | MultiIndex:
# possibly create a column mi here
if is_potential_multi_index(columns):
list_columns = cast(list[tuple], columns)
return MultiIndex.from_tuples(list_columns, names=col_names)
columns_mi = cast("Sequence[tuple[Hashable, ...]]", columns)
return MultiIndex.from_tuples(columns_mi, names=col_names)
return columns

@final
Expand Down Expand Up @@ -520,7 +522,7 @@ def _convert_to_ndarrays(
verbose: bool = False,
converters=None,
dtypes=None,
):
) -> dict[Any, np.ndarray]:
result = {}
for c, values in dct.items():
conv_f = None if converters is None else converters.get(c, None)
Expand Down Expand Up @@ -923,23 +925,23 @@ def _check_data_length(
@overload
def _evaluate_usecols(
self,
usecols: set[int] | Callable[[Hashable], object],
names: Sequence[Hashable],
usecols: Callable[[Hashable], object],
names: Iterable[Hashable],
) -> set[int]:
...

@overload
def _evaluate_usecols(
self, usecols: set[str], names: Sequence[Hashable]
) -> set[str]:
self, usecols: SequenceT, names: Iterable[Hashable]
) -> SequenceT:
...

@final
def _evaluate_usecols(
self,
usecols: Callable[[Hashable], object] | set[str] | set[int],
names: Sequence[Hashable],
) -> set[str] | set[int]:
usecols: Callable[[Hashable], object] | SequenceT,
names: Iterable[Hashable],
) -> SequenceT | set[int]:
"""
Check whether or not the 'usecols' parameter
is a callable. If so, enumerates the 'names'
Expand All @@ -952,7 +954,7 @@ def _evaluate_usecols(
return usecols

@final
def _validate_usecols_names(self, usecols, names: Sequence):
def _validate_usecols_names(self, usecols: SequenceT, names: Sequence) -> SequenceT:
"""
Validates that all usecols are present in a given
list of names. If not, raise a ValueError that
Expand Down Expand Up @@ -1072,7 +1074,9 @@ def _clean_index_names(self, columns, index_col) -> tuple[list | None, list, lis
return index_names, columns, index_col

@final
def _get_empty_meta(self, columns, dtype: DtypeArg | None = None):
def _get_empty_meta(
self, columns: Sequence[HashableT], dtype: DtypeArg | None = None
) -> tuple[Index, list[HashableT], dict[HashableT, Series]]:
columns = list(columns)

index_col = self.index_col
Expand Down Expand Up @@ -1275,7 +1279,7 @@ def _process_date_conversion(
columns,
keep_date_col: bool = False,
dtype_backend=lib.no_default,
):
) -> tuple[dict, list]:
def _isindex(colspec):
return (isinstance(index_col, list) and colspec in index_col) or (
isinstance(index_names, list) and colspec in index_names
Expand Down
14 changes: 10 additions & 4 deletions pandas/io/parsers/c_parser_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,12 @@
)

from pandas._typing import (
AnyArrayLike,
ArrayLike,
DtypeArg,
DtypeObj,
ReadCsvBuffer,
SequenceT,
)

from pandas import (
Expand Down Expand Up @@ -225,7 +227,7 @@ def read(
) -> tuple[
Index | MultiIndex | None,
Sequence[Hashable] | MultiIndex,
Mapping[Hashable, ArrayLike],
Mapping[Hashable, AnyArrayLike],
]:
index: Index | MultiIndex | None
column_names: Sequence[Hashable] | MultiIndex
Expand All @@ -248,7 +250,11 @@ def read(
names,
dtype=self.dtype,
)
columns = self._maybe_make_multi_index_columns(columns, self.col_names)
# error: Incompatible types in assignment (expression has type
# "list[Hashable] | MultiIndex", variable has type "list[Hashable]")
columns = self._maybe_make_multi_index_columns( # type: ignore[assignment]
columns, self.col_names
)

if self.usecols is not None:
columns = self._filter_usecols(columns)
Expand Down Expand Up @@ -334,11 +340,11 @@ def read(

return index, column_names, date_data

def _filter_usecols(self, names: Sequence[Hashable]) -> Sequence[Hashable]:
def _filter_usecols(self, names: SequenceT) -> SequenceT | list[Hashable]:
# hackish
usecols = self._evaluate_usecols(self.usecols, names)
if usecols is not None and len(names) != len(usecols):
names = [
return [
name for i, name in enumerate(names) if i in usecols or name in usecols
]
return names
Expand Down
Loading

0 comments on commit 69fae17

Please sign in to comment.