Skip to content

Commit

Permalink
Merge branch 'main' into issue#57111_5
Browse files Browse the repository at this point in the history
  • Loading branch information
jordan-d-murphy authored Feb 3, 2024
2 parents 73466a0 + 94d575a commit 69fae17
Show file tree
Hide file tree
Showing 23 changed files with 121 additions and 60 deletions.
2 changes: 1 addition & 1 deletion doc/source/user_guide/style.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1280,7 +1280,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"You can create \"heatmaps\" with the `background_gradient` and `text_gradient` methods. These require matplotlib, and we'll use [Seaborn](http://seaborn.pydata.org/) to get a nice colormap."
"You can create \"heatmaps\" with the `background_gradient` and `text_gradient` methods. These require matplotlib, and we'll use [Seaborn](https://seaborn.pydata.org/) to get a nice colormap."
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ Plotting
Groupby/resample/rolling
^^^^^^^^^^^^^^^^^^^^^^^^
- Bug in :meth:`.DataFrameGroupBy.quantile` when ``interpolation="nearest"`` is inconsistent with :meth:`DataFrame.quantile` (:issue:`47942`)
-
- Bug in :meth:`DataFrame.ewm` and :meth:`Series.ewm` when passed ``times`` and aggregation functions other than mean (:issue:`51695`)

Reshaping
^^^^^^^^^
Expand Down
3 changes: 3 additions & 0 deletions pandas/_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -529,3 +529,6 @@ def closed(self) -> bool:
Callable[[HashableT], bool],
None,
]

# maintain the subtype of any hashable sequence
SequenceT = TypeVar("SequenceT", bound=Sequence[Hashable])
12 changes: 12 additions & 0 deletions pandas/core/window/ewm.py
Original file line number Diff line number Diff line change
Expand Up @@ -587,6 +587,8 @@ def sum(
):
if not self.adjust:
raise NotImplementedError("sum is not implemented with adjust=False")
if self.times is not None:
raise NotImplementedError("sum is not implemented with times")
if maybe_use_numba(engine):
if self.method == "single":
func = generate_numba_ewm_func
Expand Down Expand Up @@ -658,6 +660,8 @@ def std(self, bias: bool = False, numeric_only: bool = False):
raise NotImplementedError(
f"{type(self).__name__}.std does not implement numeric_only"
)
if self.times is not None:
raise NotImplementedError("std is not implemented with times")
return zsqrt(self.var(bias=bias, numeric_only=numeric_only))

@doc(
Expand Down Expand Up @@ -691,6 +695,8 @@ def std(self, bias: bool = False, numeric_only: bool = False):
agg_method="var",
)
def var(self, bias: bool = False, numeric_only: bool = False):
if self.times is not None:
raise NotImplementedError("var is not implemented with times")
window_func = window_aggregations.ewmcov
wfunc = partial(
window_func,
Expand Down Expand Up @@ -753,6 +759,9 @@ def cov(
bias: bool = False,
numeric_only: bool = False,
):
if self.times is not None:
raise NotImplementedError("cov is not implemented with times")

from pandas import Series

self._validate_numeric_only("cov", numeric_only)
Expand Down Expand Up @@ -837,6 +846,9 @@ def corr(
pairwise: bool | None = None,
numeric_only: bool = False,
):
if self.times is not None:
raise NotImplementedError("corr is not implemented with times")

from pandas import Series

self._validate_numeric_only("corr", numeric_only)
Expand Down
3 changes: 2 additions & 1 deletion pandas/io/excel/_openpyxl.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
if TYPE_CHECKING:
from openpyxl import Workbook
from openpyxl.descriptors.serialisable import Serialisable
from openpyxl.styles import Fill

from pandas._typing import (
ExcelWriterIfSheetExists,
Expand Down Expand Up @@ -244,7 +245,7 @@ def _convert_to_stop(cls, stop_seq):
return map(cls._convert_to_color, stop_seq)

@classmethod
def _convert_to_fill(cls, fill_dict: dict[str, Any]):
def _convert_to_fill(cls, fill_dict: dict[str, Any]) -> Fill:
"""
Convert ``fill_dict`` to an openpyxl v2 Fill object.
Expand Down
4 changes: 3 additions & 1 deletion pandas/io/formats/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,9 @@ def build_border(
for side in ["top", "right", "bottom", "left"]
}

def _border_style(self, style: str | None, width: str | None, color: str | None):
def _border_style(
self, style: str | None, width: str | None, color: str | None
) -> str | None:
# convert styles and widths to openxml, one of:
# 'dashDot'
# 'dashDotDot'
Expand Down
6 changes: 4 additions & 2 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1346,7 +1346,9 @@ def get_result_as_array(self) -> np.ndarray:
the parameters given at initialisation, as a numpy array
"""

def format_with_na_rep(values: ArrayLike, formatter: Callable, na_rep: str):
def format_with_na_rep(
values: ArrayLike, formatter: Callable, na_rep: str
) -> np.ndarray:
mask = isna(values)
formatted = np.array(
[
Expand All @@ -1358,7 +1360,7 @@ def format_with_na_rep(values: ArrayLike, formatter: Callable, na_rep: str):

def format_complex_with_na_rep(
values: ArrayLike, formatter: Callable, na_rep: str
):
) -> np.ndarray:
real_values = np.real(values).ravel() # type: ignore[arg-type]
imag_values = np.imag(values).ravel() # type: ignore[arg-type]
real_mask, imag_mask = isna(real_values), isna(imag_values)
Expand Down
8 changes: 4 additions & 4 deletions pandas/io/formats/info.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@ def dtype_counts(self) -> Mapping[str, int]:

@property
@abstractmethod
def non_null_counts(self) -> Sequence[int]:
def non_null_counts(self) -> list[int] | Series:
"""Sequence of non-null counts for all columns or column (if series)."""

@property
Expand Down Expand Up @@ -486,7 +486,7 @@ def col_count(self) -> int:
return len(self.ids)

@property
def non_null_counts(self) -> Sequence[int]:
def non_null_counts(self) -> Series:
"""Sequence of non-null counts for all columns or column (if series)."""
return self.data.count()

Expand Down Expand Up @@ -546,7 +546,7 @@ def render(
printer.to_buffer(buf)

@property
def non_null_counts(self) -> Sequence[int]:
def non_null_counts(self) -> list[int]:
return [self.data.count()]

@property
Expand Down Expand Up @@ -750,7 +750,7 @@ def memory_usage_string(self) -> str:
return self.info.memory_usage_string

@property
def non_null_counts(self) -> Sequence[int]:
def non_null_counts(self) -> list[int] | Series:
return self.info.non_null_counts

def add_object_type_line(self) -> None:
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/formats/style.py
Original file line number Diff line number Diff line change
Expand Up @@ -3828,7 +3828,7 @@ def _background_gradient(
vmax: float | None = None,
gmap: Sequence | np.ndarray | DataFrame | Series | None = None,
text_only: bool = False,
):
) -> list[str] | DataFrame:
"""
Color background in a range according to the data or a gradient map
"""
Expand Down
4 changes: 3 additions & 1 deletion pandas/io/formats/style_render.py
Original file line number Diff line number Diff line change
Expand Up @@ -2030,7 +2030,9 @@ def _class_styles(self):
}
]

def _pseudo_css(self, uuid: str, name: str, row: int, col: int, text: str):
def _pseudo_css(
self, uuid: str, name: str, row: int, col: int, text: str
) -> list[CSSDict]:
"""
For every table data-cell that has a valid tooltip (not None, NaN or
empty string) must create two pseudo CSS entries for the specific
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -469,7 +469,7 @@ def row_is_all_th(row):

def _expand_colspan_rowspan(
self, rows, section: Literal["header", "footer", "body"]
):
) -> list[list]:
"""
Given a list of <tr>s, return a list of text rows.
Expand Down
27 changes: 25 additions & 2 deletions pandas/io/json/_normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
TYPE_CHECKING,
Any,
DefaultDict,
overload,
)

import numpy as np
Expand Down Expand Up @@ -42,13 +43,35 @@ def convert_to_line_delimits(s: str) -> str:
return convert_json_to_lines(s)


@overload
def nested_to_record(
ds,
ds: dict,
prefix: str = ...,
sep: str = ...,
level: int = ...,
max_level: int | None = ...,
) -> dict[str, Any]:
...


@overload
def nested_to_record(
ds: list[dict],
prefix: str = ...,
sep: str = ...,
level: int = ...,
max_level: int | None = ...,
) -> list[dict[str, Any]]:
...


def nested_to_record(
ds: dict | list[dict],
prefix: str = "",
sep: str = ".",
level: int = 0,
max_level: int | None = None,
):
) -> dict[str, Any] | list[dict[str, Any]]:
"""
A simplified json_normalize
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def validate_dataframe(df: DataFrame) -> None:
if not isinstance(df, DataFrame):
raise ValueError("to_parquet only supports IO with DataFrames")

def write(self, df: DataFrame, path, compression, **kwargs):
def write(self, df: DataFrame, path, compression, **kwargs) -> None:
raise AbstractMethodError(self)

def read(self, path, columns=None, **kwargs) -> DataFrame:
Expand Down
36 changes: 20 additions & 16 deletions pandas/io/parsers/base_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,6 @@

if TYPE_CHECKING:
from collections.abc import (
Hashable,
Iterable,
Mapping,
Sequence,
Expand All @@ -94,7 +93,10 @@
ArrayLike,
DtypeArg,
DtypeObj,
Hashable,
HashableT,
Scalar,
SequenceT,
)


Expand Down Expand Up @@ -350,13 +352,13 @@ def extract(r):
@final
def _maybe_make_multi_index_columns(
self,
columns: Sequence[Hashable],
columns: SequenceT,
col_names: Sequence[Hashable] | None = None,
) -> Sequence[Hashable] | MultiIndex:
) -> SequenceT | MultiIndex:
# possibly create a column mi here
if is_potential_multi_index(columns):
list_columns = cast(list[tuple], columns)
return MultiIndex.from_tuples(list_columns, names=col_names)
columns_mi = cast("Sequence[tuple[Hashable, ...]]", columns)
return MultiIndex.from_tuples(columns_mi, names=col_names)
return columns

@final
Expand Down Expand Up @@ -520,7 +522,7 @@ def _convert_to_ndarrays(
verbose: bool = False,
converters=None,
dtypes=None,
):
) -> dict[Any, np.ndarray]:
result = {}
for c, values in dct.items():
conv_f = None if converters is None else converters.get(c, None)
Expand Down Expand Up @@ -923,23 +925,23 @@ def _check_data_length(
@overload
def _evaluate_usecols(
self,
usecols: set[int] | Callable[[Hashable], object],
names: Sequence[Hashable],
usecols: Callable[[Hashable], object],
names: Iterable[Hashable],
) -> set[int]:
...

@overload
def _evaluate_usecols(
self, usecols: set[str], names: Sequence[Hashable]
) -> set[str]:
self, usecols: SequenceT, names: Iterable[Hashable]
) -> SequenceT:
...

@final
def _evaluate_usecols(
self,
usecols: Callable[[Hashable], object] | set[str] | set[int],
names: Sequence[Hashable],
) -> set[str] | set[int]:
usecols: Callable[[Hashable], object] | SequenceT,
names: Iterable[Hashable],
) -> SequenceT | set[int]:
"""
Check whether or not the 'usecols' parameter
is a callable. If so, enumerates the 'names'
Expand All @@ -952,7 +954,7 @@ def _evaluate_usecols(
return usecols

@final
def _validate_usecols_names(self, usecols, names: Sequence):
def _validate_usecols_names(self, usecols: SequenceT, names: Sequence) -> SequenceT:
"""
Validates that all usecols are present in a given
list of names. If not, raise a ValueError that
Expand Down Expand Up @@ -1072,7 +1074,9 @@ def _clean_index_names(self, columns, index_col) -> tuple[list | None, list, lis
return index_names, columns, index_col

@final
def _get_empty_meta(self, columns, dtype: DtypeArg | None = None):
def _get_empty_meta(
self, columns: Sequence[HashableT], dtype: DtypeArg | None = None
) -> tuple[Index, list[HashableT], dict[HashableT, Series]]:
columns = list(columns)

index_col = self.index_col
Expand Down Expand Up @@ -1275,7 +1279,7 @@ def _process_date_conversion(
columns,
keep_date_col: bool = False,
dtype_backend=lib.no_default,
):
) -> tuple[dict, list]:
def _isindex(colspec):
return (isinstance(index_col, list) and colspec in index_col) or (
isinstance(index_names, list) and colspec in index_names
Expand Down
14 changes: 10 additions & 4 deletions pandas/io/parsers/c_parser_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,12 @@
)

from pandas._typing import (
AnyArrayLike,
ArrayLike,
DtypeArg,
DtypeObj,
ReadCsvBuffer,
SequenceT,
)

from pandas import (
Expand Down Expand Up @@ -225,7 +227,7 @@ def read(
) -> tuple[
Index | MultiIndex | None,
Sequence[Hashable] | MultiIndex,
Mapping[Hashable, ArrayLike],
Mapping[Hashable, AnyArrayLike],
]:
index: Index | MultiIndex | None
column_names: Sequence[Hashable] | MultiIndex
Expand All @@ -248,7 +250,11 @@ def read(
names,
dtype=self.dtype,
)
columns = self._maybe_make_multi_index_columns(columns, self.col_names)
# error: Incompatible types in assignment (expression has type
# "list[Hashable] | MultiIndex", variable has type "list[Hashable]")
columns = self._maybe_make_multi_index_columns( # type: ignore[assignment]
columns, self.col_names
)

if self.usecols is not None:
columns = self._filter_usecols(columns)
Expand Down Expand Up @@ -334,11 +340,11 @@ def read(

return index, column_names, date_data

def _filter_usecols(self, names: Sequence[Hashable]) -> Sequence[Hashable]:
def _filter_usecols(self, names: SequenceT) -> SequenceT | list[Hashable]:
# hackish
usecols = self._evaluate_usecols(self.usecols, names)
if usecols is not None and len(names) != len(usecols):
names = [
return [
name for i, name in enumerate(names) if i in usecols or name in usecols
]
return names
Expand Down
Loading

0 comments on commit 69fae17

Please sign in to comment.