Skip to content

Commit

Permalink
unary_test with nan data. 2 element and 1 element unary tests
Browse files Browse the repository at this point in the history
  • Loading branch information
CarloLepelaars committed Oct 18, 2024
1 parent 4ff077d commit 11efd49
Show file tree
Hide file tree
Showing 3 changed files with 115 additions and 2 deletions.
2 changes: 2 additions & 0 deletions narwhals/_arrow/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,8 @@ def skew(self) -> Any:
import pyarrow.compute as pc # ignore-banned-import()

ser = self._native_series
if len(ser) <= 1:
return float("nan")
m = pc.subtract(ser, pc.mean(ser))
m2 = pc.mean(pc.power(m, 2))
m3 = pc.mean(pc.power(m, 3))
Expand Down
2 changes: 2 additions & 0 deletions narwhals/_pandas_like/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,8 @@ def std(

def skew(self) -> Any:
ser = self._native_series
if len(ser) <= 1 or ser.isna().any():
return float("nan")
m = ser.mean()
m2 = ((ser - m) ** 2).mean()
m3 = ((ser - m) ** 3).mean()
Expand Down
113 changes: 111 additions & 2 deletions tests/expr_and_series/unary_test.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,28 @@
from __future__ import annotations

import warnings

import narwhals.stable.v1 as nw
from tests.utils import Constructor
from tests.utils import ConstructorEager
from tests.utils import compare_dicts


def test_unary(constructor: Constructor) -> None:
data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}
data = {
"a": [1, 3, 2],
"b": [4, 4, 6],
"c": [7.0, 8.0, float("nan")],
"z": [7.0, 8, 9],
}
result = nw.from_native(constructor(data)).select(
a_mean=nw.col("a").mean(),
a_sum=nw.col("a").sum(),
a_skew=nw.col("a").skew(),
b_nunique=nw.col("b").n_unique(),
b_skew=nw.col("b").skew(),
c_nunique=nw.col("c").n_unique(),
c_skew=nw.col("c").skew(),
z_min=nw.col("z").min(),
z_max=nw.col("z").max(),
)
Expand All @@ -23,21 +32,30 @@ def test_unary(constructor: Constructor) -> None:
"a_skew": [0.0],
"b_nunique": [2],
"b_skew": [0.7071067811865465],
"c_nunique": [3],
"c_skew": [float("nan")],
"z_min": [7],
"z_max": [9],
}
compare_dicts(result, expected)


def test_unary_series(constructor_eager: ConstructorEager) -> None:
data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}
data = {
"a": [1, 3, 2],
"b": [4, 4, 6],
"c": [7.0, 8.0, float("nan")],
"z": [7.0, 8, 9],
}
df = nw.from_native(constructor_eager(data), eager_only=True)
result = {
"a_mean": [df["a"].mean()],
"a_sum": [df["a"].sum()],
"a_skew": [df["a"].skew()],
"b_nunique": [df["b"].n_unique()],
"b_skew": [df["b"].skew()],
"c_nunique": [df["c"].n_unique()],
"c_skew": [df["c"].skew()],
"z_min": [df["z"].min()],
"z_max": [df["z"].max()],
}
Expand All @@ -47,7 +65,98 @@ def test_unary_series(constructor_eager: ConstructorEager) -> None:
"a_skew": [0.0],
"b_nunique": [2],
"b_skew": [0.7071067811865465],
"c_nunique": [3],
"c_skew": [float("nan")],
"z_min": [7.0],
"z_max": [9.0],
}
compare_dicts(result, expected)


def test_unary_two_elements(constructor: Constructor) -> None:
    """Check ``n_unique`` and ``skew`` expressions on two-row columns.

    Columns ``a`` and ``b`` hold two distinct integers and are expected to
    have a skew of ``0.0``; column ``c`` holds a float and a NaN and is
    expected to have a NaN skew.
    """
    nan = float("nan")
    data = {"a": [1, 2], "b": [2, 10], "c": [2.0, nan]}
    frame = nw.from_native(constructor(data))
    col_a = nw.col("a")
    col_b = nw.col("b")
    col_c = nw.col("c")
    result = frame.select(
        a_nunique=col_a.n_unique(),
        a_skew=col_a.skew(),
        b_nunique=col_b.n_unique(),
        b_skew=col_b.skew(),
        c_nunique=col_c.n_unique(),
        c_skew=col_c.skew(),
    )
    expected = {
        "a_nunique": [2],
        "a_skew": [0.0],
        "b_nunique": [2],
        "b_skew": [0.0],
        "c_nunique": [2],
        "c_skew": [nan],
    }
    compare_dicts(result, expected)


def test_unary_two_elements_series(constructor_eager: ConstructorEager) -> None:
    """Check ``Series.n_unique`` and ``Series.skew`` on two-element series.

    Mirrors the expression-based two-element test, but calls the methods
    directly on series pulled out of an eager frame.
    """
    nan = float("nan")
    data = {"a": [1, 2], "b": [2, 10], "c": [2.0, nan]}
    df = nw.from_native(constructor_eager(data), eager_only=True)
    ser_a = df["a"]
    ser_b = df["b"]
    ser_c = df["c"]
    # Each scalar is wrapped in a one-element list so the dict has the same
    # shape as ``expected``.
    result = {
        "a_nunique": [ser_a.n_unique()],
        "a_skew": [ser_a.skew()],
        "b_nunique": [ser_b.n_unique()],
        "b_skew": [ser_b.skew()],
        "c_nunique": [ser_c.n_unique()],
        "c_skew": [ser_c.skew()],
    }
    expected = {
        "a_nunique": [2],
        "a_skew": [0.0],
        "b_nunique": [2],
        "b_skew": [0.0],
        "c_nunique": [2],
        "c_skew": [nan],
    }
    compare_dicts(result, expected)


def test_unary_one_element(constructor: Constructor) -> None:
    """Check ``n_unique`` and ``skew`` expressions on single-row columns.

    Every column has exactly one value, so each ``n_unique`` is expected to
    be ``1`` and each ``skew`` is expected to be NaN.
    """
    nan = float("nan")
    data = {"a": [1], "b": [2], "c": [nan]}
    # Dask runs into a divide by zero RuntimeWarning for 1 element skew, so
    # the whole frame construction and selection run with warnings silenced.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        frame = nw.from_native(constructor(data))
        result = frame.select(
            a_nunique=nw.col("a").n_unique(),
            a_skew=nw.col("a").skew(),
            b_nunique=nw.col("b").n_unique(),
            b_skew=nw.col("b").skew(),
            c_nunique=nw.col("c").n_unique(),
            c_skew=nw.col("c").skew(),
        )
    expected = {
        "a_nunique": [1],
        "a_skew": [nan],
        "b_nunique": [1],
        "b_skew": [nan],
        "c_nunique": [1],
        "c_skew": [nan],
    }
    compare_dicts(result, expected)


def test_unary_one_element_series(constructor_eager: ConstructorEager) -> None:
    """Check ``Series.n_unique`` and ``Series.skew`` on one-element series.

    Every series has exactly one value, so each ``n_unique`` is expected to
    be ``1`` and each ``skew`` is expected to be NaN.
    """
    data = {"a": [1], "b": [2], "c": [float("nan")]}
    # ``eager_only=True`` matches the other eager series tests in this file
    # and makes the eager-only intent of the ``df[...]`` indexing explicit.
    df = nw.from_native(constructor_eager(data), eager_only=True)
    result = {
        "a_nunique": [df["a"].n_unique()],
        "a_skew": [df["a"].skew()],
        "b_nunique": [df["b"].n_unique()],
        "b_skew": [df["b"].skew()],
        "c_nunique": [df["c"].n_unique()],
        "c_skew": [df["c"].skew()],
    }
    expected = {
        "a_nunique": [1],
        "a_skew": [float("nan")],
        "b_nunique": [1],
        "b_skew": [float("nan")],
        "c_nunique": [1],
        "c_skew": [float("nan")],
    }
    compare_dicts(result, expected)

0 comments on commit 11efd49

Please sign in to comment.