fix: Expand Int128 testing and fix failing tests

pola-rs · Dec 29, 2024
commit c9ba7bf (1 parent: f5f4cb5)
Show file tree

Hide file tree

Showing 14 changed files with 53 additions and 156 deletions.
diff --git a/crates/polars-python/src/map/mod.rs b/crates/polars-python/src/map/mod.rs
@@ -28,6 +28,7 @@ impl PyArrowPrimitiveType for Int8Type {}
 impl PyArrowPrimitiveType for Int16Type {}
 impl PyArrowPrimitiveType for Int32Type {}
 impl PyArrowPrimitiveType for Int64Type {}
+impl PyArrowPrimitiveType for Int128Type {}
 impl PyArrowPrimitiveType for Float32Type {}
 impl PyArrowPrimitiveType for Float64Type {}
 

diff --git a/crates/polars-python/src/series/map.rs b/crates/polars-python/src/series/map.rs
@@ -141,6 +141,17 @@ impl PySeries {
                     )?;
                     ca.into_series()
                 },
+                Some(DataType::Int128) => {
+                    let ca: Int128Chunked = dispatch_apply!(
+                        series,
+                        apply_lambda_with_primitive_out_type,
+                        py,
+                        function,
+                        0,
+                        None
+                    )?;
+                    ca.into_series()
+                },
                 Some(DataType::UInt8) => {
                     let ca: UInt8Chunked = dispatch_apply!(
                         series,

diff --git a/crates/polars-python/src/utils.rs b/crates/polars-python/src/utils.rs
@@ -13,6 +13,7 @@ macro_rules! apply_method_all_arrow_series2 {
             DataType::Int16 => $self.i16().unwrap().$method($($args),*),
             DataType::Int32 => $self.i32().unwrap().$method($($args),*),
             DataType::Int64 => $self.i64().unwrap().$method($($args),*),
+            DataType::Int128 => $self.i128().unwrap().$method($($args),*),
             DataType::Float32 => $self.f32().unwrap().$method($($args),*),
             DataType::Float64 => $self.f64().unwrap().$method($($args),*),
             DataType::Date => $self.date().unwrap().$method($($args),*),

diff --git a/py-polars/tests/unit/dataframe/test_getitem.py b/py-polars/tests/unit/dataframe/test_getitem.py
@@ -10,6 +10,7 @@
 import polars as pl
 from polars.testing import assert_frame_equal, assert_series_equal
 from polars.testing.parametric import column, dataframes
+from tests.unit.conftest import INTEGER_DTYPES, SIGNED_INTEGER_DTYPES
 
 
 @given(
@@ -309,16 +310,7 @@ def test_df_getitem() -> None:
     assert_frame_equal(df[pl.Series("", ["a", "b"])], df)
 
     # pl.Series: positive idxs or empty idxs for row selection.
-    for pl_dtype in (
-        pl.Int8,
-        pl.Int16,
-        pl.Int32,
-        pl.Int64,
-        pl.UInt8,
-        pl.UInt16,
-        pl.UInt32,
-        pl.UInt64,
-    ):
+    for pl_dtype in INTEGER_DTYPES:
         assert_frame_equal(
             df[pl.Series("", [1, 0, 3, 2, 3, 0], dtype=pl_dtype)],
             pl.DataFrame(
@@ -328,7 +320,7 @@ def test_df_getitem() -> None:
         assert df[pl.Series("", [], dtype=pl_dtype)].columns == ["a", "b"]
 
     # pl.Series: positive and negative idxs for row selection.
-    for pl_dtype in (pl.Int8, pl.Int16, pl.Int32, pl.Int64):
+    for pl_dtype in SIGNED_INTEGER_DTYPES:
         assert_frame_equal(
             df[pl.Series("", [-1, 0, -3, -2, 3, -4], dtype=pl_dtype)],
             pl.DataFrame(

diff --git a/py-polars/tests/unit/datatypes/test_enum.py b/py-polars/tests/unit/datatypes/test_enum.py
@@ -21,6 +21,7 @@
     SchemaError,
 )
 from polars.testing import assert_frame_equal, assert_series_equal
+from tests.unit.conftest import INTEGER_DTYPES
 
 if sys.version_info >= (3, 11):
     from enum import StrEnum
@@ -498,10 +499,7 @@ def test_enum_categories_series_zero_copy() -> None:
     assert result_dtype == dtype
 
 
-@pytest.mark.parametrize(
-    "dtype",
-    [pl.UInt8, pl.UInt16, pl.UInt32, pl.UInt64, pl.Int8, pl.Int16, pl.Int32, pl.Int64],
-)
+@pytest.mark.parametrize("dtype", INTEGER_DTYPES)
 def test_enum_cast_from_other_integer_dtype(dtype: pl.DataType) -> None:
     enum_dtype = pl.Enum(["a", "b", "c", "d"])
     series = pl.Series([1, 2, 3, 3, 2, 1], dtype=dtype)
@@ -585,19 +583,7 @@ def test_category_comparison_subset() -> None:
     assert out["dt1"].dtype != out["dt2"].dtype
 
 
-@pytest.mark.parametrize(
-    "dt",
-    [
-        pl.UInt8,
-        pl.UInt16,
-        pl.UInt32,
-        pl.UInt64,
-        pl.Int8,
-        pl.Int16,
-        pl.Int32,
-        pl.Int64,
-    ],
-)
+@pytest.mark.parametrize("dt", INTEGER_DTYPES)
 def test_integer_cast_to_enum_15738(dt: pl.DataType) -> None:
     s = pl.Series([0, 1, 2], dtype=dt).cast(pl.Enum(["a", "b", "c"]))
     assert s.to_list() == ["a", "b", "c"]

diff --git a/py-polars/tests/unit/lazyframe/test_lazyframe.py b/py-polars/tests/unit/lazyframe/test_lazyframe.py
@@ -19,7 +19,7 @@
     PolarsInefficientMapWarning,
 )
 from polars.testing import assert_frame_equal, assert_series_equal
-from tests.unit.conftest import FLOAT_DTYPES
+from tests.unit.conftest import FLOAT_DTYPES, NUMERIC_DTYPES
 
 if TYPE_CHECKING:
     from _pytest.capture import CaptureFixture
@@ -1334,21 +1334,7 @@ def test_compare_schema_between_lazy_and_eager_6904() -> None:
 
 
 @pytest.mark.slow
-@pytest.mark.parametrize(
-    "dtype",
-    [
-        pl.UInt8,
-        pl.UInt16,
-        pl.UInt32,
-        pl.UInt64,
-        pl.Int8,
-        pl.Int16,
-        pl.Int32,
-        pl.Int64,
-        pl.Float32,
-        pl.Float64,
-    ],
-)
+@pytest.mark.parametrize("dtype", NUMERIC_DTYPES)
 @pytest.mark.parametrize(
     "func",
     [

diff --git a/py-polars/tests/unit/operations/map/test_map_elements.py b/py-polars/tests/unit/operations/map/test_map_elements.py
@@ -9,6 +9,7 @@
 import polars as pl
 from polars.exceptions import PolarsInefficientMapWarning
 from polars.testing import assert_frame_equal, assert_series_equal
+from tests.unit.conftest import INTEGER_DTYPES
 
 pytestmark = pytest.mark.filterwarnings(
     "ignore::polars.exceptions.PolarsInefficientMapWarning"
@@ -129,18 +130,8 @@ def test_map_elements_list_any_value_fallback() -> None:
 
 
 def test_map_elements_all_types() -> None:
-    dtypes = [
-        pl.UInt8,
-        pl.UInt16,
-        pl.UInt32,
-        pl.UInt64,
-        pl.Int8,
-        pl.Int16,
-        pl.Int32,
-        pl.Int64,
-    ]
     # test we don't panic
-    for dtype in dtypes:
+    for dtype in INTEGER_DTYPES:
         pl.Series([1, 2, 3, 4, 5], dtype=dtype).map_elements(lambda x: x)
 
 

diff --git a/py-polars/tests/unit/operations/test_bitwise.py b/py-polars/tests/unit/operations/test_bitwise.py
@@ -7,6 +7,7 @@
 
 import polars as pl
 from polars.testing import assert_frame_equal, assert_series_equal
+from tests.unit.conftest import INTEGER_DTYPES
 
 
 @pytest.mark.parametrize("op", ["and_", "or_"])
@@ -80,20 +81,7 @@ def trailing_ones(v: int | None) -> int | None:
         None,
     ],
 )
-@pytest.mark.parametrize(
-    "dtype",
-    [
-        pl.Int8,
-        pl.Int16,
-        pl.Int32,
-        pl.Int64,
-        pl.UInt8,
-        pl.UInt16,
-        pl.UInt32,
-        pl.UInt64,
-        pl.Boolean,
-    ],
-)
+@pytest.mark.parametrize("dtype", [*INTEGER_DTYPES, pl.Boolean])
 @pytest.mark.skipif(sys.version_info < (3, 10), reason="bit_count introduced in 3.10")
 @typing.no_type_check
 def test_bit_counts(value: int, dtype: pl.DataType) -> None:
@@ -106,6 +94,8 @@ def test_bit_counts(value: int, dtype: pl.DataType) -> None:
         bitsize = 32
     elif "64" in str(dtype):
         bitsize = 64
+    elif "128" in str(dtype):
+        bitsize = 128
 
     if bitsize == 1 and value is not None:
         value = value & 1 != 0
@@ -150,10 +140,7 @@ def test_bit_counts(value: int, dtype: pl.DataType) -> None:
     )
 
 
-@pytest.mark.parametrize(
-    "dtype",
-    [pl.Int8, pl.Int16, pl.Int32, pl.Int64, pl.UInt8, pl.UInt16, pl.UInt32, pl.UInt64],
-)
+@pytest.mark.parametrize("dtype", INTEGER_DTYPES)
 def test_bit_aggregations(dtype: pl.DataType) -> None:
     s = pl.Series("a", [0x74, 0x1C, 0x05], dtype)
 
@@ -175,10 +162,7 @@ def test_bit_aggregations(dtype: pl.DataType) -> None:
     )
 
 
-@pytest.mark.parametrize(
-    "dtype",
-    [pl.Int8, pl.Int16, pl.Int32, pl.Int64, pl.UInt8, pl.UInt16, pl.UInt32, pl.UInt64],
-)
+@pytest.mark.parametrize("dtype", INTEGER_DTYPES)
 def test_bit_group_by(dtype: pl.DataType) -> None:
     df = pl.DataFrame(
         [

diff --git a/py-polars/tests/unit/operations/test_cast.py b/py-polars/tests/unit/operations/test_cast.py
@@ -11,6 +11,7 @@
 from polars.exceptions import ComputeError, InvalidOperationError
 from polars.testing import assert_frame_equal
 from polars.testing.asserts.series import assert_series_equal
+from tests.unit.conftest import INTEGER_DTYPES
 
 if TYPE_CHECKING:
     from polars._typing import PolarsDataType, PythonDataType
@@ -560,21 +561,14 @@ def test_strict_cast_string(
 @pytest.mark.parametrize(
     "dtype_out",
     [
-        (pl.UInt8),
-        (pl.Int8),
-        (pl.UInt16),
-        (pl.Int16),
-        (pl.UInt32),
-        (pl.Int32),
-        (pl.UInt64),
-        (pl.Int64),
-        (pl.Date),
-        (pl.Datetime),
-        (pl.Time),
-        (pl.Duration),
-        (pl.String),
-        (pl.Categorical),
-        (pl.Enum(["1", "2"])),
+        *INTEGER_DTYPES,
+        pl.Date,
+        pl.Datetime,
+        pl.Time,
+        pl.Duration,
+        pl.String,
+        pl.Categorical,
+        pl.Enum(["1", "2"]),
     ],
 )
 def test_cast_categorical_name_retention(
@@ -669,10 +663,7 @@ def test_all_null_cast_5826() -> None:
     assert out.item() is None
 
 
-@pytest.mark.parametrize(
-    "dtype",
-    [pl.UInt8, pl.UInt16, pl.UInt32, pl.UInt64, pl.Int8, pl.Int16, pl.Int32, pl.Int64],
-)
+@pytest.mark.parametrize("dtype", INTEGER_DTYPES)
 def test_bool_numeric_supertype(dtype: PolarsDataType) -> None:
     df = pl.DataFrame({"v": [1, 2, 3, 4, 5, 6]})
     result = df.select((pl.col("v") < 3).sum().cast(dtype) / pl.len())

diff --git a/py-polars/tests/unit/operations/test_interpolate.py b/py-polars/tests/unit/operations/test_interpolate.py
@@ -7,6 +7,7 @@
 
 import polars as pl
 from polars.testing import assert_frame_equal
+from tests.unit.conftest import NUMERIC_DTYPES
 
 if TYPE_CHECKING:
     from polars._typing import PolarsDataType, PolarsTemporalType
@@ -85,21 +86,7 @@ def test_interpolate_temporal_linear(
     assert_frame_equal(result.collect(), expected)
 
 
-@pytest.mark.parametrize(
-    "input_dtype",
-    [
-        pl.Int8,
-        pl.Int16,
-        pl.Int32,
-        pl.Int64,
-        pl.UInt8,
-        pl.UInt16,
-        pl.UInt32,
-        pl.UInt64,
-        pl.Float32,
-        pl.Float64,
-    ],
-)
+@pytest.mark.parametrize("input_dtype", NUMERIC_DTYPES)
 def test_interpolate_nearest(input_dtype: PolarsDataType) -> None:
     df = pl.LazyFrame({"a": [1, None, 2, None, 3]}, schema={"a": input_dtype})
     result = df.with_columns(pl.all().interpolate(method="nearest"))

diff --git a/py-polars/tests/unit/test_datatypes.py b/py-polars/tests/unit/test_datatypes.py
@@ -215,6 +215,11 @@ def test_raise_invalid_namespace() -> None:
         (pl.UInt32, 0, 4294967295),
         (pl.Int64, -9223372036854775808, 9223372036854775807),
         (pl.UInt64, 0, 18446744073709551615),
+        (
+            pl.Int128,
+            -170141183460469231731687303715884105728,
+            170141183460469231731687303715884105727,
+        ),
         (pl.Float32, float("-inf"), float("inf")),
         (pl.Float64, float("-inf"), float("inf")),
     ],

diff --git a/py-polars/tests/unit/test_queries.py b/py-polars/tests/unit/test_queries.py
@@ -8,6 +8,7 @@
 
 import polars as pl
 from polars.testing import assert_frame_equal
+from tests.unit.conftest import NUMERIC_DTYPES
 
 
 def test_sort_by_bools() -> None:
@@ -172,18 +173,7 @@ def test_group_by_agg_equals_zero_3535() -> None:
 
 
 def test_dtype_concat_3735() -> None:
-    for dt in [
-        pl.Int8,
-        pl.Int16,
-        pl.Int32,
-        pl.Int64,
-        pl.UInt8,
-        pl.UInt16,
-        pl.UInt32,
-        pl.UInt64,
-        pl.Float32,
-        pl.Float64,
-    ]:
+    for dt in NUMERIC_DTYPES:
         d1 = pl.DataFrame([pl.Series("val", [1, 2], dtype=dt)])
 
     d2 = pl.DataFrame([pl.Series("val", [3, 4], dtype=dt)])

diff --git a/py-polars/tests/unit/test_row_encoding.py b/py-polars/tests/unit/test_row_encoding.py
@@ -10,6 +10,7 @@
 from polars.testing import assert_frame_equal, assert_series_equal
 from polars.testing.parametric import dataframes, series
 from polars.testing.parametric.strategies.dtype import dtypes
+from tests.unit.conftest import FLOAT_DTYPES, INTEGER_DTYPES
 
 if TYPE_CHECKING:
     from polars._typing import PolarsDataType
@@ -78,19 +79,7 @@ def test_bool(field: tuple[bool, bool, bool]) -> None:
     roundtrip_series_re([True, False], pl.Boolean, field)
 
 
-@pytest.mark.parametrize(
-    "dtype",
-    [
-        pl.Int8,
-        pl.Int16,
-        pl.Int32,
-        pl.Int64,
-        pl.UInt8,
-        pl.UInt16,
-        pl.UInt32,
-        pl.UInt64,
-    ],
-)
+@pytest.mark.parametrize("dtype", INTEGER_DTYPES)
 @pytest.mark.parametrize("field", FIELD_COMBS)
 def test_int(dtype: pl.DataType, field: tuple[bool, bool, bool]) -> None:
     min = pl.select(x=dtype.min()).item()  # type: ignore[attr-defined]
@@ -106,13 +95,7 @@ def test_int(dtype: pl.DataType, field: tuple[bool, bool, bool]) -> None:
     roundtrip_series_re([min, 0, max], dtype, field)
 
 
-@pytest.mark.parametrize(
-    "dtype",
-    [
-        pl.Float32,
-        pl.Float64,
-    ],
-)
+@pytest.mark.parametrize("dtype", FLOAT_DTYPES)
 @pytest.mark.parametrize("field", FIELD_COMBS)
 def test_float(dtype: pl.DataType, field: tuple[bool, bool, bool]) -> None:
     inf = float("inf")