From f26d422e1688a94e3953eb4eec52f785e58f609f Mon Sep 17 00:00:00 2001 From: ritchie Date: Sat, 6 Jul 2024 14:55:09 +0200 Subject: [PATCH 1/3] fix: Don't rechunk on phys_repr --- crates/polars-core/src/series/mod.rs | 8 ++++++-- py-polars/tests/unit/operations/test_gather.py | 11 +++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/crates/polars-core/src/series/mod.rs b/crates/polars-core/src/series/mod.rs index 567f7e66f9d7..98c7ca161b12 100644 --- a/crates/polars-core/src/series/mod.rs +++ b/crates/polars-core/src/series/mod.rs @@ -592,8 +592,12 @@ impl Series { pub fn to_physical_repr(&self) -> Cow { use DataType::*; match self.dtype() { - Date => Cow::Owned(self.cast(&Int32).unwrap()), - Datetime(_, _) | Duration(_) | Time => Cow::Owned(self.cast(&Int64).unwrap()), + // NOTE: Don't use cast here, as it might rechunk (if all nulls) + // which is not allowed in a phys repr. + Date => Cow::Owned(self.date().unwrap().0.clone().into_series()), + Datetime(_, _) => Cow::Owned(self.datetime().unwrap().0.clone().into_series()), + Duration(_) => Cow::Owned(self.duration().unwrap().0.clone().into_series()), + Time => Cow::Owned(self.time().unwrap().0.clone().into_series()), #[cfg(feature = "dtype-categorical")] Categorical(_, _) | Enum(_, _) => { let ca = self.categorical().unwrap(); diff --git a/py-polars/tests/unit/operations/test_gather.py b/py-polars/tests/unit/operations/test_gather.py index e897a8da57a6..fab07dc71956 100644 --- a/py-polars/tests/unit/operations/test_gather.py +++ b/py-polars/tests/unit/operations/test_gather.py @@ -137,3 +137,14 @@ def test_list_get_null_on_oob_true() -> None: df = s_no_nulls.to_frame().with_columns(pl.lit(2).alias("idx")) out = df.select(pl.col("a").list.get("idx", null_on_oob=True)).to_series() assert_series_equal(out, expected) + + +def test_chunked_gather_phys_repr_17446() -> None: + dfa = pl.DataFrame({"replace_unique_id": range(2)}) + + for dt in [pl.Date, pl.Time, pl.Duration]: + dfb = dfa.clone() + dfb = dfb.with_columns(ds_start_date_right=pl.lit(None).cast(dt)) + dfb = pl.concat([dfb, dfb]) + + assert dfa.join(dfb, how="left", on=pl.col("replace_unique_id")).shape == (4, 2) From b2cd1dd4d70fee18a8a957ce62272c62293431fc Mon Sep 17 00:00:00 2001 From: ritchie Date: Sat, 6 Jul 2024 14:59:54 +0200 Subject: [PATCH 2/3] fix features --- crates/polars-core/src/series/mod.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crates/polars-core/src/series/mod.rs b/crates/polars-core/src/series/mod.rs index 98c7ca161b12..b4fe9f1b4900 100644 --- a/crates/polars-core/src/series/mod.rs +++ b/crates/polars-core/src/series/mod.rs @@ -594,9 +594,12 @@ impl Series { match self.dtype() { // NOTE: Don't use cast here, as it might rechunk (if all nulls) // which is not allowed in a phys repr. + #[cfg(feature = "dtype-date")] Date => Cow::Owned(self.date().unwrap().0.clone().into_series()), + #[cfg(feature = "dtype-duration")] Datetime(_, _) => Cow::Owned(self.datetime().unwrap().0.clone().into_series()), Duration(_) => Cow::Owned(self.duration().unwrap().0.clone().into_series()), + #[cfg(feature = "dtype-time")] Time => Cow::Owned(self.time().unwrap().0.clone().into_series()), #[cfg(feature = "dtype-categorical")] Categorical(_, _) | Enum(_, _) => { From 1d552cd9a18e82ee537d56b9ae74499e0199de9f Mon Sep 17 00:00:00 2001 From: ritchie Date: Sat, 6 Jul 2024 15:03:23 +0200 Subject: [PATCH 3/3] fix --- crates/polars-core/src/series/mod.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/polars-core/src/series/mod.rs b/crates/polars-core/src/series/mod.rs index b4fe9f1b4900..4c63bedd9ffd 100644 --- a/crates/polars-core/src/series/mod.rs +++ b/crates/polars-core/src/series/mod.rs @@ -596,8 +596,9 @@ impl Series { // which is not allowed in a phys repr. #[cfg(feature = "dtype-date")] Date => Cow::Owned(self.date().unwrap().0.clone().into_series()), - #[cfg(feature = "dtype-duration")] + #[cfg(feature = "dtype-datetime")] Datetime(_, _) => Cow::Owned(self.datetime().unwrap().0.clone().into_series()), + #[cfg(feature = "dtype-duration")] Duration(_) => Cow::Owned(self.duration().unwrap().0.clone().into_series()), #[cfg(feature = "dtype-time")] Time => Cow::Owned(self.time().unwrap().0.clone().into_series()),