diff --git a/crates/polars-plan/src/dsl/string.rs b/crates/polars-plan/src/dsl/string.rs
index a539f13e183e..aec6952cb057 100644
--- a/crates/polars-plan/src/dsl/string.rs
+++ b/crates/polars-plan/src/dsl/string.rs
@@ -573,8 +573,9 @@ impl StringNameSpace {
 
     #[cfg(feature = "extract_jsonpath")]
     pub fn json_decode(self, dtype: Option<DataType>, infer_schema_len: Option<usize>) -> Expr {
+        // Apply, because dtype should be inferred only once and be consistent over chunks/morsels.
         self.0
-            .map_private(FunctionExpr::StringExpr(StringFunction::JsonDecode {
+            .apply_private(FunctionExpr::StringExpr(StringFunction::JsonDecode {
                 dtype,
                 infer_schema_len,
             }))
diff --git a/py-polars/tests/unit/datatypes/test_string.py b/py-polars/tests/unit/datatypes/test_string.py
index ce63c4b6b79c..4250b2c23e99 100644
--- a/py-polars/tests/unit/datatypes/test_string.py
+++ b/py-polars/tests/unit/datatypes/test_string.py
@@ -1,3 +1,5 @@
+import json
+
 import polars as pl
 from polars.testing import assert_series_equal
 
@@ -28,3 +30,17 @@ def test_utf8_alias_lit() -> None:
     result = pl.select(a=pl.lit(5, dtype=pl.Utf8)).to_series()
     expected = pl.Series("a", ["5"], dtype=pl.String)
     assert_series_equal(result, expected)
+
+
+def test_json_decode_multiple_chunks() -> None:
+    a = json.dumps({"x": None})
+    b = json.dumps({"x": True})
+
+    df_1 = pl.Series([a]).to_frame("s")
+    df_2 = pl.Series([b]).to_frame("s")
+
+    df = pl.concat([df_1, df_2])
+
+    assert df.with_columns(pl.col("s").str.json_decode()).to_dict(as_series=False) == {
+        "s": [{"x": None}, {"x": True}]
+    }