forked from narwhals-dev/narwhals
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Implemented `str.split` without the `inclusive` option (https://docs.pola.rs/api/python/dev/reference/expressions/api/polars.Expr.str.split.html) * Added support for the _arrow, _dask, _duckdb and spark_like backends * Added support for pandas_like (pyarrow-backed only)
- Loading branch information
Stelios Kritsotalakis
authored and
Stelios Kritsotalakis
committed
Feb 21, 2025
1 parent
44324b9
commit 6dfecfa
Showing
13 changed files
with
259 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,6 +11,7 @@ | |
- replace | ||
- replace_all | ||
- slice | ||
- split | ||
- starts_with | ||
- strip_chars | ||
- tail | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,6 +11,7 @@ | |
- replace | ||
- replace_all | ||
- slice | ||
- split | ||
- starts_with | ||
- strip_chars | ||
- tail | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
from __future__ import annotations | ||
|
||
import re | ||
from typing import Any | ||
|
||
import pytest | ||
|
||
import narwhals.stable.v1 as nw | ||
from tests.utils import Constructor | ||
from tests.utils import ConstructorEager | ||
from tests.utils import assert_equal_data | ||
|
||
# Shared input for both split tests: one string column "s" whose values mix space, underscore and comma separators.
data = {"s": ["foo bar", "foo_bar", "foo_bar_baz", "foo,bar"]} | ||
|
||
|
||
@pytest.mark.parametrize(
    ("by", "expected"),
    [
        (
            "_",
            {"s": [["foo bar"], ["foo", "bar"], ["foo", "bar", "baz"], ["foo,bar"]]},
        ),
        (
            ",",
            {"s": [["foo bar"], ["foo_bar"], ["foo_bar_baz"], ["foo", "bar"]]},
        ),
    ],
)
def test_str_split(
    constructor: Constructor,
    by: str,
    expected: Any,
) -> None:
    """Check ``Expr.str.split`` against every parametrized constructor.

    Constructors whose name starts with ``"pandas"`` and does not contain
    ``"pyarrow"`` are expected to raise ``TypeError``; every other constructor
    must produce the ``expected`` list column for the given ``by`` separator.
    """
    backend_name = constructor.__name__
    frame = nw.from_native(constructor(data))
    needs_pyarrow = backend_name.startswith("pandas") and (
        "pyarrow" not in backend_name
    )

    if needs_pyarrow:
        # Build the expression inside the context manager so that any error
        # raised while constructing it is still covered by the assertion.
        error_pattern = re.escape("This operation requires a pyarrow-backed series. ")
        with pytest.raises(TypeError, match=error_pattern):
            frame.select(nw.col("s").str.split(by=by))
    else:
        split_frame = frame.select(nw.col("s").str.split(by=by))
        assert_equal_data(split_frame, expected)
|
||
|
||
@pytest.mark.parametrize(
    ("by", "expected"),
    [
        (
            "_",
            {"s": [["foo bar"], ["foo", "bar"], ["foo", "bar", "baz"], ["foo,bar"]]},
        ),
        (
            ",",
            {"s": [["foo bar"], ["foo_bar"], ["foo_bar_baz"], ["foo", "bar"]]},
        ),
    ],
)
def test_str_split_series(
    constructor_eager: ConstructorEager,
    by: str,
    expected: Any,
) -> None:
    """Check ``Series.str.split`` against every parametrized eager constructor.

    Constructors whose name starts with ``"pandas"`` and does not contain
    ``"pyarrow"`` are expected to raise ``TypeError``; every other constructor
    must produce the ``expected`` lists for the given ``by`` separator.
    """
    backend_name = constructor_eager.__name__
    frame = nw.from_native(constructor_eager(data), eager_only=True)
    needs_pyarrow = backend_name.startswith("pandas") and (
        "pyarrow" not in backend_name
    )

    if needs_pyarrow:
        # Column access and the split call both stay inside the context
        # manager, matching the scope of the expected failure.
        error_pattern = re.escape("This operation requires a pyarrow-backed series. ")
        with pytest.raises(TypeError, match=error_pattern):
            frame["s"].str.split(by=by)
    else:
        split_series = frame["s"].str.split(by=by)
        assert_equal_data({"s": split_series}, expected)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters