Skip to content

Commit

Permalink
handle new apply warnings in existing tests
Browse files (browse the repository at this point in the history)
  • Loading branch information
alexander-beedie committed Jul 22, 2023
1 parent e8410f2 commit 63ddaf7
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 29 deletions.
26 changes: 18 additions & 8 deletions py-polars/polars/utils/udfs.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -371,13 +371,14 @@ class RewrittenInstructions:
from the identification of expression translation opportunities.
"""

_ignored_ops = frozenset(["COPY_FREE_VARS", "PRECALL", "RESUME", "RETURN_VALUE"])

def __init__(self, instructions: Iterator[Instruction]):
self._instructions = instructions
self._rewritten_instructions = self._apply_rules(
self._upgrade_instruction(inst)
for inst in instructions
if inst.opname
not in ("COPY_FREE_VARS", "PRECALL", "RESUME", "RETURN_VALUE")
if inst.opname not in self._ignored_ops
)

def __len__(self) -> int:
Expand All @@ -404,14 +405,16 @@ def _apply_rules(self, instructions: Iterator[Instruction]) -> list[Instruction]
apply_rewrite(inst, updated_instructions)
for apply_rewrite in (
# add any other rewrite methods here
self._functions,
self._methods,
self._rewrite_functions,
self._rewrite_methods,
)
):
updated_instructions.append(inst)
return updated_instructions

def _functions(self, inst: Instruction, instructions: list[Instruction]) -> bool:
def _rewrite_functions(
self, inst: Instruction, instructions: list[Instruction]
) -> bool:
"""Replace numpy/json function calls with a synthetic POLARS_EXPRESSION op."""
if inst.opname == "LOAD_GLOBAL" and (
inst.argval in _NUMPY_MODULE_ALIASES or inst.argval == "json"
Expand Down Expand Up @@ -445,7 +448,9 @@ def _functions(self, inst: Instruction, instructions: list[Instruction]) -> bool
return True
return False

def _methods(self, inst: Instruction, instructions: list[Instruction]) -> bool:
def _rewrite_methods(
self, inst: Instruction, instructions: list[Instruction]
) -> bool:
"""Replace python method calls with synthetic POLARS_EXPRESSION op."""
if inst.opname == "LOAD_METHOD" and inst.argval in _PYFUNCTION_MAP:
if (
Expand Down Expand Up @@ -482,10 +487,15 @@ def _is_raw_function(function: Callable[[Any], Any]) -> tuple[str, str]:
func_name = function.__name__
if func_module == "numpy" and func_name in _NUMPY_FUNCTIONS:
return "np", func_name
elif func_module in ("builtins", "json") and func_name == "loads":
return "json", "str.json_extract"
elif func_module == "builtins" and func_name == "loads":
import json # double-check since it is referenced via 'builtins'

if function is json.loads:
return "json", "str.json_extract"

except AttributeError:
pass

return "", ""


Expand Down
57 changes: 36 additions & 21 deletions py-polars/tests/unit/operations/test_apply.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -172,18 +172,28 @@ def test_datelike_identity() -> None:
def test_apply_list_anyvalue_fallback() -> None:
import json

df = pl.DataFrame({"text": ['[{"x": 1, "y": 2}, {"x": 3, "y": 4}]']})
assert df.select(pl.col("text").apply(json.loads)).to_dict(False) == {
"text": [[{"x": 1, "y": 2}, {"x": 3, "y": 4}]]
}
with pytest.warns(
PolarsInefficientApplyWarning,
match=r'(?s)replace your `apply` with.*pl.col\("text"\).str.json_extract()',
):
df = pl.DataFrame({"text": ['[{"x": 1, "y": 2}, {"x": 3, "y": 4}]']})
assert df.select(pl.col("text").apply(json.loads)).to_dict(False) == {
"text": [[{"x": 1, "y": 2}, {"x": 3, "y": 4}]]
}

# starts with empty list '[]'
df = pl.DataFrame(
{"text": ["[]", '[{"x": 1, "y": 2}, {"x": 3, "y": 4}]', '[{"x": 1, "y": 2}]']}
)
assert df.select(pl.col("text").apply(json.loads)).to_dict(False) == {
"text": [[], [{"x": 1, "y": 2}, {"x": 3, "y": 4}], [{"x": 1, "y": 2}]]
}
# starts with empty list '[]'
df = pl.DataFrame(
{
"text": [
"[]",
'[{"x": 1, "y": 2}, {"x": 3, "y": 4}]',
'[{"x": 1, "y": 2}]',
]
}
)
assert df.select(pl.col("text").apply(json.loads)).to_dict(False) == {
"text": [[], [{"x": 1, "y": 2}, {"x": 3, "y": 4}], [{"x": 1, "y": 2}]]
}


def test_apply_all_types() -> None:
Expand Down Expand Up @@ -246,7 +256,8 @@ def test_apply_skip_nulls() -> None:

def test_apply_object_dtypes() -> None:
with pytest.warns(
PolarsInefficientApplyWarning, match="In this case, you can replace"
PolarsInefficientApplyWarning,
match=r"(?s)replace your `apply` with.*lambda x:",
):
assert pl.DataFrame(
{"a": pl.Series([1, 2, "a", 4, 5], dtype=pl.Object)}
Expand Down Expand Up @@ -283,15 +294,19 @@ def test_apply_explicit_list_output_type() -> None:


def test_apply_dict() -> None:
df = pl.DataFrame({"Col": ['{"A":"Value1"}', '{"B":"Value2"}']})
assert df.select(pl.col("Col").apply(json.loads)).to_dict(False) == {
"Col": [{"A": "Value1", "B": None}, {"A": None, "B": "Value2"}]
}
assert pl.DataFrame(
{"Col": ['{"A":"Value1", "B":"Value2"}', '{"B":"Value3"}']}
).select(pl.col("Col").apply(json.loads)).to_dict(False) == {
"Col": [{"A": "Value1", "B": "Value2"}, {"A": None, "B": "Value3"}]
}
with pytest.warns(
PolarsInefficientApplyWarning,
match=r'(?s)replace your `apply` with.*pl.col\("abc"\).str.json_extract()',
):
df = pl.DataFrame({"abc": ['{"A":"Value1"}', '{"B":"Value2"}']})
assert df.select(pl.col("abc").apply(json.loads)).to_dict(False) == {
"abc": [{"A": "Value1", "B": None}, {"A": None, "B": "Value2"}]
}
assert pl.DataFrame(
{"abc": ['{"A":"Value1", "B":"Value2"}', '{"B":"Value3"}']}
).select(pl.col("abc").apply(json.loads)).to_dict(False) == {
"abc": [{"A": "Value1", "B": "Value2"}, {"A": None, "B": "Value3"}]
}


def test_apply_pass_name() -> None:
Expand Down

0 comments on commit 63ddaf7

Please sign in to comment.