Skip to content

Commit

Permalink
feat: Table.count_row_if
Browse files Browse the repository at this point in the history
  • Loading branch information
lars-reimann committed May 17, 2024
1 parent b94a55e commit 6047f91
Show file tree
Hide file tree
Showing 2 changed files with 132 additions and 1 deletion.
69 changes: 68 additions & 1 deletion src/safeds/data/tabular/containers/_table.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Any, Literal
from typing import TYPE_CHECKING, Any, Literal, overload

from safeds._config import _get_device, _init_default_device
from safeds._config._polars import _get_polars_config
Expand Down Expand Up @@ -1008,6 +1008,73 @@ def transform_column(
# Row operations
# ------------------------------------------------------------------------------------------------------------------

@overload
def count_row_if(
    self,
    predicate: Callable[[Row], Cell[bool | None]],
    *,
    ignore_unknown: Literal[True] = ...,
) -> int: ...

@overload
def count_row_if(
    self,
    predicate: Callable[[Row], Cell[bool | None]],
    *,
    ignore_unknown: bool,
) -> int | None: ...

def count_row_if(
    self,
    predicate: Callable[[Row], Cell[bool | None]],
    *,
    ignore_unknown: bool = True,
) -> int | None:
    """
    Return how many rows in the table satisfy the predicate.

    The predicate can return one of three results:

    * True, if the row satisfies the predicate.
    * False, if the row does not satisfy the predicate.
    * None, if the truthiness of the predicate is unknown, e.g. due to missing values.

    By default, cases where the truthiness of the predicate is unknown are ignored and this method returns how
    often the predicate returns True.

    You can instead enable Kleene logic by setting `ignore_unknown=False`. In this case, this method returns None if
    the predicate returns None at least once. Otherwise, it still returns how often the predicate returns True.

    Parameters
    ----------
    predicate:
        The predicate to apply to each row.
    ignore_unknown:
        Whether to ignore cases where the truthiness of the predicate is unknown.

    Returns
    -------
    count:
        The number of rows in the table that satisfy the predicate. None if `ignore_unknown` is False and the
        predicate returns None for at least one row.

    Examples
    --------
    >>> from safeds.data.tabular.containers import Table
    >>> table = Table({"col1": [1, 2, 3], "col2": [1, 3, 3]})
    >>> table.count_row_if(lambda row: row["col1"] == row["col2"])
    2
    >>> table.count_row_if(lambda row: row["col1"] > row["col2"])
    0
    """
    # The predicate is called once with a vectorized row wrapping this table; the polars expression of the
    # resulting cell is then evaluated as a single column named "count".
    expression = predicate(_LazyVectorizedRow(self))._polars_expression
    series = self._lazy_frame.select(expression.alias("count")).collect().get_column("count")

    # Kleene logic: a single unknown (null) result makes the overall count unknown.
    if not ignore_unknown and series.null_count() > 0:
        return None

    # Summing the predicate results yields how often the predicate returned True.
    return series.sum()

# TODO: Rethink group_rows/group_rows_by_column. They should not return a dict.

def remove_duplicate_rows(self) -> Table:
Expand Down
64 changes: 64 additions & 0 deletions tests/safeds/data/tabular/containers/_table/test_count_row_if.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import pytest
from safeds.data.tabular.containers import Table


@pytest.mark.parametrize(
    ("values", "expected"),
    [
        pytest.param([], 0, id="empty"),
        pytest.param([1], 1, id="always true"),
        pytest.param([2], 0, id="always false"),
        pytest.param([None], 0, id="always unknown"),
        pytest.param([1, None], 1, id="true and unknown"),
        pytest.param([2, None], 0, id="false and unknown"),
        pytest.param([1, 2], 1, id="true and false"),
        pytest.param([1, 2, None], 1, id="true and false and unknown"),
    ],
)
def test_should_handle_boolean_logic(
    values: list,
    expected: int,
) -> None:
    """With the default `ignore_unknown`, unknown results are ignored and only True results are counted."""
    # The predicate `a < 2` is True for 1, False for 2, and unknown for None.
    actual = Table({"a": values}).count_row_if(lambda row: row["a"] < 2)
    assert actual == expected


@pytest.mark.parametrize(
    ("values", "expected"),
    [
        pytest.param([], 0, id="empty"),
        pytest.param([1], 1, id="always true"),
        pytest.param([2], 0, id="always false"),
        pytest.param([None], None, id="always unknown"),
        pytest.param([1, None], None, id="true and unknown"),
        pytest.param([2, None], None, id="false and unknown"),
        pytest.param([1, 2], 1, id="true and false"),
        pytest.param([1, 2, None], None, id="true and false and unknown"),
    ],
)
def test_should_handle_kleene_logic(
    values: list,
    expected: int | None,
) -> None:
    """With `ignore_unknown=False`, any unknown result makes the whole count None."""
    # The predicate `a < 2` is True for 1, False for 2, and unknown for None.
    actual = Table({"a": values}).count_row_if(lambda row: row["a"] < 2, ignore_unknown=False)
    assert actual == expected

0 comments on commit 6047f91

Please sign in to comment.