Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: discretize table #327

Merged
merged 41 commits into from
Jul 7, 2023
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
32f8c91
add Discretizer file
robmeth May 26, 2023
b2705e5
add tests for Discretizer
robmeth May 26, 2023
0d06d95
Merge branch 'main' into 143-discretize-table
robmeth May 26, 2023
d88cdec
added Discretizer to init file
robmeth May 26, 2023
7a21a52
style: apply automated linter fixes
megalinter-bot May 26, 2023
432fde4
removed comment
robmeth May 26, 2023
603fda0
Merge branch 'main' into 143-discretize-table
robmeth May 26, 2023
2e72846
Merge branch '143-discretize-table' of https://github.com/Safe-DS/Std…
robmeth May 26, 2023
8ff87bb
trying to fix the test but it doesn't work
robmeth Jun 9, 2023
2e8cf27
requested changes
robmeth Jun 23, 2023
146cb54
Merge branch 'main' into 143-discretize-table
robmeth Jun 23, 2023
a3e46bb
requested changes
robmeth Jun 23, 2023
b33a95f
import changes
robmeth Jun 23, 2023
f053755
Merge branch 'main' into 143-discretize-table
robmeth Jun 30, 2023
b3d6baf
add ValueError and NonNumericColumnError and test them
robmeth Jun 30, 2023
0435b55
fixed import
robmeth Jun 30, 2023
cf55a5e
style: apply automated linter fixes
megalinter-bot Jun 30, 2023
4b58a0d
style: apply automated linter fixes
megalinter-bot Jun 30, 2023
4a7520a
fixed import
robmeth Jun 30, 2023
452b1d8
fixed import
robmeth Jun 30, 2023
99f1391
style: apply automated linter fixes
megalinter-bot Jun 30, 2023
93fe3cc
fixed import
robmeth Jun 30, 2023
820ae0a
merge conflict
robmeth Jun 30, 2023
30b5482
change order of errors
robmeth Jun 30, 2023
7214b68
style: apply automated linter fixes
megalinter-bot Jun 30, 2023
cf38032
requested changes
robmeth Jun 30, 2023
2a5b687
requested changes
robmeth Jun 30, 2023
bcd834f
Merge branch '143-discretize-table' of https://github.com/Safe-DS/Std…
robmeth Jun 30, 2023
135aa9e
make linter happy
robmeth Jun 30, 2023
4e48200
make linter happy
robmeth Jun 30, 2023
c113b59
make linter happy
robmeth Jun 30, 2023
1f332aa
make linter happy
robmeth Jun 30, 2023
7f0775e
make linter happy
robmeth Jun 30, 2023
9b5edf0
style: apply automated linter fixes
megalinter-bot Jun 30, 2023
11a9803
Merge branch 'main' into 143-discretize-table
robmeth Jun 30, 2023
ed00f74
style: apply automated linter fixes
megalinter-bot Jun 30, 2023
7b5df1b
Merge branch 'main' into 143-discretize-table
robmeth Jun 30, 2023
be05305
requested changes, match error message
robmeth Jul 7, 2023
bc92b9a
Merge branch '143-discretize-table' of https://github.com/Safe-DS/Std…
robmeth Jul 7, 2023
b2c1b29
requested changes
robmeth Jul 7, 2023
85827e4
Merge branch 'main' into 143-discretize-table
robmeth Jul 7, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 28 additions & 1 deletion src/safeds/data/tabular/transformation/_discretizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from safeds.data.tabular.containers import Table
from safeds.data.tabular.transformation._table_transformer import TableTransformer
robmeth marked this conversation as resolved.
Show resolved Hide resolved
from safeds.exceptions import TransformerNotFittedError, UnknownColumnNameError
from safeds.exceptions import NonNumericColumnError, TransformerNotFittedError, UnknownColumnNameError


class Discretizer(TableTransformer):
sibre28 marked this conversation as resolved.
Show resolved Hide resolved
Expand Down Expand Up @@ -47,14 +47,30 @@ def fit(self, table: Table, column_names: list[str] | None) -> Discretizer:
-------
fitted_transformer : TableTransformer
The fitted transformer.

Raises
------
ValueError
If the table is empty.
NonNumericColumnError
If one of the columns that should be fitted is not numeric.
robmeth marked this conversation as resolved.
Show resolved Hide resolved
UnknownColumnNameError
If one of the columns that should be fitted is not in the table.
"""
if table.number_of_rows == 0:
raise ValueError("The Discretizer cannot be fitted because the table contains 0 rows")

if column_names is None:
column_names = table.column_names
else:
missing_columns = set(column_names) - set(table.column_names)
if len(missing_columns) > 0:
raise UnknownColumnNameError(list(missing_columns))

for column in column_names:
if not table.get_column(column).type.is_numeric():
raise NonNumericColumnError(f"{column} is of type {table.get_column(column).type}.")

wrapped_transformer = sk_KBinsDiscretizer(n_bins=self._number_of_bins, encode="ordinal")
wrapped_transformer.fit(table._data[column_names])

Expand Down Expand Up @@ -84,16 +100,27 @@ def transform(self, table: Table) -> Table:
------
TransformerNotFittedError
If the transformer has not been fitted yet.
ValueError
If the table is empty.
NonNumericColumnError
If one of the columns that should be transformed is not numeric.
robmeth marked this conversation as resolved.
Show resolved Hide resolved
robmeth marked this conversation as resolved.
Show resolved Hide resolved
"""
# Transformer has not been fitted yet
if self._wrapped_transformer is None or self._column_names is None:
raise TransformerNotFittedError

if table.number_of_rows == 0:
raise ValueError("The table cannot be transformed because it contains 0 rows")

# Input table does not contain all columns used to fit the transformer
missing_columns = set(self._column_names) - set(table.column_names)
if len(missing_columns) > 0:
raise UnknownColumnNameError(list(missing_columns))

for column in self._column_names:
if not table.get_column(column).type.is_numeric():
raise NonNumericColumnError(f"{column} is of type {table.get_column(column).type}.")

data = table._data.copy()
data.columns = table.column_names
data[self._column_names] = self._wrapped_transformer.transform(data[self._column_names])
Expand Down
73 changes: 56 additions & 17 deletions tests/safeds/data/tabular/transformation/test_discretizer.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pytest
from safeds.data.tabular.containers import Table
from safeds.data.tabular.transformation import Discretizer
from safeds.exceptions import TransformerNotFittedError, UnknownColumnNameError
from safeds.exceptions import NonNumericColumnError, TransformerNotFittedError, UnknownColumnNameError


class TestInit:
Expand All @@ -11,14 +11,34 @@ def test_should_raise_value_error(self) -> None:


class TestFit:
def test_should_raise_if_column_not_found(self) -> None:
table = Table(
{
"col1": [0.0, 5.0, 10.0],
},
)

with pytest.raises(UnknownColumnNameError):
@pytest.mark.parametrize(
("table", "error", "error_message"),
[
(
Table(
{
"col1": [0.0, 5.0, 5.0, 10.0],
},
),
UnknownColumnNameError,
r"Could not find column\(s\) 'col2'",
),
robmeth marked this conversation as resolved.
Show resolved Hide resolved
(Table(), ValueError, "The Discretizer cannot be fitted because the table contains 0 rows"),
(
Table(
{
"col1": [0.0, 5.0, 5.0, 10.0],
"col2": ["a", "b", "c", "d"],
},
),
NonNumericColumnError,
"Tried to do a numerical operation on one or multiple non-numerical columns: \ncol2 is of type String.",
),
],
ids=["UnknownColumnNameError", "ValueError", "NonNumericColumnError"],
)
def test_should_raise_errors(self, table: Table, error: type[Exception], error_message: str) -> None:
with pytest.raises(error, match=error_message):
Discretizer().fit(table, ["col2"])

def test_should_not_change_original_transformer(self) -> None:
Expand All @@ -36,7 +56,32 @@ def test_should_not_change_original_transformer(self) -> None:


class TestTransform:
def test_should_raise_if_column_not_found(self) -> None:
@pytest.mark.parametrize(
("table_to_transform", "error", "error_message"),
[
(
Table(
{
"col2": ["a", "b", "c"],
},
),
UnknownColumnNameError,
r"Could not find column\(s\) 'col1'",
),
robmeth marked this conversation as resolved.
Show resolved Hide resolved
(Table(), ValueError, "The table cannot be transformed because it contains 0 rows"),
(
Table(
{
"col1": ["a", "b", "c", "d"],
},
),
NonNumericColumnError,
"Tried to do a numerical operation on one or multiple non-numerical columns: \ncol1 is of type String.",
),
],
ids=["UnknownColumnNameError", "ValueError", "NonNumericColumnError"],
)
def test_should_raise_errors(self, table_to_transform: Table, error: type[Exception], error_message: str) -> None:
table_to_fit = Table(
{
"col1": [0.0, 5.0, 10.0],
Expand All @@ -45,13 +90,7 @@ def test_should_raise_if_column_not_found(self) -> None:

transformer = Discretizer().fit(table_to_fit, None)

table_to_transform = Table(
{
"col2": ["a", "b", "c"],
},
)

with pytest.raises(UnknownColumnNameError):
with pytest.raises(error, match=error_message):
transformer.transform(table_to_transform)

def test_should_raise_if_not_fitted(self) -> None:
Expand Down