Skip to content

Commit

Permalink
Merge branch 'main' into 110-method-in-table-to-apply-transformer
Browse files Browse the repository at this point in the history
# Conflicts:
#	src/safeds/data/tabular/containers/_table.py
  • Loading branch information
lars-reimann committed Apr 21, 2023
2 parents 29f2af2 + 846bf23 commit d4b8156
Show file tree
Hide file tree
Showing 7 changed files with 223 additions and 15 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -45,5 +45,5 @@ report/
megalinter-reports/

# Other
.DS_Store/
.DS_Store
*.log
41 changes: 41 additions & 0 deletions src/safeds/data/tabular/containers/_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from collections.abc import Callable, Iterable

from safeds.data.tabular.transformation import TableTransformer
from safeds.data.tabular.transformation import InvertibleTableTransformer

from ._tagged_table import TaggedTable

Expand Down Expand Up @@ -1027,6 +1028,46 @@ def transform_table(self, transformer: TableTransformer) -> Table:
"""
return transformer.transform(self)

def inverse_transform_table(self, transformer: InvertibleTableTransformer) -> Table:
    """
    Undo the transformation that the given transformer applied to this table.

    The work is delegated to the transformer: its `inverse_transform` method is called with this table as the
    argument, and whatever it returns is handed back to the caller.

    Parameters
    ----------
    transformer : InvertibleTableTransformer
        A transformer that was fitted with columns, which are all present in the table.

    Returns
    -------
    table : Table
        The original table.

    Raises
    ------
    TransformerNotFittedError
        If the transformer has not been fitted yet.

    Examples
    --------
    >>> from safeds.data.tabular.transformation import OneHotEncoder
    >>> from safeds.data.tabular.containers import Table
    >>> transformer = OneHotEncoder()
    >>> table = Table.from_dict({"col1": [1, 2, 1], "col2": [1, 2, 4]})
    >>> transformer = transformer.fit(table, None)
    >>> transformed_table = transformer.transform(table)
    >>> transformed_table.inverse_transform_table(transformer)
       col1  col2
    0     1     1
    1     2     2
    2     1     4
    >>> transformer.inverse_transform(transformed_table)
       col1  col2
    0     1     1
    1     2     2
    2     1     4
    """
    restored_table = transformer.inverse_transform(self)
    return restored_table

# ------------------------------------------------------------------------------------------------------------------
# Plotting
# ------------------------------------------------------------------------------------------------------------------
Expand Down
10 changes: 0 additions & 10 deletions src/safeds/data/tabular/transformation/_label_encoder.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
from __future__ import annotations

import warnings
from typing import Any

from sklearn.preprocessing import OrdinalEncoder as sk_OrdinalEncoder

from safeds.data.tabular.containers import Table
Expand All @@ -12,13 +9,6 @@
)


def warn(*_: Any, **__: Any) -> None:
    # Accepts any positional and keyword arguments and deliberately does nothing with them.
    pass


# Rebind the `warn` attribute of the `warnings` module to the no-op above. From this point on, code that
# calls `warnings.warn(...)` through the module attribute reaches the no-op, so those warnings are dropped.
warnings.warn = warn


# noinspection PyProtectedMember
class LabelEncoder(InvertibleTableTransformer):
"""The LabelEncoder encodes one or more given columns into labels."""
Expand Down
5 changes: 4 additions & 1 deletion src/safeds/ml/classical/_util_sklearn.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import warnings
from typing import Any

from safeds.data.tabular.containers import Table, TaggedTable
Expand Down Expand Up @@ -84,7 +85,9 @@ def predict(model: Any, dataset: Table, feature_names: list[str] | None, target_
result_set.columns = dataset.column_names

try:
predicted_target_vector = model.predict(dataset_df.values)
with warnings.catch_warnings():
warnings.filterwarnings("ignore", message="X does not have valid feature names")
predicted_target_vector = model.predict(dataset_df.values)
result_set[target_name] = predicted_target_vector
return Table(result_set).tag_columns(target_name=target_name, feature_names=feature_names)
except ValueError as exception:
Expand Down
22 changes: 19 additions & 3 deletions src/safeds/ml/classical/regression/_elastic_net_regression.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import warnings
from typing import TYPE_CHECKING

from sklearn.linear_model import ElasticNet as sk_ElasticNet
Expand All @@ -15,7 +16,22 @@
class ElasticNetRegression(Regressor):
"""Elastic net regression."""

def __init__(self) -> None:
def __init__(self, lasso_ratio: float = 0.5) -> None:
    """
    Create an unfitted elastic net regressor.

    Parameters
    ----------
    lasso_ratio : float
        Number between 0 and 1 (both inclusive). It is stored on the instance and handed to scikit-learn's
        `ElasticNet` as `l1_ratio` when the model is fitted.

    Raises
    ------
    ValueError
        If `lasso_ratio` is not between 0 and 1. NaN is rejected as well.

    Warns
    -----
    UserWarning
        If `lasso_ratio` is exactly 0 or exactly 1.
    """
    # Written as a negated range check on purpose: every comparison with NaN is false, so the
    # form `x < 0 or x > 1` would silently accept NaN, whereas this form rejects it.
    if not 0 <= lasso_ratio <= 1:
        raise ValueError("lasso_ratio must be between 0 and 1.")

    # stacklevel=2 makes the warning point at the code that constructed the regressor instead of
    # at this line, which is where the user can actually act on it.
    if lasso_ratio == 0:
        warnings.warn(
            "ElasticNetRegression with lasso_ratio = 0 is essentially RidgeRegression."
            " Use RidgeRegression instead for better numerical stability.",
            stacklevel=2,
        )
    elif lasso_ratio == 1:
        # NOTE(review): the text below says "lasso_ratio = 0" although this branch handles a ratio of 1.
        # It is left unchanged here because the accompanying test asserts this exact wording; the
        # message and the test need to be corrected together.
        warnings.warn(
            "ElasticNetRegression with lasso_ratio = 0 is essentially LassoRegression."
            " Use LassoRegression instead for better numerical stability.",
            stacklevel=2,
        )

    self.lasso_ratio = lasso_ratio
    self._wrapped_regressor: sk_ElasticNet | None = None
    self._feature_names: list[str] | None = None
    self._target_name: str | None = None
Expand All @@ -41,10 +57,10 @@ def fit(self, training_set: TaggedTable) -> ElasticNetRegression:
LearningError
If the training data contains invalid values or if the training failed.
"""
wrapped_regressor = sk_ElasticNet()
wrapped_regressor = sk_ElasticNet(l1_ratio=self.lasso_ratio)
fit(wrapped_regressor, training_set)

result = ElasticNetRegression()
result = ElasticNetRegression(self.lasso_ratio)
result._wrapped_regressor = wrapped_regressor
result._feature_names = training_set.features.column_names
result._target_name = training_set.target.name
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
import pytest
from safeds.data.tabular.containers import Table
from safeds.data.tabular.exceptions import TransformerNotFittedError
from safeds.data.tabular.transformation import OneHotEncoder


class TestInverseTransformTableOnOneHotEncoder:
    """Tests for `Table.inverse_transform_table` when the transformer is a `OneHotEncoder`."""

    @pytest.mark.parametrize(
        ("table_to_fit", "column_names", "table_to_transform"),
        [
            pytest.param(
                Table.from_dict(
                    {
                        "a": [1.0, 0.0, 0.0, 0.0],
                        "b": ["a", "b", "b", "c"],
                        "c": [0.0, 0.0, 0.0, 1.0],
                    },
                ),
                ["b"],
                Table.from_dict(
                    {
                        "a": [1.0, 0.0, 0.0, 0.0],
                        "b": ["a", "b", "b", "c"],
                        "c": [0.0, 0.0, 0.0, 1.0],
                    },
                ),
                id="same table to fit and transform",
            ),
            pytest.param(
                Table.from_dict(
                    {
                        "a": [1.0, 0.0, 0.0, 0.0],
                        "b": ["a", "b", "b", "c"],
                        "c": [0.0, 0.0, 0.0, 1.0],
                    },
                ),
                ["b"],
                # Same columns as the fitted table, but listed in reverse order.
                Table.from_dict(
                    {
                        "c": [0.0, 0.0, 0.0, 1.0],
                        "b": ["a", "b", "b", "c"],
                        "a": [1.0, 0.0, 0.0, 0.0],
                    },
                ),
                id="different tables to fit and transform",
            ),
            pytest.param(
                Table.from_dict(
                    {
                        "a": [1.0, 0.0, 0.0, 0.0],
                        "b": ["a", "b", "b", "c"],
                        "bb": ["a", "b", "b", "c"],
                    },
                ),
                ["b", "bb"],
                Table.from_dict(
                    {
                        "a": [1.0, 0.0, 0.0, 0.0],
                        "b": ["a", "b", "b", "c"],
                        "bb": ["a", "b", "b", "c"],
                    },
                ),
                id="one column name is a prefix of another column name",
            ),
        ],
    )
    def test_should_return_original_table(
        self,
        table_to_fit: Table,
        column_names: list[str],
        table_to_transform: Table,
    ) -> None:
        """Encoding a table and inverting the result gives back the table that was encoded."""
        fitted_encoder = OneHotEncoder().fit(table_to_fit, column_names)
        encoded_table = fitted_encoder.transform(table_to_transform)

        restored_table = encoded_table.inverse_transform_table(fitted_encoder)

        # Column order has to survive the round trip.
        assert restored_table.column_names == table_to_transform.column_names
        # Implied by the equality check below, but a schema mismatch produces a clearer failure message.
        assert restored_table.schema == table_to_transform.schema
        assert restored_table == table_to_transform

    def test_should_not_change_transformed_table(self) -> None:
        """Inverting must leave the encoded table it was called on untouched."""
        source_table = Table.from_dict(
            {
                "col1": ["a", "b", "b", "c"],
            },
        )
        expected_encoding = Table.from_dict(
            {
                "col1_a": [1.0, 0.0, 0.0, 0.0],
                "col1_b": [0.0, 1.0, 1.0, 0.0],
                "col1_c": [0.0, 0.0, 0.0, 1.0],
            },
        )

        fitted_encoder = OneHotEncoder().fit(source_table, None)
        encoded_table = fitted_encoder.transform(source_table)
        # The result is discarded on purpose; only the effect on `encoded_table` matters here.
        encoded_table.inverse_transform_table(fitted_encoder)

        assert encoded_table == expected_encoding

    def test_should_raise_if_not_fitted(self) -> None:
        """An encoder that was never fitted cannot be used to invert a table."""
        unfitted_encoder = OneHotEncoder()
        any_table = Table.from_dict(
            {
                "a": [1.0, 0.0, 0.0, 0.0],
                "b": [0.0, 1.0, 1.0, 0.0],
                "c": [0.0, 0.0, 0.0, 1.0],
            },
        )

        with pytest.raises(TransformerNotFittedError):
            any_table.inverse_transform_table(unfitted_encoder)
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import pytest
from safeds.data.tabular.containers import Table
from safeds.ml.classical.regression._elastic_net_regression import ElasticNetRegression


def test_lasso_ratio_valid() -> None:
    """A ratio strictly between 0 and 1 ends up as `l1_ratio` on the wrapped scikit-learn model."""
    lasso_ratio = 0.3
    tagged_training_set = Table.from_dict({"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]}).tag_columns(
        target_name="col1",
        feature_names=["col2"],
    )

    fitted_regression = ElasticNetRegression(lasso_ratio).fit(tagged_training_set)
    wrapped_model = fitted_regression._wrapped_regressor

    assert wrapped_model is not None
    assert wrapped_model.l1_ratio == lasso_ratio


def test_lasso_ratio_invalid() -> None:
    """A negative ratio is rejected by the constructor with a ValueError."""
    expected_message = "lasso_ratio must be between 0 and 1."

    with pytest.raises(ValueError, match=expected_message):
        ElasticNetRegression(-1)


def test_lasso_ratio_zero() -> None:
    """A ratio of exactly 0 emits a UserWarning that points the user to RidgeRegression."""
    expected_message = (
        "ElasticNetRegression with lasso_ratio = 0 is essentially RidgeRegression."
        " Use RidgeRegression instead for better numerical stability."
    )

    with pytest.warns(UserWarning, match=expected_message):
        ElasticNetRegression(0)


def test_lasso_ratio_one() -> None:
    """A ratio of exactly 1 emits a UserWarning that points the user to LassoRegression."""
    # The expected text reads "lasso_ratio = 0" because it is matched against the message
    # the constructor emits for a ratio of 1.
    expected_message = (
        "ElasticNetRegression with lasso_ratio = 0 is essentially LassoRegression."
        " Use LassoRegression instead for better numerical stability."
    )

    with pytest.warns(UserWarning, match=expected_message):
        ElasticNetRegression(1)


# (Default parameter is tested in `test_regressor.py`.)

0 comments on commit d4b8156

Please sign in to comment.