From 3aad07ddcc0da42e1dab2eed49fc41433a876765 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Fri, 21 Apr 2023 16:03:42 +0200 Subject: [PATCH 1/4] fix: selectively ignore one warning instead of all warnings (#235) ### Summary of Changes When we tried to test whether a specific warning was issued, we discovered that these tests failed if all tests were run but passed if only the test for the warning was run. This happened because one module basically disabled all warnings when it was loaded. This behavior is now removed and instead one specific warning is disabled for the one call that may create it. --- .../data/tabular/transformation/_label_encoder.py | 10 ---------- src/safeds/ml/classical/_util_sklearn.py | 5 ++++- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/src/safeds/data/tabular/transformation/_label_encoder.py b/src/safeds/data/tabular/transformation/_label_encoder.py index e2004c15f..2dd4fe6db 100644 --- a/src/safeds/data/tabular/transformation/_label_encoder.py +++ b/src/safeds/data/tabular/transformation/_label_encoder.py @@ -1,8 +1,5 @@ from __future__ import annotations -import warnings -from typing import Any - from sklearn.preprocessing import OrdinalEncoder as sk_OrdinalEncoder from safeds.data.tabular.containers import Table @@ -12,13 +9,6 @@ ) -def warn(*_: Any, **__: Any) -> None: - pass - - -warnings.warn = warn - - # noinspection PyProtectedMember class LabelEncoder(InvertibleTableTransformer): """The LabelEncoder encodes one or more given columns into labels.""" diff --git a/src/safeds/ml/classical/_util_sklearn.py b/src/safeds/ml/classical/_util_sklearn.py index d5a987f48..145ee06f5 100644 --- a/src/safeds/ml/classical/_util_sklearn.py +++ b/src/safeds/ml/classical/_util_sklearn.py @@ -1,3 +1,4 @@ +import warnings from typing import Any from safeds.data.tabular.containers import Table, TaggedTable @@ -84,7 +85,9 @@ def predict(model: Any, dataset: Table, feature_names: list[str] | None, target_ result_set.columns = 
dataset.column_names try: - predicted_target_vector = model.predict(dataset_df.values) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", message="X does not have valid feature names") + predicted_target_vector = model.predict(dataset_df.values) result_set[target_name] = predicted_target_vector return Table(result_set).tag_columns(target_name=target_name, feature_names=feature_names) except ValueError as exception: From 4a1a7367099125d2a072bf36686063de7180e8f0 Mon Sep 17 00:00:00 2001 From: robmeth <91134475+robmeth@users.noreply.github.com> Date: Fri, 21 Apr 2023 16:47:11 +0200 Subject: [PATCH 2/4] feat: add parameter `lasso_ratio` to `ElasticNetRegression` (#237) Closes #166. ### Summary of Changes Added parameter `lasso_ratio` to `ElasticNetRegression` and tests for edge cases 0, 1, invalid and default. --------- Co-authored-by: zzril <> Co-authored-by: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> --- .../regression/_elastic_net_regression.py | 22 +++++++++-- .../regression/test_elastic_net_regression.py | 39 +++++++++++++++++++ 2 files changed, 58 insertions(+), 3 deletions(-) create mode 100644 tests/safeds/ml/classical/regression/test_elastic_net_regression.py diff --git a/src/safeds/ml/classical/regression/_elastic_net_regression.py b/src/safeds/ml/classical/regression/_elastic_net_regression.py index 05de26dfb..878544be5 100644 --- a/src/safeds/ml/classical/regression/_elastic_net_regression.py +++ b/src/safeds/ml/classical/regression/_elastic_net_regression.py @@ -1,5 +1,6 @@ from __future__ import annotations +import warnings from typing import TYPE_CHECKING from sklearn.linear_model import ElasticNet as sk_ElasticNet @@ -15,7 +16,22 @@ class ElasticNetRegression(Regressor): """Elastic net regression.""" - def __init__(self) -> None: + def __init__(self, lasso_ratio: float = 0.5) -> None: + if lasso_ratio < 0 or lasso_ratio > 1: + raise ValueError("lasso_ratio must be between 0 and 1.") + elif lasso_ratio == 0: + 
warnings.warn( + "ElasticNetRegression with lasso_ratio = 0 is essentially RidgeRegression." + " Use RidgeRegression instead for better numerical stability.", + stacklevel=2, + ) + elif lasso_ratio == 1: + warnings.warn( + "ElasticNetRegression with lasso_ratio = 1 is essentially LassoRegression." + " Use LassoRegression instead for better numerical stability.", + stacklevel=2, + ) + self.lasso_ratio = lasso_ratio self._wrapped_regressor: sk_ElasticNet | None = None self._feature_names: list[str] | None = None self._target_name: str | None = None @@ -41,10 +57,10 @@ def fit(self, training_set: TaggedTable) -> ElasticNetRegression: LearningError If the training data contains invalid values or if the training failed. """ - wrapped_regressor = sk_ElasticNet() + wrapped_regressor = sk_ElasticNet(l1_ratio=self.lasso_ratio) fit(wrapped_regressor, training_set) - result = ElasticNetRegression() + result = ElasticNetRegression(self.lasso_ratio) result._wrapped_regressor = wrapped_regressor result._feature_names = training_set.features.column_names result._target_name = training_set.target.name diff --git a/tests/safeds/ml/classical/regression/test_elastic_net_regression.py b/tests/safeds/ml/classical/regression/test_elastic_net_regression.py new file mode 100644 index 000000000..3ea0d3d4d --- /dev/null +++ b/tests/safeds/ml/classical/regression/test_elastic_net_regression.py @@ -0,0 +1,39 @@ +import pytest +from safeds.data.tabular.containers import Table +from safeds.ml.classical.regression._elastic_net_regression import ElasticNetRegression + + +def test_lasso_ratio_valid() -> None: + training_set = Table.from_dict({"col1": [1, 2, 3, 4], "col2": [1, 2, 3, 4]}) + tagged_training_set = training_set.tag_columns(target_name="col1", feature_names=["col2"]) + lasso_ratio = 0.3 + + elastic_net_regression = ElasticNetRegression(lasso_ratio).fit(tagged_training_set) + assert elastic_net_regression._wrapped_regressor is not None + assert
elastic_net_regression._wrapped_regressor.l1_ratio == lasso_ratio + + +def test_lasso_ratio_invalid() -> None: + with pytest.raises(ValueError, match="lasso_ratio must be between 0 and 1."): + ElasticNetRegression(-1) + + +def test_lasso_ratio_zero() -> None: + with pytest.warns( + UserWarning, + match="ElasticNetRegression with lasso_ratio = 0 is essentially RidgeRegression." + " Use RidgeRegression instead for better numerical stability.", + ): + ElasticNetRegression(0) + + +def test_lasso_ratio_one() -> None: + with pytest.warns( + UserWarning, + match="ElasticNetRegression with lasso_ratio = 1 is essentially LassoRegression." + " Use LassoRegression instead for better numerical stability.", + ): + ElasticNetRegression(1) + + +# (Default parameter is tested in `test_regressor.py`.) From b3893cc81a3cff2f96a331dfe3b5bb44edca4a8e Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Fri, 21 Apr 2023 17:00:16 +0200 Subject: [PATCH 3/4] chore: ignore `.DS_Store` files --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index eab7fca54..eb88cf746 100644 --- a/.gitignore +++ b/.gitignore @@ -45,5 +45,5 @@ report/ megalinter-reports/ # Other -.DS_Store/ +.DS_Store *.log From 846bf233235b2cdaf9bbd00cacb89ea44e94011b Mon Sep 17 00:00:00 2001 From: Alexander <47296670+Marsmaennchen221@users.noreply.github.com> Date: Fri, 21 Apr 2023 19:50:28 +0200 Subject: [PATCH 4/4] feat: Added method `Table.inverse_transform_table` which returns the original table (#227) Closes #111.
### Summary of Changes Added method `Table.inverse_transform_table` which takes the fitted transformer and returns the original table --------- Co-authored-by: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Co-authored-by: Lars Reimann Co-authored-by: sibre28 <86068340+sibre28@users.noreply.github.com> --- src/safeds/data/tabular/containers/_table.py | 42 +++++++ .../_table/test_inverse_transform_table.py | 119 ++++++++++++++++++ 2 files changed, 161 insertions(+) create mode 100644 tests/safeds/data/tabular/containers/_table/test_inverse_transform_table.py diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 13d212016..a37f4c7ae 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -34,6 +34,8 @@ if TYPE_CHECKING: from collections.abc import Callable, Iterable + from safeds.data.tabular.transformation import InvertibleTableTransformer + from ._tagged_table import TaggedTable @@ -991,6 +993,46 @@ def transform_column(self, name: str, transformer: Callable[[Row], Any]) -> Tabl return self.replace_column(name, result) raise UnknownColumnNameError([name]) + def inverse_transform_table(self, transformer: InvertibleTableTransformer) -> Table: + """ + Invert the transformation applied by the given transformer. + + Parameters + ---------- + transformer : InvertibleTableTransformer + A transformer that was fitted with columns, which are all present in the table. + + Returns + ------- + table : Table + The original table + + Raises + ------ + TransformerNotFittedError + If the transformer has not been fitted yet. 
+ + Examples + -------- + >>> from safeds.data.tabular.transformation import OneHotEncoder + >>> from safeds.data.tabular.containers import Table + >>> transformer = OneHotEncoder() + >>> table = Table.from_dict({"col1": [1, 2, 1], "col2": [1, 2, 4]}) + >>> transformer = transformer.fit(table, None) + >>> transformed_table = transformer.transform(table) + >>> transformed_table.inverse_transform_table(transformer) + col1 col2 + 0 1 1 + 1 2 2 + 2 1 4 + >>> transformer.inverse_transform(transformed_table) + col1 col2 + 0 1 1 + 1 2 2 + 2 1 4 + """ + return transformer.inverse_transform(self) + # ------------------------------------------------------------------------------------------------------------------ # Plotting # ------------------------------------------------------------------------------------------------------------------ diff --git a/tests/safeds/data/tabular/containers/_table/test_inverse_transform_table.py b/tests/safeds/data/tabular/containers/_table/test_inverse_transform_table.py new file mode 100644 index 000000000..84ee05527 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_table/test_inverse_transform_table.py @@ -0,0 +1,119 @@ +import pytest +from safeds.data.tabular.containers import Table +from safeds.data.tabular.exceptions import TransformerNotFittedError +from safeds.data.tabular.transformation import OneHotEncoder + + +class TestInverseTransformTableOnOneHotEncoder: + @pytest.mark.parametrize( + ("table_to_fit", "column_names", "table_to_transform"), + [ + ( + Table.from_dict( + { + "a": [1.0, 0.0, 0.0, 0.0], + "b": ["a", "b", "b", "c"], + "c": [0.0, 0.0, 0.0, 1.0], + }, + ), + ["b"], + Table.from_dict( + { + "a": [1.0, 0.0, 0.0, 0.0], + "b": ["a", "b", "b", "c"], + "c": [0.0, 0.0, 0.0, 1.0], + }, + ), + ), + ( + Table.from_dict( + { + "a": [1.0, 0.0, 0.0, 0.0], + "b": ["a", "b", "b", "c"], + "c": [0.0, 0.0, 0.0, 1.0], + }, + ), + ["b"], + Table.from_dict( + { + "c": [0.0, 0.0, 0.0, 1.0], + "b": ["a", "b", "b", "c"], + "a": [1.0, 
0.0, 0.0, 0.0], + }, + ), + ), + ( + Table.from_dict( + { + "a": [1.0, 0.0, 0.0, 0.0], + "b": ["a", "b", "b", "c"], + "bb": ["a", "b", "b", "c"], + }, + ), + ["b", "bb"], + Table.from_dict( + { + "a": [1.0, 0.0, 0.0, 0.0], + "b": ["a", "b", "b", "c"], + "bb": ["a", "b", "b", "c"], + }, + ), + ), + ], + ids=[ + "same table to fit and transform", + "different tables to fit and transform", + "one column name is a prefix of another column name", + ], + ) + def test_should_return_original_table( + self, + table_to_fit: Table, + column_names: list[str], + table_to_transform: Table, + ) -> None: + transformer = OneHotEncoder().fit(table_to_fit, column_names) + transformed_table = transformer.transform(table_to_transform) + + result = transformed_table.inverse_transform_table(transformer) + + # This checks whether the columns are in the same order + assert result.column_names == table_to_transform.column_names + # This is subsumed by the next assertion, but we get a better error message + assert result.schema == table_to_transform.schema + assert result == table_to_transform + + def test_should_not_change_transformed_table(self) -> None: + table = Table.from_dict( + { + "col1": ["a", "b", "b", "c"], + }, + ) + + transformer = OneHotEncoder().fit(table, None) + transformed_table = transformer.transform(table) + transformed_table.inverse_transform_table(transformer) + + expected = Table.from_dict( + { + "col1_a": [1.0, 0.0, 0.0, 0.0], + "col1_b": [0.0, 1.0, 1.0, 0.0], + "col1_c": [0.0, 0.0, 0.0, 1.0], + }, + ) + + assert transformed_table == expected + + def test_should_raise_if_not_fitted(self) -> None: + table = Table.from_dict( + { + "a": [1.0, 0.0, 0.0, 0.0], + "b": [0.0, 1.0, 1.0, 0.0], + "c": [0.0, 0.0, 0.0, 1.0], + }, + ) + + transformer = OneHotEncoder() + + with pytest.raises(TransformerNotFittedError): + table.inverse_transform_table(transformer)