From 96e94493f02a14a4721287025158416b6246c07e Mon Sep 17 00:00:00 2001
From: AFThielmann
Date: Wed, 17 Jul 2024 20:49:41 +0200
Subject: [PATCH] delete basis expansion

---
 mambular/models/__init__.py      |   8 -
 mambular/models/basisexpandft.py | 330 -------------------------------
 2 files changed, 338 deletions(-)
 delete mode 100644 mambular/models/basisexpandft.py

diff --git a/mambular/models/__init__.py b/mambular/models/__init__.py
index a533af5..b23f49f 100644
--- a/mambular/models/__init__.py
+++ b/mambular/models/__init__.py
@@ -14,11 +14,6 @@
     TabTransformerLSS,
     TabTransformerRegressor,
 )
-from .basisexpandft import (
-    BasisFTTransformerClassifier,
-    BasisFTTransformerRegressor,
-    BasisFTTransformerLSS,
-)
 from .mambatab import MambaTabClassifier, MambaTabRegressor, MambaTabLSS
 from .rnn import RNNClassifier, RNNRegressor, RNNLSS
 
@@ -42,9 +37,6 @@
     "SklearnBaseClassifier",
     "SklearnBaseLSS",
     "SklearnBaseRegressor",
-    "BasisFTTransformerClassifier",
-    "BasisFTTransformerRegressor",
-    "BasisFTTransformerLSS",
     "MambaTabRegressor",
     "MambaTabClassifier",
     "MambaTabLSS",
diff --git a/mambular/models/basisexpandft.py b/mambular/models/basisexpandft.py
deleted file mode 100644
index 4687820..0000000
--- a/mambular/models/basisexpandft.py
+++ /dev/null
@@ -1,330 +0,0 @@
-from .sklearn_base_regressor import SklearnBaseRegressor
-from .sklearn_base_classifier import SklearnBaseClassifier
-from .sklearn_base_lss import SklearnBaseLSS
-
-from ..base_models.basis_expansion_fttransformer import BasisFTTransformer
-from ..configs.fttransformer_config import DefaultFTTransformerConfig
-
-
-class BasisFTTransformerRegressor(SklearnBaseRegressor):
-    """
-    FTTransformer regressor. This class extends the SklearnBaseRegressor class and uses the FTTransformer model
-    with the default FTTransformer configuration.
-
-    The accepted arguments to the FTTransformerRegressor class include both the attributes in the DefaultFTTransformerConfig dataclass
-    and the parameters for the Preprocessor class.
-
-    Parameters
-    ----------
-    lr : float, default=1e-04
-        Learning rate for the optimizer.
-    lr_patience : int, default=10
-        Number of epochs with no improvement after which learning rate will be reduced.
-    weight_decay : float, default=1e-06
-        Weight decay (L2 penalty) for the optimizer.
-    lr_factor : float, default=0.1
-        Factor by which the learning rate will be reduced.
-    d_model : int, default=64
-        Dimensionality of the model.
-    n_layers : int, default=8
-        Number of layers in the transformer.
-    n_heads : int, default=4
-        Number of attention heads in the transformer.
-    attn_dropout : float, default=0.3
-        Dropout rate for the attention mechanism.
-    ff_dropout : float, default=0.3
-        Dropout rate for the feed-forward layers.
-    norm : str, default="RMSNorm"
-        Normalization method to be used.
-    activation : callable, default=nn.SELU()
-        Activation function for the transformer.
-    num_embedding_activation : callable, default=nn.Identity()
-        Activation function for numerical embeddings.
-    head_layer_sizes : list, default=(128, 64, 32)
-        Sizes of the layers in the head of the model.
-    head_dropout : float, default=0.5
-        Dropout rate for the head layers.
-    head_skip_layers : bool, default=False
-        Whether to skip layers in the head.
-    head_activation : callable, default=nn.SELU()
-        Activation function for the head layers.
-    head_use_batch_norm : bool, default=False
-        Whether to use batch normalization in the head layers.
-    layer_norm_after_embedding : bool, default=False
-        Whether to apply layer normalization after embedding.
-    pooling_method : str, default="cls"
-        Pooling method to be used ('cls', 'avg', etc.).
-    norm_first : bool, default=False
-        Whether to apply normalization before other operations in each transformer block.
-    bias : bool, default=True
-        Whether to use bias in the linear layers.
-    transformer_activation : callable, default=nn.SELU()
-        Activation function for the transformer layers.
-    layer_norm_eps : float, default=1e-05
-        Epsilon value for layer normalization.
-    transformer_dim_feedforward : int, default=512
-        Dimensionality of the feed-forward layers in the transformer.
-    n_bins : int, default=50
-        The number of bins to use for numerical feature binning. This parameter is relevant
-        only if `numerical_preprocessing` is set to 'binning' or 'one_hot'.
-    numerical_preprocessing : str, default="ple"
-        The preprocessing strategy for numerical features. Valid options are
-        'binning', 'one_hot', 'standardization', and 'normalization'.
-    use_decision_tree_bins : bool, default=False
-        If True, uses decision tree regression/classification to determine
-        optimal bin edges for numerical feature binning. This parameter is
-        relevant only if `numerical_preprocessing` is set to 'binning' or 'one_hot'.
-    binning_strategy : str, default="uniform"
-        Defines the strategy for binning numerical features. Options include 'uniform',
-        'quantile', or other sklearn-compatible strategies.
-    cat_cutoff : float or int, default=0.03
-        Indicates the cutoff after which integer values are treated as categorical.
-        If float, it's treated as a percentage. If int, it's the maximum number of
-        unique values for a column to be considered categorical.
-    treat_all_integers_as_numerical : bool, default=False
-        If True, all integer columns will be treated as numerical, regardless
-        of their unique value count or proportion.
-    degree : int, default=3
-        The degree of the polynomial features to be used in preprocessing.
-    knots : int, default=12
-        The number of knots to be used in spline transformations.
-
-    Notes
-    -----
-    - The accepted arguments to the FTTransformerRegressor class are the same as the attributes in the DefaultFTTransformerConfig dataclass.
-    - FTTransformerRegressor uses SklearnBaseRegressor as the parent class. The methods for fitting, predicting, and evaluating the model are inherited from the parent class. Please refer to the parent class for more information.
-
-    See Also
-    --------
-    mambular.models.SklearnBaseRegressor : The parent class for FTTransformerRegressor.
-
-    Examples
-    --------
-    >>> from mambular.models import FTTransformerRegressor
-    >>> model = FTTransformerRegressor(d_model=64, n_layers=8)
-    >>> model.fit(X_train, y_train)
-    >>> preds = model.predict(X_test)
-    >>> model.evaluate(X_test, y_test)
-    """
-
-    def __init__(self, **kwargs):
-        super().__init__(
-            model=BasisFTTransformer, config=DefaultFTTransformerConfig, **kwargs
-        )
-
-
-class BasisFTTransformerClassifier(SklearnBaseClassifier):
-    """
-    FTTransformer Classifier. This class extends the SklearnBaseClassifier class and uses the FTTransformer model
-    with the default FTTransformer configuration.
-
-    The accepted arguments to the FTTransformerClassifier class include both the attributes in the DefaultFTTransformerConfig dataclass
-    and the parameters for the Preprocessor class.
-
-    Parameters
-    ----------
-    lr : float, default=1e-04
-        Learning rate for the optimizer.
-    lr_patience : int, default=10
-        Number of epochs with no improvement after which learning rate will be reduced.
-    weight_decay : float, default=1e-06
-        Weight decay (L2 penalty) for the optimizer.
-    lr_factor : float, default=0.1
-        Factor by which the learning rate will be reduced.
-    d_model : int, default=64
-        Dimensionality of the model.
-    n_layers : int, default=8
-        Number of layers in the transformer.
-    n_heads : int, default=4
-        Number of attention heads in the transformer.
-    attn_dropout : float, default=0.3
-        Dropout rate for the attention mechanism.
-    ff_dropout : float, default=0.3
-        Dropout rate for the feed-forward layers.
-    norm : str, default="RMSNorm"
-        Normalization method to be used.
-    activation : callable, default=nn.SELU()
-        Activation function for the transformer.
-    num_embedding_activation : callable, default=nn.Identity()
-        Activation function for numerical embeddings.
-    head_layer_sizes : list, default=(128, 64, 32)
-        Sizes of the layers in the head of the model.
-    head_dropout : float, default=0.5
-        Dropout rate for the head layers.
-    head_skip_layers : bool, default=False
-        Whether to skip layers in the head.
-    head_activation : callable, default=nn.SELU()
-        Activation function for the head layers.
-    head_use_batch_norm : bool, default=False
-        Whether to use batch normalization in the head layers.
-    layer_norm_after_embedding : bool, default=False
-        Whether to apply layer normalization after embedding.
-    pooling_method : str, default="cls"
-        Pooling method to be used ('cls', 'avg', etc.).
-    norm_first : bool, default=False
-        Whether to apply normalization before other operations in each transformer block.
-    bias : bool, default=True
-        Whether to use bias in the linear layers.
-    transformer_activation : callable, default=nn.SELU()
-        Activation function for the transformer layers.
-    layer_norm_eps : float, default=1e-05
-        Epsilon value for layer normalization.
-    transformer_dim_feedforward : int, default=512
-        Dimensionality of the feed-forward layers in the transformer.
-    n_bins : int, default=50
-        The number of bins to use for numerical feature binning. This parameter is relevant
-        only if `numerical_preprocessing` is set to 'binning' or 'one_hot'.
-    numerical_preprocessing : str, default="ple"
-        The preprocessing strategy for numerical features. Valid options are
-        'binning', 'one_hot', 'standardization', and 'normalization'.
-    use_decision_tree_bins : bool, default=False
-        If True, uses decision tree regression/classification to determine
-        optimal bin edges for numerical feature binning. This parameter is
-        relevant only if `numerical_preprocessing` is set to 'binning' or 'one_hot'.
-    binning_strategy : str, default="uniform"
-        Defines the strategy for binning numerical features. Options include 'uniform',
-        'quantile', or other sklearn-compatible strategies.
-    cat_cutoff : float or int, default=0.03
-        Indicates the cutoff after which integer values are treated as categorical.
-        If float, it's treated as a percentage. If int, it's the maximum number of
-        unique values for a column to be considered categorical.
-    treat_all_integers_as_numerical : bool, default=False
-        If True, all integer columns will be treated as numerical, regardless
-        of their unique value count or proportion.
-    degree : int, default=3
-        The degree of the polynomial features to be used in preprocessing.
-    knots : int, default=12
-        The number of knots to be used in spline transformations.
-
-    Notes
-    -----
-    - The accepted arguments to the FTTransformerClassifier class are the same as the attributes in the DefaultFTTransformerConfig dataclass.
-    - FTTransformerClassifier uses SklearnBaseClassifier as the parent class. The methods for fitting, predicting, and evaluating the model are inherited from the parent class. Please refer to the parent class for more information.
-
-    See Also
-    --------
-    mambular.models.SklearnBaseClassifier : The parent class for FTTransformerClassifier.
-
-    Examples
-    --------
-    >>> from mambular.models import FTTransformerClassifier
-    >>> model = FTTransformerClassifier(d_model=64, n_layers=8)
-    >>> model.fit(X_train, y_train)
-    >>> preds = model.predict(X_test)
-    >>> model.evaluate(X_test, y_test)
-    """
-
-    def __init__(self, **kwargs):
-        super().__init__(
-            model=BasisFTTransformer, config=DefaultFTTransformerConfig, **kwargs
-        )
-
-
-class BasisFTTransformerLSS(SklearnBaseLSS):
-    """
-    FTTransformer for distributional regression. This class extends the SklearnBaseLSS class and uses the FTTransformer model
-    with the default FTTransformer configuration.
-
-    The accepted arguments to the FTTransformerLSS class include both the attributes in the DefaultFTTransformerConfig dataclass
-    and the parameters for the Preprocessor class.
-
-    Parameters
-    ----------
-    lr : float, default=1e-04
-        Learning rate for the optimizer.
-    lr_patience : int, default=10
-        Number of epochs with no improvement after which learning rate will be reduced.
-    weight_decay : float, default=1e-06
-        Weight decay (L2 penalty) for the optimizer.
-    lr_factor : float, default=0.1
-        Factor by which the learning rate will be reduced.
-    d_model : int, default=64
-        Dimensionality of the model.
-    n_layers : int, default=8
-        Number of layers in the transformer.
-    n_heads : int, default=4
-        Number of attention heads in the transformer.
-    attn_dropout : float, default=0.3
-        Dropout rate for the attention mechanism.
-    ff_dropout : float, default=0.3
-        Dropout rate for the feed-forward layers.
-    norm : str, default="RMSNorm"
-        Normalization method to be used.
-    activation : callable, default=nn.SELU()
-        Activation function for the transformer.
-    num_embedding_activation : callable, default=nn.Identity()
-        Activation function for numerical embeddings.
-    head_layer_sizes : list, default=(128, 64, 32)
-        Sizes of the layers in the head of the model.
-    head_dropout : float, default=0.5
-        Dropout rate for the head layers.
-    head_skip_layers : bool, default=False
-        Whether to skip layers in the head.
-    head_activation : callable, default=nn.SELU()
-        Activation function for the head layers.
-    head_use_batch_norm : bool, default=False
-        Whether to use batch normalization in the head layers.
-    layer_norm_after_embedding : bool, default=False
-        Whether to apply layer normalization after embedding.
-    pooling_method : str, default="cls"
-        Pooling method to be used ('cls', 'avg', etc.).
-    norm_first : bool, default=False
-        Whether to apply normalization before other operations in each transformer block.
-    bias : bool, default=True
-        Whether to use bias in the linear layers.
-    transformer_activation : callable, default=nn.SELU()
-        Activation function for the transformer layers.
-    layer_norm_eps : float, default=1e-05
-        Epsilon value for layer normalization.
-    transformer_dim_feedforward : int, default=512
-        Dimensionality of the feed-forward layers in the transformer.
-    n_bins : int, default=50
-        The number of bins to use for numerical feature binning. This parameter is relevant
-        only if `numerical_preprocessing` is set to 'binning' or 'one_hot'.
-    numerical_preprocessing : str, default="ple"
-        The preprocessing strategy for numerical features. Valid options are
-        'binning', 'one_hot', 'standardization', and 'normalization'.
-    use_decision_tree_bins : bool, default=False
-        If True, uses decision tree regression/classification to determine
-        optimal bin edges for numerical feature binning. This parameter is
-        relevant only if `numerical_preprocessing` is set to 'binning' or 'one_hot'.
-    binning_strategy : str, default="uniform"
-        Defines the strategy for binning numerical features. Options include 'uniform',
-        'quantile', or other sklearn-compatible strategies.
-    task : str, default="regression"
-        Indicates the type of machine learning task ('regression' or 'classification'). This can
-        influence certain preprocessing behaviors, especially when using decision tree-based binning as ple.
-    cat_cutoff : float or int, default=0.03
-        Indicates the cutoff after which integer values are treated as categorical.
-        If float, it's treated as a percentage. If int, it's the maximum number of
-        unique values for a column to be considered categorical.
-    treat_all_integers_as_numerical : bool, default=False
-        If True, all integer columns will be treated as numerical, regardless
-        of their unique value count or proportion.
-    degree : int, default=3
-        The degree of the polynomial features to be used in preprocessing.
-    knots : int, default=12
-        The number of knots to be used in spline transformations.
-
-    Notes
-    -----
-    - The accepted arguments to the FTTransformerLSS class are the same as the attributes in the DefaultFTTransformerConfig dataclass.
-    - FTTransformerLSS uses SklearnBaseLSS as the parent class. The methods for fitting, predicting, and evaluating the model are inherited from the parent class. Please refer to the parent class for more information.
-
-    See Also
-    --------
-    mambular.models.SklearnBaseLSS : The parent class for FTTransformerLSS.
-
-    Examples
-    --------
-    >>> from mambular.models import FTTransformerLSS
-    >>> model = FTTransformerLSS(d_model=64, n_layers=8)
-    >>> model.fit(X_train, y_train, family="normal")
-    >>> preds = model.predict(X_test)
-    >>> model.evaluate(X_test, y_test)
-    """
-
-    def __init__(self, **kwargs):
-        super().__init__(
-            model=BasisFTTransformer, config=DefaultFTTransformerConfig, **kwargs
-        )
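Reviewer note: the removed classes were thin wrappers that forward all keyword arguments to their sklearn-style base classes with model=BasisFTTransformer and config=DefaultFTTransformerConfig. Below is a minimal sketch of the usage being dropped, reconstructed from the deleted docstrings; it runs only against a pre-patch checkout, and the synthetic data (X, y) is an assumed placeholder, not from the repository:

    # Sketch of the removed API (pre-patch checkout only; data is synthetic).
    import numpy as np
    import pandas as pd
    from sklearn.model_selection import train_test_split
    from mambular.models import BasisFTTransformerRegressor  # deleted by this patch

    # Assumed stand-in data; shapes and column names are arbitrary.
    rng = np.random.default_rng(0)
    X = pd.DataFrame(rng.normal(size=(256, 4)), columns=[f"x{i}" for i in range(4)])
    y = pd.Series(X.sum(axis=1) + rng.normal(scale=0.1, size=256))
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    # As in the deleted docstring examples: kwargs map onto the
    # DefaultFTTransformerConfig attributes and Preprocessor parameters.
    model = BasisFTTransformerRegressor(d_model=64, n_layers=8)
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    model.evaluate(X_test, y_test)

BasisFTTransformerClassifier and BasisFTTransformerLSS followed the same pattern (the LSS variant additionally taking a distribution family in fit, e.g. family="normal").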