
Commit

Formatted the code
prithagupta committed Aug 16, 2024
1 parent 50480f5 commit 34e39f7
Showing 46 changed files with 1,724 additions and 540 deletions.
1 change: 1 addition & 0 deletions autoqild/__init__.py
@@ -1,5 +1,6 @@
"""Documentation of the API for public objects and functions in `autoqild`
package."""

from .automl import *
from .bayes_search import *
from .classifiers import *
1 change: 1 addition & 0 deletions autoqild/automl/__init__.py
@@ -5,5 +5,6 @@
These tools simplify the process of training, validating, and deploying
machine learning models with minimal manual intervention.
"""

from .autogluon_classifier import AutoGluonClassifier
from .tabpfn_classifier import AutoTabPFNClassifier
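
For orientation, a minimal usage sketch of the autoqild.automl subpackage touched by this commit (illustrative only; the random data and the binary-classification setup are assumptions, not taken from the repository):

import numpy as np
from autoqild.automl import AutoGluonClassifier

# Toy binary-classification data (assumption, for illustration only).
X = np.random.rand(100, 5)
y = np.random.randint(0, 2, size=100)

model = AutoGluonClassifier(n_features=5, n_classes=2, time_limit=1800)
model.fit(X, y)
probabilities = model.predict_proba(X)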
85 changes: 66 additions & 19 deletions autoqild/automl/autogluon_classifier.py
@@ -10,7 +10,7 @@

from autoqild.automl.automl_core import AutomlClassifier
from .model_configurations import hyperparameters, reduced_hyperparameters
from ..utilities._utils import log_exception_error
from ..utilities.utils import log_exception_error


class AutoGluonClassifier(AutomlClassifier):
@@ -95,9 +95,21 @@ class AutoGluonClassifier(AutomlClassifier):
Property to check if the model is already fitted.
"""

def __init__(self, n_features, n_classes, time_limit=1800, output_folder=None, eval_metric="accuracy",
use_hyperparameters=True, delete_tmp_folder_after_terminate=True, auto_stack=True,
remove_boosting_models=True, verbosity=6, random_state=None, **kwargs):
def __init__(
self,
n_features,
n_classes,
time_limit=1800,
output_folder=None,
eval_metric="accuracy",
use_hyperparameters=True,
delete_tmp_folder_after_terminate=True,
auto_stack=True,
remove_boosting_models=True,
verbosity=6,
random_state=None,
**kwargs,
):
self.logger = logging.getLogger(name=AutoGluonClassifier.__name__)
self.random_state = check_random_state(random_state)
self.output_folder = output_folder
@@ -114,7 +126,16 @@ def __init__(self, n_features, n_classes, time_limit=1800, output_folder=None, e
else:
self.hyperparameters = None
if remove_boosting_models:
self.exclude_model_types = ["GBM", "CAT", "XGB", "LGB", "KNN", "NN_TORCH", "AG_AUTOMM", "LR"]
self.exclude_model_types = [
"GBM",
"CAT",
"XGB",
"LGB",
"KNN",
"NN_TORCH",
"AG_AUTOMM",
"LR",
]
else:
self.exclude_model_types = ["AG_AUTOMM", "LR"]
self.auto_stack = auto_stack
@@ -124,7 +145,9 @@ def __init__(self, n_features, n_classes, time_limit=1800, output_folder=None, e
self.time_limit = time_limit
self.model = None
self.class_label = "class"
self.columns = [f"feature_{i}" for i in range(self.n_features)] + [self.class_label]
self.columns = [f"feature_{i}" for i in range(self.n_features)] + [
self.class_label
]
if self.n_classes > 2:
self.problem_type = "multiclass"
if self.n_classes == 2:
@@ -153,21 +176,29 @@ def _is_fitted_(self) -> bool:

if self.model is not None:
self.leaderboard = self.model.leaderboard(extra_info=True)
time_taken = self.leaderboard["fit_time"].sum() + self.leaderboard["pred_time_val"].sum() + 20
time_taken = (
self.leaderboard["fit_time"].sum()
+ self.leaderboard["pred_time_val"].sum()
+ 20
)
difference = self.time_limit - time_taken
if 200 <= self.time_limit < 300:
limit = 150
elif self.time_limit >= 3000:
limit = 2000
else:
limit = 200
self.logger.info(f"Fitting time of the model {time_taken} and remaining {difference}, limit {limit}")
self.logger.info(
f"Fitting time of the model {time_taken} and remaining {difference}, limit {limit}"
)
num_models = len(self.leaderboard["fit_time"])
self.logger.info(f"Number of models trained is {num_models} ")
if num_models < 1200:
if num_models <= 50:
self.model = None
self.logger.info(f"Retraining the model since they are less than 50")
self.logger.info(
"Retraining the model since fewer than 50 models were trained"
)
if difference >= limit:
self.model = None
else:
@@ -176,8 +207,10 @@ def _is_fitted_(self) -> bool:
if self.model is None:
try:
shutil.rmtree(self.output_folder)
self.logger.error(f"Since the model is not completely fitted, the folder '{basename}' "
f"and its contents are deleted successfully.")
self.logger.error(
f"Since the model is not completely fitted, the folder '{basename}' "
f"and its contents are deleted successfully."
)
except OSError as error:
log_exception_error(self.logger, error)
self.logger.error("Folder does not exist")
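
The retrain heuristic in _is_fitted_ above can be condensed as follows (a restatement for readability, not code from the repository; the 20-second overhead and the thresholds are taken from the diff):

def should_retrain(time_limit, fit_time, pred_time_val, num_models):
    # Total time spent so far, plus a fixed 20 s overhead, as in the diff.
    time_taken = fit_time + pred_time_val + 20
    difference = time_limit - time_taken
    # Remaining-time threshold depends on the overall budget.
    if 200 <= time_limit < 300:
        limit = 150
    elif time_limit >= 3000:
        limit = 2000
    else:
        limit = 200
    # Retrain when few models finished or when enough budget remains.
    return num_models < 1200 and (num_models <= 50 or difference >= limit)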
@@ -202,15 +235,27 @@ def fit(self, X, y, **kwd):
while not self._is_fitted_:
try:
self.logger.info("Fitting the model from scratch")
self.model = TabularPredictor(label=self.class_label, sample_weight=self.sample_weight,
problem_type=self.problem_type, eval_metric=self.eval_metric,
path=self.output_folder, verbosity=self.verbosity)
self.model.fit(train_data, time_limit=self.time_limit, hyperparameters=self.hyperparameters,
hyperparameter_tune_kwargs=self.hyperparameter_tune_kwargs, auto_stack=self.auto_stack,
excluded_model_types=self.exclude_model_types)
self.model = TabularPredictor(
label=self.class_label,
sample_weight=self.sample_weight,
problem_type=self.problem_type,
eval_metric=self.eval_metric,
path=self.output_folder,
verbosity=self.verbosity,
)
self.model.fit(
train_data,
time_limit=self.time_limit,
hyperparameters=self.hyperparameters,
hyperparameter_tune_kwargs=self.hyperparameter_tune_kwargs,
auto_stack=self.auto_stack,
excluded_model_types=self.exclude_model_types,
)
except Exception as error:
log_exception_error(self.logger, error)
self.logger.error("Fit function did not work, checking the saved models")
self.logger.error(
"Fit function did not work, checking the saved models"
)
self.leaderboard = self.model.leaderboard(extra_info=True)
if self.delete_tmp_folder_after_terminate:
self.model.delete_models(models_to_keep="best", dry_run=False)
@@ -334,7 +379,9 @@ def convert_to_dataframe(self, X, y=None):
data = np.concatenate((X, y[:, None]), axis=1)

if self.n_features != X.shape[-1]:
raise ValueError(f"Dataset passed does not contain {self.n_features} features")
raise ValueError(
f"Dataset passed does not contain {self.n_features} features"
)

df_data = pd.DataFrame(data=data, columns=self.columns)
return df_data
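
A standalone sketch of the DataFrame layout convert_to_dataframe produces (derived from the column construction in __init__ above; the toy data is an assumption):

import numpy as np
import pandas as pd

n_features = 3
X = np.random.rand(4, n_features)
y = np.array([0, 1, 0, 1])

# Columns follow the naming used in __init__: feature_0 ... feature_{n-1}, then "class".
columns = [f"feature_{i}" for i in range(n_features)] + ["class"]
data = np.concatenate((X, y[:, None]), axis=1)
df_data = pd.DataFrame(data=data, columns=columns)
print(df_data.head())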
23 changes: 17 additions & 6 deletions autoqild/automl/automl_core.py
@@ -1,4 +1,5 @@
"""Abstract base class for AutoML classifiers."""

from abc import abstractmethod

from sklearn.base import BaseEstimator, ClassifierMixin
@@ -60,7 +61,7 @@ class AutomlClassifier(BaseEstimator, ClassifierMixin):

@abstractmethod
def fit(self, X, y, **kwd):
"""" Fit the AutoML classifier on the provided dataset.
"""Fit the AutoML classifier on the provided dataset.
Parameters
----------
@@ -89,7 +90,9 @@ def fit(self, X, y, **kwd):
for training the classifier on the dataset provided in `X` and `y`.
"""

raise NotImplementedError("The 'fit' method must be implemented by the subclass.")
raise NotImplementedError(
"The 'fit' method must be implemented by the subclass."
)

@abstractmethod
def score(self, X, y, sample_weight=None, verbose=0):
@@ -120,7 +123,9 @@ def score(self, X, y, sample_weight=None, verbose=0):
NotImplementedError
If the method is not implemented by the subclass.
"""
raise NotImplementedError("The 'fit' method must be implemented by the subclass.")
raise NotImplementedError(
"The 'score' method must be implemented by the subclass."
)

@abstractmethod
def predict(self, X, verbose=0):
@@ -144,7 +149,9 @@ def predict(self, X, verbose=0):
NotImplementedError
If the method is not implemented by the subclass.
"""
raise NotImplementedError("The 'predict' method must be implemented by the subclass.")
raise NotImplementedError(
"The 'predict' method must be implemented by the subclass."
)

@abstractmethod
def predict_proba(self, X, verbose=0):
@@ -168,7 +175,9 @@ def predict_proba(self, X, verbose=0):
NotImplementedError
If the method is not implemented by the subclass.
"""
raise NotImplementedError("The 'predict_proba' method must be implemented by the subclass.")
raise NotImplementedError(
"The 'predict_proba' method must be implemented by the subclass."
)

@abstractmethod
def decision_function(self, X, verbose=0):
@@ -194,7 +203,9 @@ def decision_function(self, X, verbose=0):
NotImplementedError
If the method is not implemented by the subclass.
"""
raise NotImplementedError("The 'decision_function' method must be implemented by the subclass.")
raise NotImplementedError(
"The 'decision_function' method must be implemented by the subclass."
)

def get_params(self, deep=True):
"""Get parameters for this estimator.
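
Since every abstract method above raises NotImplementedError, a concrete subclass must override all of them. A toy binary-classification sketch (illustrative only, not from the repository):

import numpy as np
from autoqild.automl.automl_core import AutomlClassifier

class MajorityClassClassifier(AutomlClassifier):
    # Always predicts the most frequent class seen during fit (binary toy case).

    def fit(self, X, y, **kwd):
        self.majority_ = int(np.bincount(y).argmax())
        return self

    def predict(self, X, verbose=0):
        return np.full(len(X), self.majority_)

    def score(self, X, y, sample_weight=None, verbose=0):
        return float(np.mean(self.predict(X) == y))

    def predict_proba(self, X, verbose=0):
        proba = np.zeros((len(X), 2))
        proba[:, self.majority_] = 1.0
        return proba

    def decision_function(self, X, verbose=0):
        return self.predict_proba(X)[:, 1]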
19 changes: 13 additions & 6 deletions autoqild/automl/model_configurations.py
@@ -1,4 +1,5 @@
"""Configurations for search space for AutoGluon tools."""

from autogluon.common.space import Real, Int, Categorical

hyperparameters = {
@@ -36,7 +37,10 @@
"learning_rate": Real(1e-5, 1e-1, default=5e-4, log=True),
"wd": Real(1e-6, 1e-1, default=5e-4, log=True),
"emb_drop": Real(0.0, 0.5),
"ps": Real(0.0, 0.5, ),
"ps": Real(
0.0,
0.5,
),
"smoothing": Real(0.0, 0.5),
},
"RF": {
@@ -46,7 +50,7 @@
"max_features": Categorical("sqrt", "log2"),
"min_samples_leaf": Int(lower=2, upper=50, default=10),
"min_samples_split": Int(lower=2, upper=50, default=10),
"class_weight": Categorical("balanced", "balanced_subsample")
"class_weight": Categorical("balanced", "balanced_subsample"),
},
"XT": {
"n_estimators": Int(20, 300),
@@ -55,7 +59,7 @@
"max_features": Categorical("sqrt", "log2"),
"min_samples_leaf": Int(lower=2, upper=50, default=10),
"min_samples_split": Int(lower=2, upper=50, default=10),
"class_weight": Categorical("balanced", "balanced_subsample")
"class_weight": Categorical("balanced", "balanced_subsample"),
},
"KNN": {
"weights": Categorical("uniform", "distance"),
@@ -76,7 +80,10 @@
"learning_rate": Real(1e-5, 1e-1, default=5e-4, log=True),
"wd": Real(1e-6, 1e-1, default=5e-4, log=True),
"emb_drop": Real(0.0, 0.5),
"ps": Real(0.0, 0.5, ),
"ps": Real(
0.0,
0.5,
),
"smoothing": Real(0.0, 0.5),
},
"RF": {
@@ -86,7 +93,7 @@
"max_features": Categorical("sqrt", "log2"),
"min_samples_leaf": Int(lower=2, upper=50, default=10),
"min_samples_split": Int(lower=2, upper=50, default=10),
"class_weight": Categorical("balanced")
"class_weight": Categorical("balanced"),
},
"XT": {
"n_estimators": Int(20, 300),
@@ -95,7 +102,7 @@
"max_features": Categorical("sqrt", "log2"),
"min_samples_leaf": Int(lower=2, upper=50, default=10),
"min_samples_split": Int(lower=2, upper=50, default=10),
"class_weight": Categorical("balanced")
"class_weight": Categorical("balanced"),
},
}
"""This dictionary defines the hyperparameters for simpler models like
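
A sketch of how these search-space dictionaries are consumed (mirroring the TabularPredictor.fit call in autogluon_classifier.py above; the toy DataFrame and the "auto" tuning preset are assumptions, not from this commit):

import numpy as np
import pandas as pd
from autogluon.tabular import TabularPredictor
from autoqild.automl.model_configurations import hyperparameters

# Toy training frame shaped like convert_to_dataframe's output (assumption).
X = np.random.rand(200, 4)
y = np.random.randint(0, 2, size=200)
train_data = pd.DataFrame(
    np.concatenate((X, y[:, None]), axis=1),
    columns=[f"feature_{i}" for i in range(4)] + ["class"],
)

predictor = TabularPredictor(label="class", problem_type="binary")
predictor.fit(
    train_data,
    time_limit=600,
    hyperparameters=hyperparameters,
    hyperparameter_tune_kwargs="auto",  # search spaces require a tuning strategy
)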

