diff --git a/lightautoml/automl/base.py b/lightautoml/automl/base.py index c29fcba3..69987c23 100644 --- a/lightautoml/automl/base.py +++ b/lightautoml/automl/base.py @@ -185,6 +185,20 @@ def fit_predict( self.timer.start() train_dataset = self.reader.fit_read(train_data, train_features, roles) + # Saving class mapping + if self.reader.task.name == "binary": + self.classes_ = [1] + elif self.reader.task.name == "multi:reg": + self.classes_ = roles["target"] + elif self.reader.task.name == "reg": + self.classes_ = [roles["target"]] + else: + self.classes_ = ( + sorted(self.reader.class_mapping, key=self.reader.class_mapping.get, reverse=False) + if self.reader.class_mapping + else None + ) + assert ( len(self._levels) <= 1 or train_dataset.folds is not None ), "Not possible to fit more than 1 level without cv folds" @@ -259,7 +273,7 @@ def fit_predict( else: break - blended_prediction, last_pipes = self.blender.fit_predict(level_predictions, pipes) + blended_prediction, last_pipes = self.blender.fit_predict(level_predictions, pipes, self.classes_) self.levels.append(last_pipes) self.reader.upd_used_features(remove=list(set(self.reader.used_features) - set(self.collect_used_feats()))) diff --git a/lightautoml/automl/blend.py b/lightautoml/automl/blend.py index 7060270b..d1ba9706 100644 --- a/lightautoml/automl/blend.py +++ b/lightautoml/automl/blend.py @@ -42,7 +42,7 @@ def outp_dim(self) -> int: # noqa: D102 return self._outp_dim def fit_predict( - self, predictions: Sequence[LAMLDataset], pipes: Sequence[MLPipeline] + self, predictions: Sequence[LAMLDataset], pipes: Sequence[MLPipeline], class_mapping: dict ) -> Tuple[LAMLDataset, Sequence[MLPipeline]]: """Wraps custom ``._fit_predict`` methods of blenders. @@ -63,7 +63,7 @@ def fit_predict( self._bypass = True return predictions[0], pipes - return self._fit_predict(predictions, pipes) + return self._fit_predict(predictions, pipes, class_mapping) def _fit_predict( self, predictions: Sequence[LAMLDataset], pipes: Sequence[MLPipeline] @@ -134,7 +134,7 @@ def split_models(self, predictions: Sequence[LAMLDataset]) -> Tuple[Sequence[LAM return splitted_preds, model_idx, pipe_idx - def _set_metadata(self, predictions: Sequence[LAMLDataset], pipes: Sequence[MLPipeline]): + def _set_metadata(self, predictions: Sequence[LAMLDataset], pipes: Sequence[MLPipeline], class_mapping: dict): pred0 = predictions[0] pipe0 = pipes[0] @@ -143,6 +143,8 @@ def _set_metadata(self, predictions: Sequence[LAMLDataset], pipes: Sequence[MLPi self._outp_prob = pred0.task.name in ["binary", "multiclass"] self._score = predictions[0].task.get_dataset_metric() + self._class_mapping = class_mapping + def score(self, dataset: LAMLDataset) -> float: """Score metric for blender. @@ -321,7 +323,7 @@ def _get_weighted_pred(self, splitted_preds: Sequence[NumpyDataset], wts: Option outp = splitted_preds[0].empty() outp.set_data( weighted_pred, - ["WeightedBlend_{0}".format(x) for x in range(weighted_pred.shape[1])], + self._class_mapping if self._class_mapping else list(range(weighted_pred.shape[1])), NumericRole(np.float32, prob=self._outp_prob), ) @@ -436,7 +438,7 @@ def _prune_pipe( return new_pipes, wts def _fit_predict( - self, predictions: Sequence[NumpyDataset], pipes: Sequence[MLPipeline] + self, predictions: Sequence[NumpyDataset], pipes: Sequence[MLPipeline], class_mapping: dict ) -> Tuple[NumpyDataset, Sequence[MLPipeline]]: """Perform coordinate descent. @@ -451,7 +453,7 @@ def _fit_predict( Dataset and MLPipeline. """ - self._set_metadata(predictions, pipes) + self._set_metadata(predictions, pipes, class_mapping) splitted_preds, _, pipe_idx = cast(List[NumpyDataset], self.split_models(predictions)) wts = self._optimize(splitted_preds)