diff --git a/NAMESPACE b/NAMESPACE
index a0536eef..73daa4cc 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -3,6 +3,7 @@
 S3method(as.data.table,ArchiveBatchFSelect)
 S3method(as.data.table,DictionaryFSelector)
 S3method(as.data.table,EnsembleFSResult)
+S3method(c,EnsembleFSResult)
 S3method(extract_inner_fselect_archives,BenchmarkResult)
 S3method(extract_inner_fselect_archives,ResampleResult)
 S3method(extract_inner_fselect_results,BenchmarkResult)
diff --git a/NEWS.md b/NEWS.md
index 46b078bc..f039c48d 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,8 +1,9 @@
 # mlr3fselect (development version)
 
-* Use [fastVoteR](https://github.com/bblodfon/fastVoteR) for feature ranking in `EnsembleFSResult()` objects
-* Add embedded ensemble feature selection `embedded_ensemble_fselect()`
-* Refactor `ensemble_fselect()` and `EnsembleFSResult()`
+* refactor: Use [fastVoteR](https://github.com/bblodfon/fastVoteR) for feature ranking in `EnsembleFSResult()` objects
+* feat: Add embedded ensemble feature selection `embedded_ensemble_fselect()`
+* refactor/perf: `ensemble_fselect()` and `EnsembleFSResult()`
+* feat: Add `c.EnsembleFSResult(...)` and `EnsembleFSResult$combine(...)` methods
 
 # mlr3fselect 1.2.1
 
diff --git a/R/EnsembleFSResult.R b/R/EnsembleFSResult.R
index fed296f2..73560eee 100644
--- a/R/EnsembleFSResult.R
+++ b/R/EnsembleFSResult.R
@@ -15,6 +15,9 @@
 #'     * `x` ([EnsembleFSResult])
 #'     * `benchmark_result` (`logical(1)`)\cr
 #'       Whether to add the learner, task and resampling information from the benchmark result.
+#' * `c(...)`\cr
+#'   ([EnsembleFSResult], ...) -> [EnsembleFSResult]\cr
+#'   Combines multiple [EnsembleFSResult] objects into a new [EnsembleFSResult].
 #'
 #' @references
 #' `r format_bib("das1999", "meinshausen2010")`
@@ -166,6 +169,77 @@ EnsembleFSResult = R6Class("EnsembleFSResult",
       private$.active_measure = which
     },
 
+    #' @description
+    #' Combines a second [EnsembleFSResult] into the current object, modifying it **in-place**.
+    #' If the second [EnsembleFSResult] (`efsr`) is `NULL`, the method returns the object unmodified.
+    #'
+    #' Both objects must have the same task features and `measure`.
+    #' If the `inner_measure` differs between the objects or is `NULL` in either, it will be set to `NULL` in the combined object and the active measure is reset to `"outer"`.
+    #' Additionally, the `importance` column will be removed if it is missing in either object.
+    #' If both objects contain a `benchmark_result`, these will be combined.
+    #' Otherwise, the combined object will have a `NULL` value for `benchmark_result`.
+    #'
+    #' This method modifies the object by reference.
+    #' To preserve the original state, explicitly `$clone()` the object beforehand.
+    #' Alternatively, you can use the [c()] function, which internally calls this method.
+    #'
+    #' @param efsr ([EnsembleFSResult])\cr
+    #' A second [EnsembleFSResult] object to combine with the current object.
+    #'
+    #' @return
+    #' Returns the object itself, but modified **by reference**.
+    combine = function(efsr) {
+      if (!is.null(efsr)) {
+        assert_class(efsr, "EnsembleFSResult")
+
+        # Ensure both objects have the same task features
+        assert_set_equal(private$.features, get_private(efsr)$.features)
+
+        # Ensure both objects have the same (outer) measure
+        assert_set_equal(private$.measure$id, get_private(efsr)$.measure$id)
+
+        # Set inner measure to NULL if the measure ids are different or one of them is NULL
+        inner_msr = private$.inner_measure
+        inner_msr2 = get_private(efsr)$.inner_measure
+        result2 = get_private(efsr)$.result
+        if (is.null(inner_msr) || is.null(inner_msr2) || inner_msr$id != inner_msr2$id) {
+          private$.inner_measure = NULL
+          # Without an inner measure an active "inner" measure would dangle, so fall back to the outer one
+          private$.active_measure = "outer"
+
+          # Remove associated inner measure scores from results
+          if (!is.null(inner_msr)) {
+            private$.result[[sprintf("%s_inner", inner_msr$id)]] = NULL
+          }
+          if (!is.null(inner_msr2)) {
+            result2[[sprintf("%s_inner", inner_msr2$id)]] = NULL
+          }
+        }
+
+        # remove importance scores if missing in either object
+        has_imp = "importance" %in% names(private$.result)
+        has_imp2 = "importance" %in% names(result2)
+        if (!has_imp || !has_imp2) {
+          if (has_imp) private$.result[["importance"]] = NULL
+          if (has_imp2) result2[["importance"]] = NULL
+        }
+
+        # Combine results from both objects (match columns by name, not by position)
+        private$.result = data.table::rbindlist(list(private$.result, result2), use.names = TRUE, fill = FALSE)
+
+        # Merge benchmark results if available in both objects
+        has_bmr = !is.null(self$benchmark_result)
+        has_bmr2 = !is.null(efsr$benchmark_result)
+        if (has_bmr && has_bmr2) {
+          self$benchmark_result = self$benchmark_result$combine(efsr$benchmark_result)
+        } else {
+          self$benchmark_result = NULL
+        }
+      }
+
+      invisible(self)
+    },
+
     #' @description
     #' Calculates the feature ranking via [fastVoteR::rank_candidates()].
     #'
@@ -499,3 +573,20 @@ EnsembleFSResult = R6Class("EnsembleFSResult",
 as.data.table.EnsembleFSResult = function(x, ...) {
   x$result
 }
+
+#' @export
+c.EnsembleFSResult = function(...) {
+  efsrs = list(...)
+
+  # Deep clone the first object for initialization
+  init = efsrs[[1]]$clone(deep = TRUE)
+
+  # If there's only one object, return it directly
+  if (length(efsrs) == 1) {
+    return(init)
+  }
+
+  # Combine the remaining objects
+  rest = tail(efsrs, -1)
+  Reduce(function(lhs, rhs) lhs$combine(rhs), rest, init = init)
+}
diff --git a/R/embedded_ensemble_fselect.R b/R/embedded_ensemble_fselect.R
index 8852d760..31f35775 100644
--- a/R/embedded_ensemble_fselect.R
+++ b/R/embedded_ensemble_fselect.R
@@ -88,6 +88,8 @@ embedded_ensemble_fselect = function(
 
   # extract scores on the test sets
   scores = bmr$score(measure)
+  # remove `bmr_score` class
+  class(scores) = c("data.table", "data.frame")
 
   set(scores, j = "features", value = features)
   set(scores, j = "n_features", value = n_features)
diff --git a/R/ensemble_fselect.R b/R/ensemble_fselect.R
index 12be4636..d59d6049 100644
--- a/R/ensemble_fselect.R
+++ b/R/ensemble_fselect.R
@@ -136,6 +136,8 @@ ensemble_fselect = function(
 
   # extract scores on the test sets
   scores = bmr$score(measure)
+  # remove `bmr_score` class
+  class(scores) = c("data.table", "data.frame")
 
   set(scores, j = "features", value = features)
   set(scores, j = "n_features", value = n_features)
diff --git a/man/ensemble_fs_result.Rd b/man/ensemble_fs_result.Rd
index dd0239a3..434a0530 100644
--- a/man/ensemble_fs_result.Rd
+++ b/man/ensemble_fs_result.Rd
@@ -21,6 +21,9 @@ Returns a tabular view of the ensemble feature selection.\cr
 \item \code{benchmark_result} (\code{logical(1)})\cr
 Whether to add the learner, task and resampling information from the benchmark result.
 }
+\item \code{c(...)}\cr
+(\link{EnsembleFSResult}, ...) -> \link{EnsembleFSResult}\cr
+Combines multiple \link{EnsembleFSResult} objects into a new \link{EnsembleFSResult}.
 }
 }
 
@@ -119,6 +122,7 @@ Returns the number of times the task was initially resampled in the ensemble fea
 \item \href{#method-EnsembleFSResult-print}{\code{EnsembleFSResult$print()}}
 \item \href{#method-EnsembleFSResult-help}{\code{EnsembleFSResult$help()}}
 \item \href{#method-EnsembleFSResult-set_active_measure}{\code{EnsembleFSResult$set_active_measure()}}
+\item \href{#method-EnsembleFSResult-combine}{\code{EnsembleFSResult$combine()}}
 \item \href{#method-EnsembleFSResult-feature_ranking}{\code{EnsembleFSResult$feature_ranking()}}
 \item \href{#method-EnsembleFSResult-stability}{\code{EnsembleFSResult$stability()}}
 \item \href{#method-EnsembleFSResult-pareto_front}{\code{EnsembleFSResult$pareto_front()}}
@@ -239,6 +243,38 @@ or \code{"outer"} (measure used in test sets, default value).}
 }
 }
 \if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-EnsembleFSResult-combine"></a>}}
+\if{latex}{\out{\hypertarget{method-EnsembleFSResult-combine}{}}}
+\subsection{Method \code{combine()}}{
+Combines a second \link{EnsembleFSResult} into the current object, modifying it \strong{in-place}.
+If the second \link{EnsembleFSResult} (\code{efsr}) is \code{NULL}, the method returns the object unmodified.
+
+Both objects must have the same task features and \code{measure}.
+If the \code{inner_measure} differs between the objects or is \code{NULL} in either, it will be set to \code{NULL} in the combined object and the active measure is reset to \code{"outer"}.
+Additionally, the \code{importance} column will be removed if it is missing in either object.
+If both objects contain a \code{benchmark_result}, these will be combined.
+Otherwise, the combined object will have a \code{NULL} value for \code{benchmark_result}.
+
+This method modifies the object by reference.
+To preserve the original state, explicitly \verb{$clone()} the object beforehand.
+Alternatively, you can use the \code{\link[=c]{c()}} function, which internally calls this method.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{EnsembleFSResult$combine(efsr)}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{efsr}}{(\link{EnsembleFSResult})\cr
+A second \link{EnsembleFSResult} object to combine with the current object.}
+}
+\if{html}{\out{</div>}}
+}
+\subsection{Returns}{
+Returns the object itself, but modified \strong{by reference}.
+}
+}
+\if{html}{\out{<hr>}}
 \if{html}{\out{<a id="method-EnsembleFSResult-feature_ranking"></a>}}
 \if{latex}{\out{\hypertarget{method-EnsembleFSResult-feature_ranking}{}}}
 \subsection{Method \code{feature_ranking()}}{
diff --git a/tests/testthat/test_embedded_ensemble_fselect.R b/tests/testthat/test_embedded_ensemble_fselect.R
index eb8b5e7b..189bd06f 100644
--- a/tests/testthat/test_embedded_ensemble_fselect.R
+++ b/tests/testthat/test_embedded_ensemble_fselect.R
@@ -62,3 +62,44 @@ test_that("embedded efs works", {
   expect_data_table(feature_ranking, nrows = length(task$feature_names))
   expect_equal(names(feature_ranking), c("feature", "score", "norm_score", "borda_score"))
 })
+
+test_that("combine embedded efs results", {
+  task = tsk("sonar")
+  with_seed(42, {
+    efsr1 = embedded_ensemble_fselect(
+      task = task,
+      learners = lrns(c("classif.rpart", "classif.featureless")),
+      init_resampling = rsmp("subsampling", repeats = 2),
+      measure = msr("classif.ce")
+    )
+  })
+
+  with_seed(43, {
+    efsr2 = embedded_ensemble_fselect(
+      task = task,
+      learners = lrns(c("classif.rpart", "classif.featureless")),
+      init_resampling = rsmp("subsampling", repeats = 3),
+      measure = msr("classif.ce")
+    )
+  })
+
+  comb1 = efsr1$clone(deep = TRUE)$combine(efsr2)
+  comb2 = c(efsr1, efsr2)
+
+  expect_class(comb1, "EnsembleFSResult")
+  expect_class(comb2, "EnsembleFSResult")
+  expect_data_table(comb1$result, nrows = 10L)
+  expect_data_table(comb2$result, nrows = 10L)
+  expect_equal(comb1$n_learners, 2L)
+  expect_equal(comb2$n_learners, 2L)
+  expect_equal(get_private(comb1)$.measure$id, "classif.ce")
+  expect_equal(get_private(comb2)$.measure$id, "classif.ce")
+  expect_null(get_private(comb1)$.inner_measure)
+  expect_null(get_private(comb2)$.inner_measure)
+  assert_benchmark_result(comb1$benchmark_result)
+  assert_benchmark_result(comb2$benchmark_result)
+  expect_equal(comb1$benchmark_result$n_resample_results, 4L)
+  expect_equal(comb2$benchmark_result$n_resample_results, 4L)
+  expect_equal(nrow(get_private(comb1$benchmark_result)$.data$data$fact), 10L)
+  expect_equal(nrow(get_private(comb2$benchmark_result)$.data$data$fact), 10L)
+})
diff --git a/tests/testthat/test_ensemble_fselect.R b/tests/testthat/test_ensemble_fselect.R
index edc9d975..d88cec0f 100644
--- a/tests/testthat/test_ensemble_fselect.R
+++ b/tests/testthat/test_ensemble_fselect.R
@@ -219,6 +219,124 @@ test_that("EnsembleFSResult initialization", {
   expect_false(efsr$measure$minimize)
 })
 
+test_that("combining EnsembleFSResult objects", {
+  selected_features = list(
+    c("V3", "V20"),
+    c("V3", "V5", "V19", "V15"),
+    c("V11", "V7", "V6", "V8"),
+    c("V11"),
+    c("V17", "V2", "V12", "V9", "V1"),
+    c("V11", "V18", "V9")
+  )
+  feats = paste0("V", 1:20)
+
+  res1 = data.table(
+    resampling_iteration = c(1, 1, 2, 2, 3, 3),
+    learner_id = rep(c("lrn1", "lrn2"), 3),
+    n_features = c(2, 4, 4, 1, 5, 3),
+    features = selected_features,
+    classif.ce = runif(6),
+    classif.acc_inner = runif(6) # inner measure has the `_inner` suffix
+  )
+
+  # same result, just different learners
+  res2 = data.table(
+    resampling_iteration = c(1, 1, 2, 2, 3, 3),
+    learner_id = rep(c("lrn3", "lrn4"), 3),
+    n_features = c(2, 4, 4, 1, 5, 3),
+    features = selected_features,
+    classif.ce = runif(6),
+    classif.acc_inner = runif(6) # inner measure has the `_inner` suffix
+  )
+
+  # no `inner_measure`
+  res3 = res2[, -c("classif.acc_inner")]
+  # different `measure`
+  res4 = setnames(copy(res3), "classif.ce", "classif.auc")
+  # different `inner_measure`
+  res5 = setnames(copy(res2), "classif.acc_inner", "classif.ce_inner")
+
+  # initialize efsr objects
+  m1 = msr("classif.ce")
+  m2 = msr("classif.acc")
+  m3 = msr("classif.auc")
+  efsr1 = EnsembleFSResult$new(res1, features = feats, measure = m1, inner_measure = m2)
+  efsr2 = EnsembleFSResult$new(res2, features = feats, measure = m1, inner_measure = m2)
+  efsr3 = EnsembleFSResult$new(res3, features = feats, measure = m1)
+  efsr4 = EnsembleFSResult$new(res4, features = feats, measure = m3)
+  efsr5 = EnsembleFSResult$new(res5, features = feats, measure = m1, inner_measure = m1)
+
+  # combine efsr with nothing gives the same object back deep-cloned
+  efsr11 = c(efsr1)
+  expect_class(efsr11, "EnsembleFSResult")
+  expect_equal(efsr1$result$classif.ce, efsr11$result$classif.ce)
+
+  # combine efsrs with same inner and outer measures
+  comb1 = efsr1$clone(deep = TRUE)$combine(efsr2)
+  comb11 = c(efsr1, efsr2) # same as above
+  # efsr1 doesn't change
+  expect_data_table(efsr1$result, nrows = 6L)
+  expect_equal(efsr1$n_learners, 2L)
+  expect_equal(get_private(efsr1)$.measure$id, "classif.ce")
+  expect_equal(get_private(efsr1)$.inner_measure$id, "classif.acc")
+  # efsr2 doesn't change either
+  expect_data_table(efsr2$result, nrows = 6)
+  expect_equal(efsr2$n_learners, 2)
+  expect_equal(get_private(efsr2)$.measure$id, "classif.ce")
+  expect_equal(get_private(efsr2)$.inner_measure$id, "classif.acc")
+  # combined object has more rows
+  expect_data_table(comb1$result, nrows = 12L)
+  expect_data_table(comb11$result, nrows = 12L)
+  expect_equal(comb1$n_learners, 4L)
+  expect_equal(comb11$n_learners, 4L)
+  expect_equal(get_private(comb1)$.measure$id, "classif.ce")
+  expect_equal(get_private(comb11)$.measure$id, "classif.ce")
+  expect_equal(get_private(comb1)$.inner_measure$id, "classif.acc")
+  expect_equal(get_private(comb11)$.inner_measure$id, "classif.acc")
+
+  # no `inner_measure` in the 2nd efsr
+  comb2 = efsr1$clone(deep = TRUE)$combine(efsr3)
+  comb22 = c(efsr1, efsr3)
+  expect_equal(get_private(efsr1)$.measure$id, "classif.ce")
+  expect_equal(get_private(efsr1)$.inner_measure$id, "classif.acc")
+  expect_null(get_private(efsr3)$.inner_measure)
+  expect_data_table(comb2$result, nrows = 12L)
+  expect_data_table(comb22$result, nrows = 12L)
+  expect_equal(comb2$n_learners, 4L)
+  expect_equal(comb22$n_learners, 4L)
+  expect_equal(get_private(comb2)$.measure$id, "classif.ce")
+  expect_equal(get_private(comb22)$.measure$id, "classif.ce")
+  expect_null(get_private(comb2)$.inner_measure$id)
+  expect_null(get_private(comb22)$.inner_measure$id)
+
+  # different (outer) measure => not possible to combine
+  expect_error(efsr1$clone(deep = TRUE)$combine(efsr4))
+
+  # different `inner_measure`
+  comb3 = efsr1$clone(deep = TRUE)$combine(efsr5)
+  expect_data_table(comb3$result, nrows = 12L)
+  expect_equal(comb3$n_learners, 4L)
+  expect_equal(get_private(comb3)$.measure$id, "classif.ce")
+  expect_null(get_private(comb3)$.inner_measure$id)
+  # `inner_measure`s of the individual objects did not change
+  expect_equal(get_private(efsr1)$.inner_measure$id, "classif.acc")
+  expect_equal(get_private(efsr5)$.inner_measure$id, "classif.ce")
+
+  # dropping the inner measure must also reset an active "inner" measure
+  efsr1_inner = efsr1$clone(deep = TRUE)
+  efsr1_inner$set_active_measure("inner")
+  comb4 = efsr1_inner$combine(efsr3)
+  expect_null(get_private(comb4)$.inner_measure)
+  expect_equal(get_private(comb4)$.active_measure, "outer")
+
+  # multi-combine works
+  comb_all = c(efsr1, efsr2, efsr3, efsr5)
+  expect_data_table(comb_all$result, nrows = 24L)
+  expect_equal(comb_all$n_learners, 4L)
+  expect_equal(get_private(comb_all)$.measure$id, "classif.ce")
+  expect_null(get_private(comb_all)$.inner_measure$id)
+})
+
 test_that("different callbacks can be set", {
   callback_test = callback_batch_fselect("mlr3fselect.test",
     on_eval_before_archive = function(callback, context) {