From 6d032dcc0454a641f223c9993b015f710db35769 Mon Sep 17 00:00:00 2001
From: John Zobolas <bblodfon@users.noreply.github.com>
Date: Fri, 21 Jun 2024 19:43:53 +0200
Subject: [PATCH] efs updates (#105)

* export class

* correct doc

* add args minimize, measure_var and active field nlearners

* add pareto_front method

* add export in NAMESPACE

* improve doc a bit

* update docs

* add more tests

* Update R/EnsembleFSResult.R

Co-authored-by: Marc Becker <33069354+be-marc@users.noreply.github.com>

* Update R/EnsembleFSResult.R

Co-authored-by: Marc Becker <33069354+be-marc@users.noreply.github.com>

* Update R/EnsembleFSResult.R

Co-authored-by: Marc Becker <33069354+be-marc@users.noreply.github.com>

* fix variable names

* order also by measure (pareto points may have the same num of features)

* more realistic pareto front in the test

* add Das 'knee' paper

* add "knee_points" method

* fix method name

* change stability args to a list

* check stability_args

* style change

* fix test

* suppress warnings

* 'featureless' learner produces same number of features as best in the RFE

* update docs

* correct method name and update docs

* add tests for knee_points

* fix bug in "knee_points" (didn't work properly for measures like accuracy)

---------

Co-authored-by: Marc Becker <33069354+be-marc@users.noreply.github.com>
---
 R/EnsembleFSResult.R                   | 179 +++++++++++++++++++++++--
 R/bibentries.R                         |  14 ++
 R/ensemble_fselect.R                   |   4 +-
 man/ensemble_fs_result.Rd              | 106 ++++++++++++++-
 tests/testthat/test_ensemble_fselect.R |  77 +++++++++--
 5 files changed, 356 insertions(+), 24 deletions(-)

diff --git a/R/EnsembleFSResult.R b/R/EnsembleFSResult.R
index da9a6d3c..f689232b 100644
--- a/R/EnsembleFSResult.R
+++ b/R/EnsembleFSResult.R
@@ -4,7 +4,7 @@
 #'
 #' @description
 #' The `EnsembleFSResult` stores the results of ensemble feature selection.
-#' It includes methods for evaluating the stability of the feature selection process and for ranking the selected features.
+#' It includes methods for evaluating the stability of the feature selection process and for ranking the selected features, among others.
 #' The function [ensemble_fselect()] returns an object of this class.
 #'
 #' @section S3 Methods:
@@ -15,6 +15,9 @@
 #'     * `benchmark_result` (`logical(1)`)\cr
 #'       Whether to add the learner, task and resampling information from the benchmark result.
 #'
+#' @references
+#' `r format_bib("das1999")`
+#'
 #' @export
 #' @examples
 #' \donttest{
@@ -39,6 +42,9 @@
 #'
 #'   # returns a ranking of all features
 #'   head(efsr$feature_ranking())
+#'
+#'   # returns the empirical pareto front (nfeatures vs error)
+#'   efsr$pareto_front()
 #' }
 EnsembleFSResult = R6Class("EnsembleFSResult",
   public = list(
@@ -56,18 +62,26 @@ EnsembleFSResult = R6Class("EnsembleFSResult",
     #'
     #' @param result ([data.table::data.table])\cr
     #'  The result of the ensemble feature selection.
-    #'  Column names should include `"resampling_id"`, `"learner_id"`, `"features"`
+    #'  Column names should include `"resampling_iteration"`, `"learner_id"`, `"features"`
     #'  and `"n_features"`.
     #' @param features ([character()])\cr
     #'  The vector of features of the task that was used in the ensemble feature
     #'  selection.
     #' @param benchmark_result ([mlr3::BenchmarkResult])\cr
     #'  The benchmark result object.
-    initialize = function(result, features, benchmark_result = NULL) {
+    #' @param measure_id (`character(1)`)\cr
+    #'  Column name of `"result"` that corresponds to the measure used.
+    #' @param minimize (`logical(1)`)\cr
+    #'  If `TRUE` (default), lower values of the measure correspond to higher performance.
+    initialize = function(result, features, benchmark_result = NULL, measure_id,
+                          minimize = TRUE) {
       assert_data_table(result)
-      assert_names(names(result), must.include = c("resampling_iteration", "learner_id", "features", "n_features"))
+      private$.measure_id = assert_string(measure_id, null.ok = FALSE)
+      mandatory_columns = c("resampling_iteration", "learner_id", "features", "n_features")
+      assert_names(names(result), must.include = c(mandatory_columns, measure_id))
       private$.result = result
       private$.features = assert_character(features, any.missing = FALSE, null.ok = FALSE)
+      private$.minimize = assert_logical(minimize, null.ok = FALSE)
       self$benchmark_result = if (!is.null(benchmark_result)) assert_benchmark_result(benchmark_result)
 
       self$man = "mlr3fselect::ensemble_fs_result"
@@ -144,7 +158,7 @@ EnsembleFSResult = R6Class("EnsembleFSResult",
     #'  The stability measure to be used.
     #'  One of the measures returned by [stabm::listStabilityMeasures()] in lower case.
     #'  Default is `"jaccard"`.
-    #' @param ... (`any`)\cr
+    #' @param stability_args (`list`)\cr
     #'  Additional arguments passed to the stability measure function.
     #' @param global (`logical(1)`)\cr
     #'  Whether to calculate the stability globally or for each learner.
@@ -153,10 +167,16 @@ EnsembleFSResult = R6Class("EnsembleFSResult",
     #'
     #' @return A `numeric()` value representing the stability of the selected features.
     #' Or a `numeric()` vector with the stability of the selected features for each learner.
-    stability = function(stability_measure = "jaccard", ..., global = TRUE, reset_cache = FALSE) {
+    stability = function(
+      stability_measure = "jaccard",
+      stability_args = NULL,
+      global = TRUE,
+      reset_cache = FALSE
+      ) {
       funs = stabm::listStabilityMeasures()$Name
       keys = tolower(gsub("stability", "", funs))
       assert_choice(stability_measure, choices = keys)
+      assert_list(stability_args, null.ok = TRUE, names = "named")
 
       if (global) {
         # cached results
@@ -165,7 +185,7 @@ EnsembleFSResult = R6Class("EnsembleFSResult",
         }
 
         fun = get(funs[which(stability_measure == keys)], envir = asNamespace("stabm"))
-        private$.stability_global[[stability_measure]] = fun(private$.result$features, ...)
+        private$.stability_global[[stability_measure]] = invoke(fun, features = private$.result$features, .args = stability_args)
         private$.stability_global[[stability_measure]]
       } else {
         # cached results
@@ -175,10 +195,133 @@ EnsembleFSResult = R6Class("EnsembleFSResult",
 
         fun = get(funs[which(stability_measure == keys)], envir = asNamespace("stabm"))
 
-        tab = private$.result[, list(score = fun(.SD$features, ...)), by = learner_id]
+        tab = private$.result[, list(score = invoke(fun, features = .SD$features, .args = stability_args)), by = learner_id]
         private$.stability_learner[[stability_measure]] = set_names(tab$score, tab$learner_id)
         private$.stability_learner[[stability_measure]]
       }
+    },
+
+    #' @description
+    #'
+    #' This function identifies the **Pareto front** of the ensemble feature
+    #' selection process, i.e., the set of points that represent the trade-off
+    #' between the number of features and performance (e.g. classification error).
+    #'
+    #' @param type (`character(1)`)\cr
+    #'  Specifies the type of Pareto front to return. See details.
+    #'
+    #' @details
+    #' Two options are available for the Pareto front:
+    #' - `"empirical"` (default): returns the empirical Pareto front.
+    #' - `"estimated"`: the Pareto front points are estimated by fitting a linear model with the inverse of the number of features (\eqn{1/x}) as input and the associated performance scores as output.
+    #'  This method is useful when the Pareto points are sparse and the front assumes a convex shape if better performance corresponds to lower measure values (e.g. classification error), or a concave shape otherwise (e.g. classification accuracy).
+    #'  The `estimated` Pareto front will include points for a number of features ranging from 1 up to the maximum number of features found in the results.
+    #'
+    #' @return A [data.table::data.table] with columns the number of features and the performance that together form the Pareto front.
+    pareto_front = function(type = "empirical") {
+      assert_choice(type, choices = c("empirical", "estimated"))
+      result = private$.result
+      measure_id = private$.measure_id
+      minimize = private$.minimize
+
+      # Keep only n_features and performance scores
+      cols_to_keep = c("n_features", measure_id)
+      data = result[, cols_to_keep, with = FALSE]
+
+      # Order by number of features and, for equal numbers of features, from
+      # best to worst score: only the best result per feature count can be
+      # non-dominated, so it must be visited first
+      data = if (minimize) {
+        data[order(n_features, get(measure_id))]
+      } else {
+        data[order(n_features, -get(measure_id))]
+      }
+
+      # A row is on the front iff its score is strictly better than the best
+      # score of all rows before it (results with at most as many features).
+      # Starting from Inf / -Inf guarantees that the first row is always kept.
+      # Ties with the running best are not added, so duplicates appear once.
+      scores = data[[measure_id]]
+      if (minimize) {
+        best_before = c(Inf, cummin(scores))[seq_along(scores)]
+        on_front = scores < best_before
+      } else {
+        best_before = c(-Inf, cummax(scores))[seq_along(scores)]
+        on_front = scores > best_before
+      }
+      pf = data[which(on_front)]
+
+      if (type == "estimated") {
+        # Replace the empirical points by the predictions of a linear model
+        # `score ~ 1 / n_features` that is fitted on them
+
+        # Transform the data (x => 1/x)
+        pf[, n_features_inv := 1 / n_features]
+
+        # Fit the linear model
+        form = mlr3misc::formulate(lhs = measure_id, rhs = "n_features_inv")
+        model = stats::lm(formula = form, data = pf)
+
+        # Predict values using the model to create a smooth curve; predictions
+        # cover every feature count from 1 up to the maximum in the results,
+        # not only the feature counts present in the empirical front
+        pf_pred = data.table(n_features = seq_len(max(data$n_features)))
+        pf_pred[, n_features_inv := 1 / n_features]
+        pf_pred[, (measure_id) := stats::predict(model, newdata = pf_pred)]
+        pf_pred[, n_features_inv := NULL]
+        pf = pf_pred
+      }
+
+      pf
+    },
+
+    #' @description
+    #'
+    #' This function implements various *knee* point identification (KPI) methods, which select points in the Pareto front, such that an optimal trade-off between performance and number of features is achieved.
+    #' In most cases, only one such point is returned.
+    #'
+    #' @details
+    #' The available KPI methods are:
+    #'
+    #' - `"NBI"` (default): The **Normal-Boundary Intersection** method is a geometry-based method which calculates the perpendicular distance of each point from the line connecting the first and last points of the Pareto front.
+    #' The knee point is determined as the Pareto point with the maximum distance from this line, see Das (1999).
+    #'
+    #' @param method (`character(1)`)\cr
+    #'  Type of method to use to identify the knee point. See details.
+    #' @param type (`character(1)`)\cr
+    #'  Specifies the type of Pareto front to use for the identification of the knee point.
+    #'  See `pareto_front()` method for more details.
+    #'
+    #' @return A [data.table::data.table] with the knee point(s) of the Pareto front.
+    knee_points = function(method = "NBI", type = "empirical") {
+      assert_choice(method, choices = c("NBI"))
+      assert_choice(type, choices = c("empirical", "estimated"))
+      measure_id = private$.measure_id
+      minimize = private$.minimize
+      pf = self$pareto_front(type = type)
+      # Degenerate front (single point or a constant axis): scaling would divide by
+      # zero, so return the point with the fewest features, then the best score
+      if (uniqueN(pf$n_features) == 1L || uniqueN(pf[[measure_id]]) == 1L) {
+        direction = if (minimize) 1 else -1
+        return(pf[order(pf$n_features, direction * pf[[measure_id]])[1L]])
+      }
+
+      # Scale the Pareto front data to (0-1) range
+      scale01 = function(x) (x - min(x)) / (max(x) - min(x))
+      pf_norm = pf[, .(nfeats_norm = scale01(n_features), perf_norm = scale01(get(measure_id)))]
+
+      if (minimize) {
+        # The two edge points in the Pareto front are: (0,1) and (1,0)
+        # They define the line (x + y - 1 = 0) and their distance is sqrt(2)
+        pf_norm[, dist_to_line := abs(nfeats_norm + perf_norm - 1) / sqrt(2)]
+      } else {
+        # The two edge points in the Pareto front are: (0,0) and (1,1)
+        # They define the line (y - x = 0) and their distance is sqrt(2)
+        pf_norm[, dist_to_line := abs(nfeats_norm - perf_norm) / sqrt(2)]
+      }
+
+      # knee point is the one with the maximum distance (first one on ties)
+      pf[which_max(pf_norm[, dist_to_line], ties_method = "first")]
+    }
   ),
 
@@ -191,15 +334,31 @@ EnsembleFSResult = R6Class("EnsembleFSResult",
       if (is.null(self$benchmark_result)) return(private$.result)
       tab = as.data.table(self$benchmark_result)[, c("task", "learner", "resampling"), with = FALSE]
       cbind(private$.result, tab)
+    },
+
+    #' @field n_learners (`numeric(1)`)\cr
+    #' Returns the number of learners used in the ensemble feature selection.
+    n_learners = function(rhs) {
+      assert_ro_binding(rhs) # NOTE(review): presumably errors when a value is assigned to this field (read-only binding) - confirm
+      uniqueN(private$.result$learner_id) # number of distinct values in the `learner_id` column of the stored result
+    },
+
+    #' @field measure (`character(1)`)\cr
+    #' Returns the measure id used in the ensemble feature selection.
+    measure = function(rhs) {
+      assert_ro_binding(rhs) # NOTE(review): presumably errors when a value is assigned to this field (read-only binding) - confirm
+      private$.measure_id # the `measure_id` string that `initialize()` validated and stored
+    }
   ),
 
   private = list(
-    .result = NULL,
+    .result = NULL, # with no R6 classes
     .stability_global = NULL,
     .stability_learner = NULL,
     .feature_ranking = NULL,
-    .features = NULL
+    .features = NULL,
+    .measure_id = NULL,
+    .minimize = NULL
   )
 )
 
diff --git a/R/bibentries.R b/R/bibentries.R
index a7ecb1df..71e4d111 100644
--- a/R/bibentries.R
+++ b/R/bibentries.R
@@ -105,5 +105,19 @@ bibentries = c(
     title     = "Ensemble feature selection for high-dimensional data: a stability analysis across multiple domains",
     volume    = "32",
     year      = "2020"
+  ),
+
+  das1999 = bibentry("article",
+    author    = "Das, I",
+    doi       = "10.1007/BF01195985", # bare DOI only; a URL path suffix such as "/METRICS" breaks the generated \doi{} link
+    issn      = "09344373",
+    journal   = "Structural Optimization",
+    month     = "may",
+    number    = "1-2",
+    pages     = "107--115",
+    publisher = "Springer",
+    title     = "On characterizing the 'knee' of the Pareto curve based on normal-boundary intersection",
+    volume    = "18",
+    year      = "1999"
   )
 )
diff --git a/R/ensemble_fselect.R b/R/ensemble_fselect.R
index afde7803..940dc2f5 100644
--- a/R/ensemble_fselect.R
+++ b/R/ensemble_fselect.R
@@ -141,6 +141,8 @@ ensemble_fselect = function(
   EnsembleFSResult$new(
     result = grid,
     features = task$feature_names,
-    benchmark_result = if (store_benchmark_result) bmr
+    benchmark_result = if (store_benchmark_result) bmr,
+    measure_id = measure$id,
+    minimize = measure$minimize
   )
 }
diff --git a/man/ensemble_fs_result.Rd b/man/ensemble_fs_result.Rd
index d5720cfb..63a0cbc3 100644
--- a/man/ensemble_fs_result.Rd
+++ b/man/ensemble_fs_result.Rd
@@ -6,7 +6,7 @@
 \title{Ensemble Feature Selection Result}
 \description{
 The \code{EnsembleFSResult} stores the results of ensemble feature selection.
-It includes methods for evaluating the stability of the feature selection process and for ranking the selected features.
+It includes methods for evaluating the stability of the feature selection process and for ranking the selected features, among others.
 The function \code{\link[=ensemble_fselect]{ensemble_fselect()}} returns an object of this class.
 }
 \section{S3 Methods}{
@@ -46,7 +46,16 @@ Whether to add the learner, task and resampling information from the benchmark r
 
   # returns a ranking of all features
   head(efsr$feature_ranking())
+
+  # returns the empirical pareto front (nfeatures vs error)
+  efsr$pareto_front()
+}
 }
+\references{
+Das, I (1999).
+\dQuote{On characterizing the 'knee' of the Pareto curve based on normal-boundary intersection.}
+\emph{Structural Optimization}, \bold{18}(1-2), 107--115.
+ISSN 09344373, \doi{10.1007/BF01195985}.
 }
 \section{Public fields}{
 \if{html}{\out{<div class="r6-fields">}}
@@ -64,6 +73,12 @@ Manual page for this object.}
 \describe{
 \item{\code{result}}{(\link[data.table:data.table]{data.table::data.table})\cr
 Returns the result of the ensemble feature selection.}
+
+\item{\code{n_learners}}{(\code{numeric(1)})\cr
+Returns the number of learners used in the ensemble feature selection.}
+
+\item{\code{measure}}{(\code{character(1)})\cr
+Returns the measure id used in the ensemble feature selection.}
 }
 \if{html}{\out{</div>}}
 }
@@ -76,6 +91,8 @@ Returns the result of the ensemble feature selection.}
 \item \href{#method-EnsembleFSResult-help}{\code{EnsembleFSResult$help()}}
 \item \href{#method-EnsembleFSResult-feature_ranking}{\code{EnsembleFSResult$feature_ranking()}}
 \item \href{#method-EnsembleFSResult-stability}{\code{EnsembleFSResult$stability()}}
+\item \href{#method-EnsembleFSResult-pareto_front}{\code{EnsembleFSResult$pareto_front()}}
+\item \href{#method-EnsembleFSResult-knee_points}{\code{EnsembleFSResult$knee_points()}}
 \item \href{#method-EnsembleFSResult-clone}{\code{EnsembleFSResult$clone()}}
 }
 }
@@ -85,7 +102,13 @@ Returns the result of the ensemble feature selection.}
 \subsection{Method \code{new()}}{
 Creates a new instance of this \link[R6:R6Class]{R6} class.
 \subsection{Usage}{
-\if{html}{\out{<div class="r">}}\preformatted{EnsembleFSResult$new(result, features, benchmark_result = NULL)}\if{html}{\out{</div>}}
+\if{html}{\out{<div class="r">}}\preformatted{EnsembleFSResult$new(
+  result,
+  features,
+  benchmark_result = NULL,
+  measure_id,
+  minimize = TRUE
+)}\if{html}{\out{</div>}}
 }
 
 \subsection{Arguments}{
@@ -93,7 +116,7 @@ Creates a new instance of this \link[R6:R6Class]{R6} class.
 \describe{
 \item{\code{result}}{(\link[data.table:data.table]{data.table::data.table})\cr
 The result of the ensemble feature selection.
-Column names should include \code{"resampling_id"}, \code{"learner_id"}, \code{"features"}
+Column names should include \code{"resampling_iteration"}, \code{"learner_id"}, \code{"features"}
 and \code{"n_features"}.}
 
 \item{\code{features}}{(\code{\link[=character]{character()}})\cr
@@ -102,6 +125,12 @@ selection.}
 
 \item{\code{benchmark_result}}{(\link[mlr3:BenchmarkResult]{mlr3::BenchmarkResult})\cr
 The benchmark result object.}
+
+\item{\code{measure_id}}{(\code{character(1)})\cr
+Column name of \code{"result"} that corresponds to the measure used.}
+
+\item{\code{minimize}}{(\code{logical(1)})\cr
+If \code{TRUE} (default), lower values of the measure correspond to higher performance.}
 }
 \if{html}{\out{</div>}}
 }
@@ -187,7 +216,7 @@ When the same stability measure is requested again with different arguments, the
 \subsection{Usage}{
 \if{html}{\out{<div class="r">}}\preformatted{EnsembleFSResult$stability(
   stability_measure = "jaccard",
-  ...,
+  stability_args = NULL,
   global = TRUE,
   reset_cache = FALSE
 )}\if{html}{\out{</div>}}
@@ -201,7 +230,7 @@ The stability measure to be used.
 One of the measures returned by \code{\link[stabm:listStabilityMeasures]{stabm::listStabilityMeasures()}} in lower case.
 Default is \code{"jaccard"}.}
 
-\item{\code{...}}{(\code{any})\cr
+\item{\code{stability_args}}{(\code{list})\cr
 Additional arguments passed to the stability measure function.}
 
 \item{\code{global}}{(\code{logical(1)})\cr
@@ -218,6 +247,73 @@ Or a \code{numeric()} vector with the stability of the selected features for eac
 }
 }
 \if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-EnsembleFSResult-pareto_front"></a>}}
+\if{latex}{\out{\hypertarget{method-EnsembleFSResult-pareto_front}{}}}
+\subsection{Method \code{pareto_front()}}{
+This function identifies the \strong{Pareto front} of the ensemble feature
+selection process, i.e., the set of points that represent the trade-off
+between the number of features and performance (e.g. classification error).
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{EnsembleFSResult$pareto_front(type = "empirical")}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{type}}{(\code{character(1)})\cr
+Specifies the type of Pareto front to return. See details.}
+}
+\if{html}{\out{</div>}}
+}
+\subsection{Details}{
+Two options are available for the Pareto front:
+\itemize{
+\item \code{"empirical"} (default): returns the empirical Pareto front.
+\item \code{"estimated"}: the Pareto front points are estimated by fitting a linear model with the inverse of the number of features (\eqn{1/x}) as input and the associated performance scores as output.
+This method is useful when the Pareto points are sparse and the front assumes a convex shape if better performance corresponds to lower measure values (e.g. classification error), or a concave shape otherwise (e.g. classification accuracy).
+The \code{estimated} Pareto front will include points for a number of features ranging from 1 up to the maximum number of features found in the results.
+}
+}
+
+\subsection{Returns}{
+A \link[data.table:data.table]{data.table::data.table} with columns the number of features and the performance that together form the Pareto front.
+}
+}
+\if{html}{\out{<hr>}}
+\if{html}{\out{<a id="method-EnsembleFSResult-knee_points"></a>}}
+\if{latex}{\out{\hypertarget{method-EnsembleFSResult-knee_points}{}}}
+\subsection{Method \code{knee_points()}}{
+This function implements various \emph{knee} point identification (KPI) methods, which select points in the Pareto front, such that an optimal trade-off between performance and number of features is achieved.
+In most cases, only one such point is returned.
+\subsection{Usage}{
+\if{html}{\out{<div class="r">}}\preformatted{EnsembleFSResult$knee_points(method = "NBI", type = "empirical")}\if{html}{\out{</div>}}
+}
+
+\subsection{Arguments}{
+\if{html}{\out{<div class="arguments">}}
+\describe{
+\item{\code{method}}{(\code{character(1)})\cr
+Type of method to use to identify the knee point. See details.}
+
+\item{\code{type}}{(\code{character(1)})\cr
+Specifies the type of Pareto front to use for the identification of the knee point.
+See \code{pareto_front()} method for more details.}
+}
+\if{html}{\out{</div>}}
+}
+\subsection{Details}{
+The available KPI methods are:
+\itemize{
+\item \code{"NBI"} (default): The \strong{Normal-Boundary Intersection} method is a geometry-based method which calculates the perpendicular distance of each point from the line connecting the first and last points of the Pareto front.
+The knee point is determined as the Pareto point with the maximum distance from this line, see Das (1999).
+}
+}
+
+\subsection{Returns}{
+A \link[data.table:data.table]{data.table::data.table} with the knee point(s) of the Pareto front.
+}
+}
+\if{html}{\out{<hr>}}
 \if{html}{\out{<a id="method-EnsembleFSResult-clone"></a>}}
 \if{latex}{\out{\hypertarget{method-EnsembleFSResult-clone}{}}}
 \subsection{Method \code{clone()}}{
diff --git a/tests/testthat/test_ensemble_fselect.R b/tests/testthat/test_ensemble_fselect.R
index d6a03a0a..c0c68f16 100644
--- a/tests/testthat/test_ensemble_fselect.R
+++ b/tests/testthat/test_ensemble_fselect.R
@@ -16,9 +16,12 @@ test_that("ensemble feature selection works", {
   expect_vector(efsr$result$n_features, size = 4)
   expect_vector(efsr$result$classif.ce, size = 4)
   expect_benchmark_result(efsr$benchmark_result)
+  expect_equal(efsr$measure, "classif.ce")
+  expect_equal(efsr$n_learners, 2)
 
   # stability
   expect_number(efsr$stability(stability_measure = "jaccard"))
+  expect_error(efsr$stability(stability_args = list(20)), "have names")
   stability = efsr$stability(stability_measure = "jaccard", global = FALSE)
   expect_numeric(stability, len = 2)
   expect_names(names(stability), identical.to = c("classif.rpart", "classif.featureless"))
@@ -28,6 +31,19 @@ test_that("ensemble feature selection works", {
   expect_data_table(feature_ranking, nrows = length(task$feature_names))
   expect_names(names(feature_ranking), identical.to = c("feature", "inclusion_probability"))
 
+  # pareto_front
+  pf = efsr$pareto_front()
+  expect_data_table(pf)
+  expect_equal(names(pf), c("n_features", "classif.ce"))
+  pf_pred = suppressWarnings(efsr$pareto_front(type = "estimated"))
+  expect_data_table(pf_pred, nrows = max(efsr$result$n_features))
+  expect_equal(names(pf_pred), c("n_features", "classif.ce"))
+
+  # knee_points
+  kps = efsr$knee_points()
+  expect_data_table(kps, min.rows = 1)
+  expect_equal(names(kps), c("n_features", "classif.ce"))
+
   # data.table conversion
   tab = as.data.table(efsr)
   expect_names(names(tab), identical.to = c("resampling_iteration", "learner_id", "features", "n_features", "classif.ce", "task", "learner", "resampling"))
@@ -52,6 +68,8 @@ test_that("ensemble feature selection works without benchmark result", {
   expect_vector(efsr$result$n_features, size = 4)
   expect_vector(efsr$result$classif.ce, size = 4)
   expect_null(efsr$benchmark_result)
+  expect_equal(efsr$measure, "classif.ce")
+  expect_equal(efsr$n_learners, 2)
 
   # stability
   expect_number(efsr$stability(stability_measure = "jaccard"))
@@ -64,6 +82,19 @@ test_that("ensemble feature selection works without benchmark result", {
   expect_data_table(feature_ranking, nrows = length(task$feature_names))
   expect_names(names(feature_ranking), identical.to = c("feature", "inclusion_probability"))
 
+  # pareto_front
+  pf = efsr$pareto_front()
+  expect_data_table(pf)
+  expect_equal(names(pf), c("n_features", "classif.ce"))
+  pf_pred = suppressWarnings(efsr$pareto_front(type = "estimated"))
+  expect_data_table(pf_pred, nrows = max(efsr$result$n_features))
+  expect_equal(names(pf_pred), c("n_features", "classif.ce"))
+
+  # knee_points
+  kps = efsr$knee_points(type = "estimated")
+  expect_data_table(kps, min.rows = 1)
+  expect_equal(names(kps), c("n_features", "classif.ce"))
+
   # data.table conversion
   tab = as.data.table(efsr)
   expect_names(names(tab), identical.to = c("resampling_iteration", "learner_id", "features", "n_features", "classif.ce"))
@@ -88,6 +119,8 @@ test_that("ensemble feature selection works with rfe", {
   expect_vector(efsr$result$classif.ce, size = 4)
   expect_list(efsr$result$importance, any.missing = FALSE, len = 4)
   expect_benchmark_result(efsr$benchmark_result)
+  expect_equal(efsr$measure, "classif.ce")
+  expect_equal(efsr$n_learners, 2)
 
   # stability
   expect_number(efsr$stability(stability_measure = "jaccard"))
@@ -100,25 +133,53 @@ test_that("ensemble feature selection works with rfe", {
   expect_data_table(feature_ranking, nrows = length(task$feature_names))
   expect_names(names(feature_ranking), identical.to = c("feature", "inclusion_probability"))
 
+  # pareto_front
+  pf = efsr$pareto_front()
+  expect_data_table(pf)
+  expect_equal(names(pf), c("n_features", "classif.ce"))
+  pf_pred = suppressWarnings(efsr$pareto_front(type = "estimated"))
+  expect_data_table(pf_pred, nrows = max(efsr$result$n_features))
+  expect_equal(names(pf_pred), c("n_features", "classif.ce"))
+
+  # knee_points
+  kps = efsr$knee_points(type = "estimated")
+  expect_data_table(kps, min.rows = 1)
+  expect_equal(names(kps), c("n_features", "classif.ce"))
+
   # data.table conversion
   tab = as.data.table(efsr)
   expect_names(names(tab), identical.to = c("resampling_iteration", "learner_id", "features", "n_features", "classif.ce", "importance", "task", "learner", "resampling"))
 })
 
 test_that("EnsembleFSResult initialization", {
-  features = LETTERS
-  result = data.table(a = 1) # not proper column name
-  expect_error(EnsembleFSResult$new(result = result, features = features))
+  result = data.table(a = 1, b = 3)
+  expect_error(EnsembleFSResult$new(result = result, features = LETTERS, measure_id = "a"), "missing elements")
+
+  result = data.table(
+    resampling_iteration = c(1, 1, 1, 2, 2, 2, 3, 3, 3),
+    learner_id = rep(c("classif.xgboost", "classif.rpart", "classif.ranger"), 3),
+    n_features = c(2, 4, 4, 1, 5, 4, 1, 2, 4),
+    features = list(
+      c("V3", "V20"),
+      c("V3", "V5", "V19", "V15"),
+      c("V11", "V7", "V6", "V8"),
+      c("V11"),
+      c("V17", "V2", "V12", "V9", "V1"),
+      c("V11", "V18", "V9", "V2"),
+      c("V2"),
+      c("V4", "V12"),
+      c("V6", "V15", "V19", "V7")),
+    classif.ce = c(0.13, 0.24, 0.16, 0.11, 0.25, 0.18, 0.15, 0.1, 0.16)
+  )
 
-  result = data.table(resampling_iteration = 1:2, learner_id = list("l1", "l2"),
-                      features = list(LETTERS[1], LETTERS[1:3]),
-                      n_features = c(1,3))
   # works without benchmark result object
-  efsr = EnsembleFSResult$new(result = result, features = features)
+  efsr = EnsembleFSResult$new(result = result, features = paste0("V", 1:20), measure_id = "classif.ce")
   expect_class(efsr, "EnsembleFSResult")
+  expect_equal(efsr$n_learners, 3)
   tab = as.data.table(efsr)
   expect_data_table(tab)
-  expect_names(names(tab), identical.to = c("resampling_iteration", "learner_id", "features", "n_features"))
+  expect_names(names(tab), identical.to = c("resampling_iteration", "learner_id",
+                                            "n_features", "features", "classif.ce"))
 })
 
 test_that("different callbacks can be set", {