feat: Add ComparisonReport to compare instances of EstimatorReport #1286

Draft · wants to merge 47 commits into base: main

Commits (47)

1596cc1  Squashed commit of the following: (thomass-dev, Feb 4, 2025)
f5296be  Improve warning messages (thomass-dev, Feb 5, 2025)
21d2bd5  mob session (auguste-probabl, Feb 5, 2025)
4a2bdd4  add docstring (auguste-probabl, Feb 5, 2025)
345cdde  docs: add example (MarieS-WiMLDS, Jan 27, 2025)
491f6f8  rename example file (auguste-probabl, Feb 5, 2025)
a142cb2  fix circular import (auguste-probabl, Feb 5, 2025)
beb6cbf  update example (auguste-probabl, Feb 5, 2025)
b6774ba  attempt to fix docs (auguste-probabl, Feb 5, 2025)
5fe9e2c  attempt to fix docs (auguste-probabl, Feb 5, 2025)
b53924c  Squashed commit of the following: (thomass-dev, Feb 6, 2025)
638e871  resolve merge (sylvaincom, Feb 6, 2025)
5835ca5  Merge branch 'comparator' of https://github.com/probabl-ai/skore into… (sylvaincom, Feb 6, 2025)
35ffef2  iter doc example (sylvaincom, Feb 6, 2025)
a37380d  iter doc example (sylvaincom, Feb 6, 2025)
6952abe  Squashed commit of the following: (thomass-dev, Feb 6, 2025)
5478865  Disallow plot from comparison reports on multi-class (thomass-dev, Feb 6, 2025)
bf89597  Merge branch 'main' into comparator (thomass-dev, Feb 7, 2025)
d0a4853  Clean ComparisonReport tests (thomass-dev, Feb 7, 2025)
a202bf7  Fix ComparisonReport tests for python 3.9 (thomass-dev, Feb 7, 2025)
cca35c9  Fix minor typing mistake (thomass-dev, Feb 7, 2025)
c15a3fd  Merge branch 'main' into comparator (thomass-dev, Feb 7, 2025)
f82a32a  adding more stuff in the example (sylvaincom, Feb 7, 2025)
f54078d  docs: Add ComparisonReport to API docs (auguste-probabl, Feb 7, 2025)
6a499f8  fix link in docs (auguste-probabl, Feb 7, 2025)
869b8e9  iter on the doc example (sylvaincom, Feb 7, 2025)
f44c49b  Merge branch 'main' into comparator (thomass-dev, Feb 10, 2025)
689820e  Allow different training datasets (thomass-dev, Feb 10, 2025)
3f04b57  Disallow comparator of estimators without testing data (thomass-dev, Feb 10, 2025)
784c022  Rebase with last changes on plot API introduced by Guillaume (thomass-dev, Feb 10, 2025)
b91cdd0  Update sphinx and examples (thomass-dev, Feb 10, 2025)
33b050b  Update last failing example (thomass-dev, Feb 10, 2025)
cdef817  Merge branch 'main' into comparator (thomass-dev, Feb 10, 2025)
4f59748  replace `usecase` by fixtures (auguste-probabl, Feb 11, 2025)
2ed2770  catch type error explicitly (auguste-probabl, Feb 11, 2025)
37971f7  remove test (auguste-probabl, Feb 11, 2025)
757a37a  collect all ml tasks in error message (auguste-probabl, Feb 11, 2025)
f076274  refactor (auguste-probabl, Feb 11, 2025)
a1ae2b7  move type checking out of loop (auguste-probabl, Feb 11, 2025)
c77bb64  collect hashes in error message (auguste-probabl, Feb 11, 2025)
4ec4c69  refine error message about length of `report_names` (auguste-probabl, Feb 11, 2025)
0c13905  add report_names_ to list of attributes (auguste-probabl, Feb 11, 2025)
0f2504e  fix "see also" reference (auguste-probabl, Feb 11, 2025)
433120e  add cross-validation report to "see also" (auguste-probabl, Feb 11, 2025)
8a3d974  remove dead code (auguste-probabl, Feb 11, 2025)
9e24be2  use short imports in "see also" (auguste-probabl, Feb 11, 2025)
a630d27  use short imports in "see also" (auguste-probabl, Feb 11, 2025)

44 changes: 20 additions & 24 deletions skore/src/skore/sklearn/_comparison/report.py
@@ -105,13 +105,30 @@ def __init__(
We check that the estimator reports can be compared:
- all reports are estimator reports,
- all estimators are in the same ML use case,
- all X_test, y_test have the same hash.
- all estimators have non-empty X_test and y_test,
- all estimators have the same X_test and y_test.
"""
if len(reports) < 2:
raise ValueError("At least 2 instances of EstimatorReport are needed")

if not all(isinstance(report, EstimatorReport) for report in reports):
raise TypeError("Only instances of EstimatorReport are allowed")
ml_tasks = set()
test_dataset_hashes = set()

for report in reports:
if not isinstance(report, EstimatorReport):
raise TypeError("Only instances of EstimatorReport are allowed")

if (report.X_test is None) or (report.y_test is None):
raise ValueError("Cannot compare reports without testing data")
Review comment on lines +113 to +114 (Member): Actually, you should be able to use the external `data_source`. I would expect something like this to work on the side of the end-user:

    comparator.metrics.report_metrics(data_source="X_y", X=X, y=y)


ml_tasks.add(report._ml_task)
test_dataset_hashes.add(joblib.hash((report.X_test, report.y_test)))

if len(ml_tasks) > 1:
raise ValueError("Not all estimators are in the same ML usecase")

if len(test_dataset_hashes) > 1:
raise ValueError("Not all estimators have the same testing data")

if report_names is None:
self.report_names_ = [report.estimator_name_ for report in reports]
@@ -124,34 +141,13 @@ def __init__(

self.estimator_reports_ = deepcopy(reports)

first_report = self.estimator_reports_[0]
first_report_ml_task = first_report._ml_task
first_report_test_hash = joblib.hash((first_report.X_test, first_report.y_test))

for report in self.estimator_reports_[1:]:
if report._ml_task != first_report_ml_task:
raise ValueError("Not all estimators are in the same ML usecase")

if joblib.hash((report.X_test, report.y_test)) != first_report_test_hash:
raise ValueError("Not all estimators have the same testing data")

if (first_report.X_test is None) or (first_report.y_test is None):
warn(
"MissingTestDataWarning",
(
"We cannot ensure that all estimators have been tested "
"with the same dataset. This could lead to incoherent comparisons."
),
)

# NEEDED FOR METRICS ACCESSOR
self.n_jobs = n_jobs
self._rng = np.random.default_rng(time.time_ns())
self._hash = self._rng.integers(
low=np.iinfo(np.int64).min, high=np.iinfo(np.int64).max
)
self._cache = {}

self._ml_task = self.estimator_reports_[0]._ml_task

####################################################################################
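
Taken together, the checks above define the constructor's contract: at least two EstimatorReport instances, all on the same ML task, all carrying identical, non-empty test data. A minimal sketch of the resulting behavior, assuming ComparisonReport and EstimatorReport are exposed at the top level of the skore package and that EstimatorReport fits the estimator on the provided training data by default:

    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import train_test_split

    from skore import ComparisonReport, EstimatorReport

    X, y = make_classification(random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    # Two reports on the same ML task with identical test data: accepted.
    report_1 = EstimatorReport(
        LogisticRegression(C=1.0),
        X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test,
    )
    report_2 = EstimatorReport(
        LogisticRegression(C=0.1),
        X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test,
    )
    comparison = ComparisonReport(
        [report_1, report_2], report_names=["C=1.0", "C=0.1"]
    )

    # A report without test data is now rejected outright.
    no_test = EstimatorReport(LogisticRegression(), X_train=X_train, y_train=y_train)
    try:
        ComparisonReport([no_test, no_test])
    except ValueError as exc:
        print(exc)  # Cannot compare reports without testing data

Whether reports lacking internal test data should instead remain comparable through an external data source, as the review comment above suggests, is still an open question on this PR.
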
89 changes: 60 additions & 29 deletions skore/tests/unit/sklearn/test_comparison.py
@@ -32,8 +32,13 @@ def usecase(
def test_comparison_report_init_wrong_parameters():
"""If the input is not valid, raise."""

estimator, _, _, _, _ = usecase("binary-logistic-regression")
estimator_report = EstimatorReport(estimator, fit=False)
estimator, _, X_test, _, y_test = usecase("binary-logistic-regression")
estimator_report = EstimatorReport(
estimator,
fit=False,
X_test=X_test,
y_test=y_test,
)

with pytest.raises(
TypeError, match="object of type 'EstimatorReport' has no len()"
@@ -59,8 +64,14 @@ def test_comparison_report_init_wrong_parameters():
def test_comparison_report_init_deepcopy():
"""If an estimator report is modified outside of the comparator, it is not modified
inside the comparator."""
estimator, _, _, _, _ = usecase("binary-logistic-regression")
estimator_report = EstimatorReport(estimator, fit=False)
estimator, _, X_test, _, y_test = usecase("binary-logistic-regression")
estimator_report = EstimatorReport(
estimator,
fit=False,
X_test=X_test,
y_test=y_test,
)

comp = ComparisonReport([estimator_report, estimator_report])

# check that the deepcopy works well
@@ -74,33 +85,33 @@ def test_comparison_report_init_deepcopy():
assert comp.estimator_reports_[0]._hash != 0


def test_comparison_report_init_MissingTestDataWarning(capsys):
def test_comparison_report_init_without_testing_data():
"""Raise a warning if there is no test data (`None`) for any estimator
report."""
estimator, _, _, _, _ = usecase("binary-logistic-regression")
estimator_report = EstimatorReport(estimator, fit=False)

estimator, X_train, _, y_train, _ = usecase("binary-logistic-regression")
estimator_report = EstimatorReport(
estimator,
fit=False,
X_train=X_train,
y_train=y_train,
)

ComparisonReport([estimator_report, estimator_report])

captured = capsys.readouterr()

assert "MissingTestDataWarning" in captured.out
with pytest.raises(ValueError, match="Cannot compare reports without testing data"):
ComparisonReport([estimator_report, estimator_report])


def test_comparison_report_init_different_ml_usecases():
linear_regression_estimator, _, _, _, _ = usecase("linear-regression")
linear_regression_report = EstimatorReport(linear_regression_estimator, fit=False)
linear_regression_estimator, _, X_test, _, y_test = usecase("linear-regression")
linear_regression_report = EstimatorReport(
linear_regression_estimator,
fit=False,
X_test=X_test,
y_test=y_test,
)

logistic_regression_estimator, _, _, _, _ = usecase("binary-logistic-regression")
logistic_regression_estimator, _, X_test, _, y_test = usecase(
"binary-logistic-regression"
)
logistic_regression_report = EstimatorReport(
logistic_regression_estimator,
fit=False,
X_test=X_test,
y_test=y_test,
)

with pytest.raises(
@@ -175,8 +186,13 @@ def test_comparison_report_init_without_report_names():


def test_comparison_report_init_with_invalid_report_names():
estimator, _, _, _, _ = usecase("binary-logistic-regression")
estimator_report = EstimatorReport(estimator, fit=False)
estimator, _, X_test, _, y_test = usecase("binary-logistic-regression")
estimator_report = EstimatorReport(
estimator,
fit=False,
X_test=X_test,
y_test=y_test,
)

with pytest.raises(
ValueError, match="There should be as many report names as there are reports"
@@ -185,17 +201,27 @@ def test_comparison_report_init_with_invalid_report_names():


def test_comparison_report_help(capsys):
estimator, _, _, _, _ = usecase("binary-logistic-regression")
estimator_report = EstimatorReport(estimator, fit=False)
estimator, _, X_test, _, y_test = usecase("binary-logistic-regression")
estimator_report = EstimatorReport(
estimator,
fit=False,
X_test=X_test,
y_test=y_test,
)

ComparisonReport([estimator_report, estimator_report]).help()

assert "Tools to compare estimators" in capsys.readouterr().out


def test_comparison_report_repr():
estimator, _, _, _, _ = usecase("binary-logistic-regression")
estimator_report = EstimatorReport(estimator, fit=False)
estimator, _, X_test, _, y_test = usecase("binary-logistic-regression")
estimator_report = EstimatorReport(
estimator,
fit=False,
X_test=X_test,
y_test=y_test,
)

repr_str = repr(ComparisonReport([estimator_report, estimator_report]))

@@ -205,8 +231,13 @@

def test_comparison_report_pickle(tmp_path):
"""Check that we can pickle a comparison report."""
estimator, _, _, _, _ = usecase("binary-logistic-regression")
estimator_report = EstimatorReport(estimator, fit=False)
estimator, _, X_test, _, y_test = usecase("binary-logistic-regression")
estimator_report = EstimatorReport(
estimator,
fit=False,
X_test=X_test,
y_test=y_test,
)

with BytesIO() as stream:
joblib.dump(ComparisonReport([estimator_report, estimator_report]), stream)
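
For completeness, a sketch of the round trip this test exercises, under the same assumption as above that ComparisonReport and EstimatorReport are importable from the top-level skore package (the test's own import block is not shown in this diff):

    from io import BytesIO

    import joblib
    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression

    from skore import ComparisonReport, EstimatorReport

    X, y = make_classification(random_state=0)
    report = EstimatorReport(
        LogisticRegression(), X_train=X, y_train=y, X_test=X, y_test=y
    )
    comparison = ComparisonReport([report, report])

    # Dump to an in-memory buffer, rewind, and load the comparison back.
    with BytesIO() as stream:
        joblib.dump(comparison, stream)
        stream.seek(0)
        restored = joblib.load(stream)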