Skip to content

Commit

Permalink
[enhancement] check that all sklearnex estimators are centrally tested (
Browse files Browse the repository at this point in the history
#2037)

* add test_all_estimators_covered

* forgotten import

* forgotten underscore

* remove recursion

* root -> path

* forgot to change kwargs

* first attempt

* wrong if statement

* add underscores

* sklearn_ -> _sklearn_

* fix mistake

* reformulate to wrapper

* swaps

* forgotten :

* isort fixes

* remove preview from search

* it's a set

* formatting

* manually remove preview from list

* add BasicStatistics

* fix mistake

* Update basic_statistics.py

* Delete sklearnex/linear_model/logistic_path.py

* Update test_common.py

* Update test_common.py

* formatting

* add docstrings

* Update sklearnex/tests/test_common.py

Co-authored-by: Samir Nasibli <samir.nasibli@intel.com>

* Update test_common.py

* collect all uncovered estimators for assert

* isort fixes

* add underscores

* fix validate_data checks for IncBS

* Update k_means.py

* Update test_run_to_run_stability.py

* Update basic_statistics.py

* Update test_run_to_run_stability.py

---------

Co-authored-by: Samir Nasibli <samir.nasibli@intel.com>
  • Loading branch information
icfaust and samir-nasibli authored Oct 10, 2024
1 parent 0d2b42d commit 8883b39
Show file tree
Hide file tree
Showing 25 changed files with 327 additions and 284 deletions.
3 changes: 3 additions & 0 deletions sklearnex/basic_statistics/basic_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ class BasicStatistics(BaseEstimator):
"""
Estimator for basic statistics.
Allows to compute basic statistics for provided data.
Note, some results can exhibit small variations due to
floating point error accumulation and multithreading.
Parameters
----------
result_options: string or list, default='all'
Expand Down
37 changes: 22 additions & 15 deletions sklearnex/basic_statistics/incremental_basic_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,21 +146,22 @@ def _onedal_finalize_fit(self, queue=None):
self._onedal_estimator.finalize_fit(queue=queue)
self._need_to_finalize = False

def _onedal_partial_fit(self, X, sample_weight=None, queue=None):
def _onedal_partial_fit(self, X, sample_weight=None, queue=None, check_input=True):
first_pass = not hasattr(self, "n_samples_seen_") or self.n_samples_seen_ == 0

if sklearn_check_version("1.0"):
X = validate_data(
self,
X,
dtype=[np.float64, np.float32],
reset=first_pass,
)
else:
X = check_array(
X,
dtype=[np.float64, np.float32],
)
if check_input:
if sklearn_check_version("1.0"):
X = validate_data(
self,
X,
dtype=[np.float64, np.float32],
reset=first_pass,
)
else:
X = check_array(
X,
dtype=[np.float64, np.float32],
)

if sample_weight is not None:
sample_weight = _check_sample_weight(sample_weight, X)
Expand Down Expand Up @@ -206,7 +207,9 @@ def _onedal_fit(self, X, sample_weight=None, queue=None):
for batch in gen_batches(X.shape[0], self.batch_size_):
X_batch = X[batch]
weights_batch = sample_weight[batch] if sample_weight is not None else None
self._onedal_partial_fit(X_batch, weights_batch, queue=queue)
self._onedal_partial_fit(
X_batch, weights_batch, queue=queue, check_input=False
)

self.n_features_in_ = X.shape[1]

Expand Down Expand Up @@ -235,7 +238,7 @@ def __getattr__(self, attr):
f"'{self.__class__.__name__}' object has no attribute '{attr}'"
)

def partial_fit(self, X, sample_weight=None):
def partial_fit(self, X, sample_weight=None, check_input=True):
"""Incremental fit with X. All of X is processed as a single batch.
Parameters
Expand All @@ -250,6 +253,9 @@ def partial_fit(self, X, sample_weight=None):
sample_weight : array-like of shape (n_samples,), default=None
Weights for compute weighted statistics, where `n_samples` is the number of samples.
check_input : bool, default=True
Run check_array on X.
Returns
-------
self : object
Expand All @@ -264,6 +270,7 @@ def partial_fit(self, X, sample_weight=None):
},
X,
sample_weight,
check_input=check_input,
)
return self

Expand Down
14 changes: 7 additions & 7 deletions sklearnex/cluster/dbscan.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from abc import ABC

from scipy import sparse as sp
from sklearn.cluster import DBSCAN as sklearn_DBSCAN
from sklearn.cluster import DBSCAN as _sklearn_DBSCAN
from sklearn.utils.validation import _check_sample_weight

from daal4py.sklearn._n_jobs_support import control_n_jobs
Expand All @@ -34,7 +34,7 @@
if sklearn_check_version("1.6"):
from sklearn.utils.validation import validate_data
else:
validate_data = sklearn_DBSCAN._validate_data
validate_data = _sklearn_DBSCAN._validate_data


class BaseDBSCAN(ABC):
Expand All @@ -51,11 +51,11 @@ def _save_attributes(self):


@control_n_jobs(decorated_methods=["fit"])
class DBSCAN(sklearn_DBSCAN, BaseDBSCAN):
__doc__ = sklearn_DBSCAN.__doc__
class DBSCAN(_sklearn_DBSCAN, BaseDBSCAN):
__doc__ = _sklearn_DBSCAN.__doc__

if sklearn_check_version("1.2"):
_parameter_constraints: dict = {**sklearn_DBSCAN._parameter_constraints}
_parameter_constraints: dict = {**_sklearn_DBSCAN._parameter_constraints}

def __init__(
self,
Expand Down Expand Up @@ -185,7 +185,7 @@ def fit(self, X, y=None, sample_weight=None):
"fit",
{
"onedal": self.__class__._onedal_fit,
"sklearn": sklearn_DBSCAN.fit,
"sklearn": _sklearn_DBSCAN.fit,
},
X,
y,
Expand All @@ -194,4 +194,4 @@ def fit(self, X, y=None, sample_weight=None):

return self

fit.__doc__ = sklearn_DBSCAN.fit.__doc__
fit.__doc__ = _sklearn_DBSCAN.fit.__doc__
28 changes: 14 additions & 14 deletions sklearnex/cluster/k_means.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

import numpy as np
from scipy.sparse import issparse
from sklearn.cluster import KMeans as sklearn_KMeans
from sklearn.cluster import KMeans as _sklearn_KMeans
from sklearn.utils._openmp_helpers import _openmp_effective_n_threads
from sklearn.utils.validation import (
_check_sample_weight,
Expand All @@ -44,14 +44,14 @@
if sklearn_check_version("1.6"):
from sklearn.utils.validation import validate_data
else:
validate_data = sklearn_KMeans._validate_data
validate_data = _sklearn_KMeans._validate_data

@control_n_jobs(decorated_methods=["fit", "fit_transform", "predict", "score"])
class KMeans(sklearn_KMeans):
__doc__ = sklearn_KMeans.__doc__
class KMeans(_sklearn_KMeans):
__doc__ = _sklearn_KMeans.__doc__

if sklearn_check_version("1.2"):
_parameter_constraints: dict = {**sklearn_KMeans._parameter_constraints}
_parameter_constraints: dict = {**_sklearn_KMeans._parameter_constraints}

def __init__(
self,
Expand Down Expand Up @@ -145,7 +145,7 @@ def fit(self, X, y=None, sample_weight=None):
"fit",
{
"onedal": self.__class__._onedal_fit,
"sklearn": sklearn_KMeans.fit,
"sklearn": _sklearn_KMeans.fit,
},
X,
y,
Expand Down Expand Up @@ -254,7 +254,7 @@ def predict(self, X):
"predict",
{
"onedal": self.__class__._onedal_predict,
"sklearn": sklearn_KMeans.predict,
"sklearn": _sklearn_KMeans.predict,
},
X,
)
Expand Down Expand Up @@ -286,7 +286,7 @@ def predict(
"predict",
{
"onedal": self.__class__._onedal_predict,
"sklearn": sklearn_KMeans.predict,
"sklearn": _sklearn_KMeans.predict,
},
X,
sample_weight,
Expand Down Expand Up @@ -339,7 +339,7 @@ def score(self, X, y=None, sample_weight=None):
"score",
{
"onedal": self.__class__._onedal_score,
"sklearn": sklearn_KMeans.score,
"sklearn": _sklearn_KMeans.score,
},
X,
y,
Expand Down Expand Up @@ -384,11 +384,11 @@ def _save_attributes(self):

self._n_init = self._onedal_estimator._n_init

fit.__doc__ = sklearn_KMeans.fit.__doc__
predict.__doc__ = sklearn_KMeans.predict.__doc__
transform.__doc__ = sklearn_KMeans.transform.__doc__
fit_transform.__doc__ = sklearn_KMeans.fit_transform.__doc__
score.__doc__ = sklearn_KMeans.score.__doc__
fit.__doc__ = _sklearn_KMeans.fit.__doc__
predict.__doc__ = _sklearn_KMeans.predict.__doc__
transform.__doc__ = _sklearn_KMeans.transform.__doc__
fit_transform.__doc__ = _sklearn_KMeans.fit_transform.__doc__
score.__doc__ = _sklearn_KMeans.score.__doc__

else:
from daal4py.sklearn.cluster import KMeans
Expand Down
12 changes: 6 additions & 6 deletions sklearnex/covariance/incremental_covariance.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import numpy as np
from scipy import linalg
from sklearn.base import BaseEstimator, clone
from sklearn.covariance import EmpiricalCovariance as sklearn_EmpiricalCovariance
from sklearn.covariance import EmpiricalCovariance as _sklearn_EmpiricalCovariance
from sklearn.covariance import log_likelihood
from sklearn.utils import check_array, gen_batches
from sklearn.utils.validation import _num_features
Expand Down Expand Up @@ -103,8 +103,8 @@ class IncrementalEmpiricalCovariance(BaseEstimator):
"copy": ["boolean"],
}

get_precision = sklearn_EmpiricalCovariance.get_precision
error_norm = wrap_output_data(sklearn_EmpiricalCovariance.error_norm)
get_precision = _sklearn_EmpiricalCovariance.get_precision
error_norm = wrap_output_data(_sklearn_EmpiricalCovariance.error_norm)

def __init__(
self, *, store_precision=False, assume_centered=False, batch_size=None, copy=True
Expand Down Expand Up @@ -374,6 +374,6 @@ def mahalanobis(self, X):
_onedal_cpu_supported = _onedal_supported
_onedal_gpu_supported = _onedal_supported

mahalanobis.__doc__ = sklearn_EmpiricalCovariance.mahalanobis.__doc__
error_norm.__doc__ = sklearn_EmpiricalCovariance.error_norm.__doc__
score.__doc__ = sklearn_EmpiricalCovariance.score.__doc__
mahalanobis.__doc__ = _sklearn_EmpiricalCovariance.mahalanobis.__doc__
error_norm.__doc__ = _sklearn_EmpiricalCovariance.error_norm.__doc__
score.__doc__ = _sklearn_EmpiricalCovariance.score.__doc__
22 changes: 11 additions & 11 deletions sklearnex/decomposition/pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,21 +40,21 @@
if sklearn_check_version("1.2"):
from sklearn.utils._param_validation import StrOptions

from sklearn.decomposition import PCA as sklearn_PCA
from sklearn.decomposition import PCA as _sklearn_PCA

from onedal.decomposition import PCA as onedal_PCA

if sklearn_check_version("1.6"):
from sklearn.utils.validation import validate_data
else:
validate_data = sklearn_PCA._validate_data
validate_data = _sklearn_PCA._validate_data

@control_n_jobs(decorated_methods=["fit", "transform", "fit_transform"])
class PCA(sklearn_PCA):
__doc__ = sklearn_PCA.__doc__
class PCA(_sklearn_PCA):
__doc__ = _sklearn_PCA.__doc__

if sklearn_check_version("1.2"):
_parameter_constraints: dict = {**sklearn_PCA._parameter_constraints}
_parameter_constraints: dict = {**_sklearn_PCA._parameter_constraints}
# "onedal_svd" solver uses oneDAL's PCA-SVD algorithm
# and required for testing purposes to fully enable it in future.
# "covariance_eigh" solver is added for ability to explicitly request
Expand Down Expand Up @@ -132,7 +132,7 @@ def _fit(self, X):
"fit",
{
"onedal": self.__class__._onedal_fit,
"sklearn": sklearn_PCA._fit,
"sklearn": _sklearn_PCA._fit,
},
X,
)
Expand Down Expand Up @@ -175,7 +175,7 @@ def transform(self, X):
"transform",
{
"onedal": self.__class__._onedal_transform,
"sklearn": sklearn_PCA.transform,
"sklearn": _sklearn_PCA.transform,
},
X,
)
Expand Down Expand Up @@ -412,10 +412,10 @@ def _validate_n_features_in_after_fitting(self, X):
)
)

fit.__doc__ = sklearn_PCA.fit.__doc__
transform.__doc__ = sklearn_PCA.transform.__doc__
fit_transform.__doc__ = sklearn_PCA.fit_transform.__doc__
inverse_transform.__doc__ = sklearn_PCA.inverse_transform.__doc__
fit.__doc__ = _sklearn_PCA.fit.__doc__
transform.__doc__ = _sklearn_PCA.transform.__doc__
fit_transform.__doc__ = _sklearn_PCA.fit_transform.__doc__
inverse_transform.__doc__ = _sklearn_PCA.inverse_transform.__doc__

else:
from daal4py.sklearn.decomposition import PCA
Expand Down
Loading

0 comments on commit 8883b39

Please sign in to comment.