diff --git a/docs/conf.py b/docs/conf.py index ae17adca2..15322b03c 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -61,7 +61,8 @@ html_sourcelink_suffix = "" intersphinx_mapping = { - "derivative": ("https://derivative.readthedocs.io/en/latest/", None) + "derivative": ("https://derivative.readthedocs.io/en/latest/", None), + "sklearn": ("https://scikit-learn.org/stable/", None), } # -- Extensions to the Napoleon GoogleDocstring class --------------------- diff --git a/pysindy/feature_library/base.py b/pysindy/feature_library/base.py index 93c6d87e4..9d8257887 100644 --- a/pysindy/feature_library/base.py +++ b/pysindy/feature_library/base.py @@ -25,29 +25,8 @@ class BaseFeatureLibrary(TransformerMixin): Forces subclasses to implement ``fit``, ``transform``, and ``get_feature_names`` methods. - - Parameters - ---------- - library_ensemble : boolean, optional (default False) - Whether or not to use library bagging (regress on subset of the - candidate terms in the library) - - ensemble_indices : integer array, optional (default [0]) - The indices to use for ensembling the library. """ - def __init__(self, library_ensemble=None, ensemble_indices=[0]): - if library_ensemble is not None: - warnings.warn( - "Library ensembling is no longer performed by feature libraries. Use " - "EnsemblingOptimizer to fit an ensemble model.", - DeprecationWarning, - ) - self.library_ensemble = library_ensemble - if np.any(np.asarray(ensemble_indices) < 0): - raise ValueError("Library ensemble indices must be 0 or positive integers.") - self.ensemble_indices = ensemble_indices - def validate_input(self, x, *args, **kwargs): return validate_no_reshape(x, *args, **kwargs) @@ -145,29 +124,6 @@ def get_feature_names(self, input_features=None): """ raise NotImplementedError - def _ensemble(self, xp): - """ - If library bagging, return xp without - the terms at ensemble_indices - """ - warnings.warn( - "Library ensembling is no longer performed by feature libraries. 
Use " - "EnsemblingOptimizer to fit an ensemble model.", - UserWarning, - ) - - if self.library_ensemble: - if self.n_output_features_ <= len(self.ensemble_indices): - raise ValueError( - "Error: you are trying to chop more library terms " - "than are available to remove!" - ) - inds = range(self.n_output_features_) - inds = np.delete(inds, self.ensemble_indices) - return [x[..., inds] for x in xp] - else: - return xp - def __add__(self, other): return ConcatLibrary([self, other]) @@ -222,14 +178,6 @@ class ConcatLibrary(BaseFeatureLibrary): libraries : list of libraries Library instances to be applied to the input matrix. - library_ensemble : boolean, optional (default False) - Whether or not to use library bagging (regress on subset of the - candidate terms in the library). - - ensemble_indices : integer array, optional (default [0]) - The indices to use for ensembling the library. For instance, if - ensemble_indices = [0], it chops off the first column of the library. - Attributes ---------- n_features_in_ : int @@ -257,12 +205,8 @@ class ConcatLibrary(BaseFeatureLibrary): def __init__( self, libraries: list, - library_ensemble=False, - ensemble_indices=[0], ): - super(ConcatLibrary, self).__init__( - library_ensemble=library_ensemble, ensemble_indices=ensemble_indices - ) + super().__init__() self.libraries = libraries @x_sequence_or_item @@ -319,8 +263,6 @@ def transform(self, x_full): xp = AxesArray(xp, comprehend_axes(xp)) xp_full.append(xp) - if self.library_ensemble: - xp_full = self._ensemble(xp_full) return xp_full def get_feature_names(self, input_features=None): @@ -355,20 +297,12 @@ class TensoredLibrary(BaseFeatureLibrary): libraries : list of libraries Library instances to be applied to the input matrix. - library_ensemble : boolean, optional (default False) - Whether or not to use library bagging (regress on subset of the - candidate terms in the library). 
- inputs_per_library_ : Sequence of Sequences of ints (default None) list that specifies which input indexes should be passed as inputs for each of the individual feature libraries. length must equal the number of feature libraries. Default is that all inputs are used for every library. - ensemble_indices : integer array, optional (default [0]) - The indices to use for ensembling the library. For instance, if - ensemble_indices = [0], it chops off the first column of the library. - Attributes ---------- libraries_ : list of libraries @@ -399,14 +333,10 @@ class TensoredLibrary(BaseFeatureLibrary): def __init__( self, libraries: list, - library_ensemble=False, inputs_per_library: Optional[Sequence[Sequence[int]]] = None, - ensemble_indices=[0], ): - super(TensoredLibrary, self).__init__( - library_ensemble=library_ensemble, ensemble_indices=ensemble_indices - ) - self.libraries_ = libraries + super().__init__() + self.libraries = libraries self.inputs_per_library = inputs_per_library def _combinations(self, lib_i, lib_j): @@ -470,13 +400,13 @@ def fit(self, x_full, y=None): # If parameter is not set, use all the inputs if self.inputs_per_library is None: self.inputs_per_library = list( - repeat(list(range(n_features)), len(self.libraries_)) + repeat(list(range(n_features)), len(self.libraries)) ) # First fit all libs provided below fitted_libs = [ lib.fit([x[..., _unique(self.inputs_per_library[i])] for x in x_full], y) - for i, lib in enumerate(self.libraries_) + for i, lib in enumerate(self.libraries) ] # Calculate the sum of output features @@ -486,7 +416,7 @@ def fit(self, x_full, y=None): self.n_output_features_ *= osize # Save fitted libs - self.libraries_ = fitted_libs + self.libraries = fitted_libs return self @@ -511,8 +441,8 @@ def transform(self, x_full): xp_full = [] for x in x_full: xp = [] - for i in range(len(self.libraries_)): - lib_i = self.libraries_[i] + for i in range(len(self.libraries)): + lib_i = self.libraries[i] if self.inputs_per_library is 
None: xp_i = lib_i.transform([x])[0] else: @@ -520,8 +450,8 @@ def transform(self, x_full): [x[..., _unique(self.inputs_per_library[i])]] )[0] - for j in range(i + 1, len(self.libraries_)): - lib_j = self.libraries_[j] + for j in range(i + 1, len(self.libraries)): + lib_j = self.libraries[j] xp_j = lib_j.transform( [x[..., _unique(self.inputs_per_library[j])]] )[0] @@ -531,8 +461,6 @@ def transform(self, x_full): xp = np.concatenate(xp, axis=xp[0].ax_coord) xp = AxesArray(xp, comprehend_axes(xp)) xp_full.append(xp) - if self.library_ensemble: - xp_full = self._ensemble(xp_full) return xp_full def get_feature_names(self, input_features=None): @@ -549,8 +477,8 @@ def get_feature_names(self, input_features=None): output_feature_names : list of string, length n_output_features """ feature_names = list() - for i in range(len(self.libraries_)): - lib_i = self.libraries_[i] + for i in range(len(self.libraries)): + lib_i = self.libraries[i] if input_features is None: input_features_i = [ "x%d" % k for k in _unique(self.inputs_per_library[i]) @@ -560,8 +488,8 @@ def get_feature_names(self, input_features=None): _unique(self.inputs_per_library[i]) ].tolist() lib_i_feat_names = lib_i.get_feature_names(input_features_i) - for j in range(i + 1, len(self.libraries_)): - lib_j = self.libraries_[j] + for j in range(i + 1, len(self.libraries)): + lib_j = self.libraries[j] if input_features is None: input_features_j = [ "x%d" % k for k in _unique(self.inputs_per_library[j]) @@ -577,7 +505,7 @@ def get_feature_names(self, input_features=None): return feature_names def calc_trajectory(self, diff_method, x, t): - return self.libraries_[0].calc_trajectory(diff_method, x, t) + return self.libraries[0].calc_trajectory(diff_method, x, t) def _unique(s: Sequence) -> Sequence: diff --git a/pysindy/feature_library/custom_library.py b/pysindy/feature_library/custom_library.py index bde86fc2a..8065ba9c3 100644 --- a/pysindy/feature_library/custom_library.py +++ 
b/pysindy/feature_library/custom_library.py @@ -41,13 +41,6 @@ class CustomLibrary(BaseFeatureLibrary): will be included. If False, all combinations will be included. - library_ensemble : boolean, optional (default False) - Whether or not to use library bagging (regress on subset of the - candidate terms in the library) - - ensemble_indices : integer array, optional (default [0]) - The indices to use for ensembling the library. - include_bias : boolean, optional (default False) If True (default), then include a bias column, the feature in which all polynomial powers are zero (i.e. a column of ones - acts as an @@ -92,13 +85,9 @@ def __init__( library_functions, function_names=None, interaction_only=True, - library_ensemble=False, - ensemble_indices=[0], include_bias=False, ): - super(CustomLibrary, self).__init__( - library_ensemble=library_ensemble, ensemble_indices=ensemble_indices - ) + super().__init__() self.functions = library_functions self.function_names = function_names if function_names and ( @@ -219,6 +208,4 @@ def transform(self, x_full): xp = AxesArray(xp, comprehend_axes(xp)) xp_full.append(xp) - if self.library_ensemble: - xp_full = self._ensemble(xp_full) return xp_full diff --git a/pysindy/feature_library/fourier_library.py b/pysindy/feature_library/fourier_library.py index 6ead04d47..a6a61331d 100644 --- a/pysindy/feature_library/fourier_library.py +++ b/pysindy/feature_library/fourier_library.py @@ -25,13 +25,6 @@ class FourierLibrary(BaseFeatureLibrary): include_cos : boolean, optional (default True) If True, include cosine terms in the library. - library_ensemble : boolean, optional (default False) - Whether or not to use library bagging (regress on subset of the - candidate terms in the library) - - ensemble_indices : integer array, optional (default 0) - The indices to use for ensembling the library. 
- Attributes ---------- n_features_in_ : int @@ -61,12 +54,8 @@ def __init__( n_frequencies=1, include_sin=True, include_cos=True, - library_ensemble=False, - ensemble_indices=[0], ): - super(FourierLibrary, self).__init__( - library_ensemble=library_ensemble, ensemble_indices=ensemble_indices - ) + super().__init__() if not (include_sin or include_cos): raise ValueError("include_sin and include_cos cannot both be False") if n_frequencies < 1 or not isinstance(n_frequencies, int): @@ -164,6 +153,4 @@ def transform(self, x_full): idx += 1 xp = AxesArray(xp, comprehend_axes(xp)) xp_full.append(xp) - if self.library_ensemble: - xp_full = self._ensemble(xp_full) return xp_full diff --git a/pysindy/feature_library/generalized_library.py b/pysindy/feature_library/generalized_library.py index 8a585f212..831b0fab9 100644 --- a/pysindy/feature_library/generalized_library.py +++ b/pysindy/feature_library/generalized_library.py @@ -42,14 +42,6 @@ class GeneralizedLibrary(BaseFeatureLibrary): length must equal the number of feature libraries. Default is that all inputs are used for every library. - library_ensemble : boolean, optional (default False) - Whether or not to use library bagging (regress on subset of the - candidate terms in the library). - - ensemble_indices : integer array, optional (default [0]) - The indices to use for ensembling the library. For instance, if - ensemble_indices = [0], it chops off the first column of the library. 
- Attributes ---------- self.libraries_full_: list[BaseFeatureLibrary] @@ -82,13 +74,9 @@ def __init__( libraries: list, tensor_array=None, inputs_per_library: Optional[Sequence[Sequence[int]]] = None, - library_ensemble=False, - ensemble_indices=[0], exclude_libraries=[], ): - super(GeneralizedLibrary, self).__init__( - library_ensemble=library_ensemble, ensemble_indices=ensemble_indices - ) + super().__init__() if len(libraries) > 0: self.libraries = libraries @@ -252,8 +240,6 @@ def transform(self, x_full): xp = AxesArray(np.concatenate(xps, axis=xps[0].ax_coord), xps[0].__dict__) xp_full = xp_full + [xp] - if self.library_ensemble: - xp_full = self._ensemble(xp_full) return xp_full def get_feature_names(self, input_features=None): diff --git a/pysindy/feature_library/identity_library.py b/pysindy/feature_library/identity_library.py index 37a06bc52..70d8f231c 100644 --- a/pysindy/feature_library/identity_library.py +++ b/pysindy/feature_library/identity_library.py @@ -18,13 +18,6 @@ class IdentityLibrary(BaseFeatureLibrary): The total number of output features. The number of output features is equal to the number of input features. - library_ensemble : boolean, optional (default False) - Whether or not to use library bagging (regress on subset of the - candidate terms in the library) - - ensemble_indices : integer array, optional (default [0]) - The indices to use for ensembling the library. 
- Examples -------- >>> import numpy as np @@ -39,15 +32,6 @@ class IdentityLibrary(BaseFeatureLibrary): ['x0', 'x1'] """ - def __init__( - self, - library_ensemble=False, - ensemble_indices=[0], - ): - super(IdentityLibrary, self).__init__( - library_ensemble=library_ensemble, ensemble_indices=ensemble_indices - ) - def get_feature_names(self, input_features=None): """ Return feature names for output features @@ -114,6 +98,4 @@ def transform(self, x_full): raise ValueError("x shape does not match training shape") xp_full = xp_full + [x.copy()] - if self.library_ensemble: - xp_full = self._ensemble(xp_full) return xp_full diff --git a/pysindy/feature_library/parameterized_library.py b/pysindy/feature_library/parameterized_library.py index eb102e044..3b1edf13b 100644 --- a/pysindy/feature_library/parameterized_library.py +++ b/pysindy/feature_library/parameterized_library.py @@ -52,8 +52,6 @@ def __init__( feature_library: BaseFeatureLibrary = PolynomialLibrary(), num_parameters: int = 3, num_features: int = 3, - library_ensemble=False, - ensemble_indices=[0], ): if num_parameters <= 0 or num_features <= 0: raise ValueError("Both num_parameter and num_feature must be positive.") @@ -67,8 +65,6 @@ def __init__( tensor_array=[[1, 1]], exclude_libraries=[0, 1], inputs_per_library=inputs_per_library, - library_ensemble=library_ensemble, - ensemble_indices=ensemble_indices, ) def calc_trajectory(self, diff_method, x, t): diff --git a/pysindy/feature_library/pde_library.py b/pysindy/feature_library/pde_library.py index f92c37b37..4716ac73d 100644 --- a/pysindy/feature_library/pde_library.py +++ b/pysindy/feature_library/pde_library.py @@ -63,13 +63,6 @@ class PDELibrary(BaseFeatureLibrary): will consist of only pure no-derivative terms and pure derivative terms, with no mixed terms. 
- library_ensemble : boolean, optional (default False) - Whether or not to use library bagging (regress on subset of the - candidate terms in the library) - - ensemble_indices : integer array, optional (default [0]) - The indices to use for ensembling the library. - implicit_terms : boolean Flag to indicate if SINDy-PI (temporal derivatives) is being used for the right-hand side of the SINDy fit. @@ -118,8 +111,6 @@ def __init__( function_names=None, include_bias=False, include_interaction=True, - library_ensemble=False, - ensemble_indices=[0], implicit_terms=False, multiindices=None, differentiation_method=FiniteDifference, @@ -127,9 +118,7 @@ def __init__( is_uniform=None, periodic=None, ): - super(PDELibrary, self).__init__( - library_ensemble=library_ensemble, ensemble_indices=ensemble_indices - ) + super().__init__() self.functions = library_functions self.derivative_order = derivative_order self.function_names = function_names @@ -472,8 +461,6 @@ def transform(self, x_full): library_idx += n_library_terms * self.num_derivatives * n_features xp = AxesArray(xp, comprehend_axes(xp)) xp_full.append(xp) - if self.library_ensemble: - xp_full = self._ensemble(xp_full) return xp_full def get_spatial_grid(self): diff --git a/pysindy/feature_library/polynomial_library.py b/pysindy/feature_library/polynomial_library.py index 577678644..b4753fe09 100644 --- a/pysindy/feature_library/polynomial_library.py +++ b/pysindy/feature_library/polynomial_library.py @@ -44,13 +44,6 @@ class PolynomialLibrary(PolynomialFeatures, BaseFeatureLibrary): Order of output array in the dense case. 'F' order is faster to compute, but may slow down subsequent estimators. - library_ensemble : boolean, optional (default False) - Whether or not to use library bagging (regress on subset of the - candidate terms in the library) - - ensemble_indices : integer array, optional (default [0]) - The indices to use for ensembling the library. 
- Attributes ---------- powers_ : array, shape (n_output_features, n_input_features) @@ -71,8 +64,6 @@ def __init__( interaction_only=False, include_bias=True, order="C", - library_ensemble=False, - ensemble_indices=[0], ): super(PolynomialLibrary, self).__init__( degree=degree, @@ -80,9 +71,6 @@ def __init__( include_bias=include_bias, order=order, ) - BaseFeatureLibrary.__init__( - self, library_ensemble=library_ensemble, ensemble_indices=ensemble_indices - ) if degree < 0 or not isinstance(degree, int): raise ValueError("degree must be a nonnegative integer") if (not include_interaction) and interaction_only: @@ -284,6 +272,4 @@ def transform(self, x_full): for i, comb in enumerate(combinations): xp[..., i] = x[..., comb].prod(-1) xp_full = xp_full + [xp] - if self.library_ensemble: - xp_full = self._ensemble(xp_full) return xp_full diff --git a/pysindy/feature_library/sindy_pi_library.py b/pysindy/feature_library/sindy_pi_library.py index ddccf3605..04535369a 100644 --- a/pysindy/feature_library/sindy_pi_library.py +++ b/pysindy/feature_library/sindy_pi_library.py @@ -72,13 +72,6 @@ class SINDyPILibrary(BaseFeatureLibrary): This is hard to do with just lambda functions, because if the system is not 1D, lambdas will generate duplicates. - library_ensemble : boolean, optional (default False) - Whether or not to use library bagging (regress on subset of the - candidate terms in the library) - - ensemble_indices : integer array, optional (default [0]) - The indices to use for ensembling the library. 
- Attributes ---------- functions : list of functions @@ -145,12 +138,8 @@ def __init__( interaction_only=True, differentiation_method=None, include_bias=False, - library_ensemble=False, - ensemble_indices=[0], ): - super(SINDyPILibrary, self).__init__( - library_ensemble=library_ensemble, ensemble_indices=ensemble_indices - ) + super().__init__() self.x_functions = library_functions self.x_dot_functions = x_dot_library_functions self.function_names = function_names @@ -417,6 +406,4 @@ def transform(self, x_full): ) * f_dot(*[x_dot[:, comb] for comb in f_dot_combs]) library_idx += 1 xp_full = xp_full + [AxesArray(xp, x.__dict__)] - if self.library_ensemble: - xp_full = self._ensemble(xp_full) return xp_full diff --git a/pysindy/feature_library/weak_pde_library.py b/pysindy/feature_library/weak_pde_library.py index 7f875ad5b..3c7266b31 100644 --- a/pysindy/feature_library/weak_pde_library.py +++ b/pysindy/feature_library/weak_pde_library.py @@ -98,13 +98,6 @@ class WeakPDELibrary(BaseFeatureLibrary): Positive integer to define the polynomial degree of the spatial weights used for weak/integral SINDy. - library_ensemble : boolean, optional (default False) - Whether or not to use library bagging (regress on subset of the - candidate terms in the library) - - ensemble_indices : integer array, optional (default [0]) - The indices to use for ensembling the library. - num_pts_per_domain : int, deprecated (default None) Included here to retain backwards compatibility with older code that uses this parameter. 
However, it merely raises a @@ -170,8 +163,6 @@ def __init__( K=100, H_xt=None, p=4, - library_ensemble=False, - ensemble_indices=[0], num_pts_per_domain=None, implicit_terms=False, multiindices=None, @@ -180,9 +171,7 @@ def __init__( is_uniform=None, periodic=None, ): - super(WeakPDELibrary, self).__init__( - library_ensemble=library_ensemble, ensemble_indices=ensemble_indices - ) + super().__init__() self.functions = library_functions self.derivative_order = derivative_order self.function_names = function_names @@ -1061,8 +1050,6 @@ def transform(self, x_full): library_idx += n_library_terms * self.num_derivatives * n_features xp_full = xp_full + [AxesArray(xp, {"ax_sample": 0, "ax_coord": 1})] - if self.library_ensemble: - xp_full = self._ensemble(xp_full) return xp_full def calc_trajectory(self, diff_method, x, t): diff --git a/pysindy/optimizers/base.py b/pysindy/optimizers/base.py index 377f8a171..da0ac930e 100644 --- a/pysindy/optimizers/base.py +++ b/pysindy/optimizers/base.py @@ -281,19 +281,19 @@ def __init__( raise ValueError( "If not ensembling data or library terms, use another optimizer" ) - if n_subset is not None and n_subset <= 0: + if bagging and (n_subset is None or n_subset < 1): raise ValueError("n_subset must be a positive integer if bagging") - if n_candidates_to_drop is not None and n_candidates_to_drop <= 0: + if library_ensemble and ( + n_candidates_to_drop is None or n_candidates_to_drop < 1 + ): raise ValueError( "n_candidates_to_drop must be a positive integer if ensembling library" ) - self.opt = opt - if n_models is None or n_models == 0: - warnings.warn( - "n_models must be a positive integer. Explicitly initialized to zero" - " or None, defaulting to 20." 
+ if n_models < 1: + raise ValueError( + "n_models must be a positive integer" ) - n_models = 20 + self.opt = opt self.n_models = n_models self.n_subset = n_subset self.bagging = bagging @@ -316,7 +316,7 @@ def _reduce(self, x: AxesArray, y: np.ndarray) -> None: else: n_subset = self.n_subset - n_features = x.shape[x.ax_coord] + n_features = x.n_coord if self.library_ensemble and self.n_candidates_to_drop > n_features: warnings.warn( "n_candidates_to_drop larger than number of features. Cannot " diff --git a/pysindy/pysindy.py b/pysindy/pysindy.py index d7af0fb19..e7bd1b4c7 100644 --- a/pysindy/pysindy.py +++ b/pysindy/pysindy.py @@ -14,7 +14,6 @@ from .differentiation import FiniteDifference from .feature_library import PolynomialLibrary -from .optimizers import EnsembleOptimizer from .optimizers import SINDyOptimizer try: # Waiting on PEP 690 to lazy import CVXPY @@ -183,10 +182,6 @@ def fit( u=None, multiple_trajectories=False, unbias=True, - ensemble=False, - library_ensemble=False, - n_subset=None, - n_models=None, ): """ Fit a SINDy model. @@ -243,45 +238,11 @@ identified by the optimizer. This helps to remove the bias introduced by regularization. - ensemble : boolean, optional (default False) - This parameter is used to allow for "ensembling", i.e. the - generation of many SINDy models (n_models) by choosing a random - temporal subset of the input data (n_subset) for each sparse - regression. This often improves robustness because averages - (bagging) or medians (bragging) of all the models are usually - quite high-performing. The user can also generate "distributions" - of many models, and calculate how often certain library terms - are included in a model. - - library_ensemble : boolean, optional (default False) - This parameter is used to allow for "library ensembling", - i.e. the generation of many SINDy models (n_models) by choosing - a random subset of the candidate library terms to truncate.
So, - n_models are generated by solving n_models sparse regression - problems on these "reduced" libraries. Once again, this often - improves robustness because averages (bagging) or medians - (bragging) of all the models are usually quite high-performing. - The user can also generate "distributions" of many models, and - calculate how often certain library terms are included in a model. - - n_subset : int, optional (default len(time base)) - Number of time points to use for ensemble - - n_models : int, optional (default 20) - Number of models to generate via ensemble - Returns ------- self: a fitted :class:`SINDy` instance """ - if ensemble or library_ensemble: - # DeprecationWarning are ignored by default... - warnings.warn( - "Ensembling arguments are deprecated." - "Use the EnsembleOptimizer class instead.", - UserWarning, - ) if t is None: t = self.t_default @@ -301,11 +262,6 @@ def fit( x, t, x_dot, u, self.feature_library ) - if (n_models is not None) and n_models <= 0: - raise ValueError("n_models must be a positive integer") - if (n_subset is not None) and n_subset <= 0: - raise ValueError("n_subset must be a positive integer") - if u is None: self.n_control_features_ = 0 else: @@ -317,10 +273,6 @@ def fit( self.n_control_features_ = u[0].shape[u[0].ax_coord] x, x_dot = self._process_multiple_trajectories(x, t, x_dot) - # Set ensemble variables - self.ensemble = ensemble - self.library_ensemble = library_ensemble - # Append control variables if u is not None: x = [np.concatenate((xi, ui), axis=xi.ax_coord) for xi, ui in zip(x, u)] @@ -328,52 +280,7 @@ def fit( if hasattr(self.optimizer, "unbias"): unbias = self.optimizer.unbias - # backwards compatibility for ensemble options - if ensemble and n_subset is None: - n_subset = x[0].shape[x[0].ax_time] - if library_ensemble: - self.feature_library.library_ensemble = False - if ensemble and not library_ensemble: - if n_subset is None: - n_sample_tot = np.sum([xi.shape[xi.ax_time] for xi in x]) - n_subset = 
int(0.6 * n_sample_tot) - optimizer = SINDyOptimizer( - EnsembleOptimizer( - self.optimizer, - bagging=True, - n_subset=n_subset, - n_models=n_models, - ), - unbias=unbias, - ) - self.coef_list = optimizer.optimizer.coef_list - elif not ensemble and library_ensemble: - optimizer = SINDyOptimizer( - EnsembleOptimizer( - self.optimizer, - library_ensemble=True, - n_models=n_models, - ), - unbias=unbias, - ) - self.coef_list = optimizer.optimizer.coef_list - elif ensemble and library_ensemble: - if n_subset is None: - n_sample_tot = np.sum([xi.shape[xi.ax_time] for xi in x]) - n_subset = int(0.6 * n_sample_tot) - optimizer = SINDyOptimizer( - EnsembleOptimizer( - self.optimizer, - bagging=True, - n_subset=n_subset, - n_models=n_models, - library_ensemble=True, - ), - unbias=unbias, - ) - self.coef_list = optimizer.optimizer.coef_list - else: - optimizer = SINDyOptimizer(self.optimizer, unbias=unbias) + optimizer = SINDyOptimizer(self.optimizer, unbias=unbias) steps = [ ("features", self.feature_library), ("shaping", SampleConcatter()), diff --git a/test/test_feature_library.py b/test/test_feature_library.py index aad485a49..19f35c3f3 100644 --- a/test/test_feature_library.py +++ b/test/test_feature_library.py @@ -23,6 +23,7 @@ from pysindy.feature_library import TensoredLibrary from pysindy.feature_library import WeakPDELibrary from pysindy.feature_library.base import BaseFeatureLibrary +from pysindy.optimizers import EnsembleOptimizer from pysindy.optimizers import SINDyPI from pysindy.optimizers import STLSQ @@ -427,49 +428,6 @@ def test_not_fitted(data_lorenz, library): library.transform(x) -@pytest.mark.parametrize( - "library", - [ - IdentityLibrary(), - PolynomialLibrary(), - FourierLibrary(), - PolynomialLibrary() + FourierLibrary(), - pytest.lazy_fixture("data_custom_library"), - pytest.lazy_fixture("data_generalized_library"), - pytest.lazy_fixture("data_ode_library"), - pytest.lazy_fixture("data_pde_library"), - pytest.lazy_fixture("data_sindypi_library"), - 
], -) -def test_library_ensemble(data_lorenz, library): - x, t = data_lorenz - library.fit(x) - n_output_features = library.n_output_features_ - library.library_ensemble = True - xp = library.transform(x) - assert n_output_features == xp.shape[1] + 1 - library.ensemble_indices = [0, 1] - with pytest.warns(UserWarning): - xp = library.transform(x) - assert n_output_features == xp.shape[1] + 2 - library.ensemble_indices = np.zeros(1000, dtype=int).tolist() - with pytest.raises(ValueError): - xp = library.transform(x) - - -@pytest.mark.parametrize( - "library", - [ - IdentityLibrary, - PolynomialLibrary, - FourierLibrary, - ], -) -def test_bad_library_ensemble(library): - with pytest.raises(ValueError): - library = library(ensemble_indices=-1) - - def test_generalized_library(data_lorenz): x, t = data_lorenz poly_library = PolynomialLibrary(include_bias=False) @@ -690,7 +648,7 @@ def test_parameterized_library(diffuse_multiple_trajectories): model = SINDy( feature_library=pde_lib, optimizer=optimizer, feature_names=["u", "c"] ) - model.fit(xs, u=us, multiple_trajectories=True, t=t, ensemble=False) + model.fit(xs, u=us, multiple_trajectories=True, t=t) assert abs(model.coefficients()[0][4] - 1) < 1e-2 assert np.all(model.coefficients()[0][:4] == 0) assert np.all(model.coefficients()[0][5:] == 0) @@ -699,7 +657,7 @@ def test_parameterized_library(diffuse_multiple_trajectories): model = SINDy( feature_library=weak_lib, optimizer=optimizer, feature_names=["u", "c"] ) - model.fit(xs, u=us, multiple_trajectories=True, t=t, ensemble=False) + model.fit(xs, u=us, multiple_trajectories=True, t=t) assert abs(model.coefficients()[0][4] - 1) < 1e-2 assert np.all(model.coefficients()[0][:4] == 0) assert np.all(model.coefficients()[0][5:] == 0) @@ -707,17 +665,21 @@ def test_parameterized_library(diffuse_multiple_trajectories): # Helper function for testing PDE libraries def pde_library_helper(library, u, coef_first_dim): - opt = STLSQ(normalize_columns=True, alpha=1e-10, 
threshold=0) - model = SINDy(optimizer=opt, feature_library=library) + base_opt = STLSQ(normalize_columns=True, alpha=1e-10, threshold=0) + model = SINDy(optimizer=base_opt, feature_library=library) model.fit(u) - assert np.any(opt.coef_ != 0.0) + assert np.any(base_opt.coef_ != 0.0) n_features = len(model.get_feature_names()) - model.fit(u, ensemble=True, n_subset=50, n_models=10) - assert np.shape(model.coef_list) == (10, coef_first_dim, n_features) + opt = EnsembleOptimizer(opt=base_opt, bagging=True, n_models=10, n_subset=50) + model = SINDy(optimizer=opt, feature_library=library) + model.fit(u) + assert np.shape(opt.coef_list) == (10, coef_first_dim, n_features) - model.fit(u, library_ensemble=True, n_models=10) - assert np.shape(model.coef_list) == (10, coef_first_dim, n_features) + opt = EnsembleOptimizer(opt=base_opt, library_ensemble=True, n_models=10) + model = SINDy(optimizer=opt, feature_library=library) + model.fit(u) + assert np.shape(opt.coef_list) == (10, coef_first_dim, n_features) def test_1D_pdes(data_1d_random_pde): diff --git a/test/test_optimizers.py b/test/test_optimizers.py index 33d5e18a3..07e96c39c 100644 --- a/test/test_optimizers.py +++ b/test/test_optimizers.py @@ -13,7 +13,6 @@ from sklearn.utils.validation import check_is_fitted from pysindy import FiniteDifference -from pysindy import PDELibrary from pysindy import PolynomialLibrary from pysindy import SINDy from pysindy.feature_library import CustomLibrary @@ -1028,54 +1027,34 @@ def test_normalize_columns(data_derivative_1d, optimizer): assert opt.coef_.shape == (1, x.shape[1]) -def test_legacy_ensemble_odes(data_lorenz): - x, t = data_lorenz - opt = STLSQ(normalize_columns=True) - model = SINDy(optimizer=opt) - model.fit(x, ensemble=True, n_models=2, n_subset=2) - assert np.shape(model.coef_list) == (2, 3, 10) - - @pytest.mark.parametrize( "optimizer_params", ( - {"library_ensemble": True}, - {"bagging": True}, - {"library_ensemble": True, "bagging": True}, + {"library_ensemble": 
True, "n_models": 2}, + {"bagging": True, "n_models": 2, "n_subset": 2}, + {"library_ensemble": True, "bagging": True, "n_models": 2, "n_subset": 2}, ), ) def test_ensemble_optimizer(data_lorenz, optimizer_params): x, t = data_lorenz optimizer = EnsembleOptimizer(STLSQ(), **optimizer_params) - feature_library = PolynomialLibrary() - model = SINDy(feature_library=feature_library, optimizer=optimizer) - model.fit(x, t) - assert model.coefficients().shape == (3, 10) + optimizer.fit(x, x) + assert optimizer.coef_.shape == (3, 3) + assert len(optimizer.coef_list) == 2 -def test_legacy_ensemble_pdes(): - u = np.random.randn(10, 10, 2) - t = np.linspace(1, 10, 10) - x = np.linspace(1, 10, 10) - dt = t[1] - t[0] - u_dot = np.zeros(u.shape) - for i in range(len(x)): - u_dot[i, :, :] = FiniteDifference()._differentiate(u[i, :, :], t=dt) - - library_functions = [lambda x: x, lambda x: x * x] - library_function_names = [lambda x: x, lambda x: x + x] - pde_lib = PDELibrary( - library_functions=library_functions, - function_names=library_function_names, - derivative_order=3, - spatial_grid=x, - include_bias=True, - ) - opt = STLSQ(normalize_columns=True) - model = SINDy(optimizer=opt, feature_library=pde_lib) - model.fit(u, x_dot=u_dot, ensemble=True, n_models=2, n_subset=2) - n_features = len(model.get_feature_names()) - assert np.shape(model.coef_list) == (2, 2, n_features) +@pytest.mark.parametrize( + "params", + [ + dict(), + dict(bagging=True, n_models=0), + dict(bagging=True, n_subset=0), + dict(library_ensemble=True, n_candidates_to_drop=0), + ], +) +def test_bad_ensemble_params(data_lorenz, params): + with pytest.raises(ValueError): + EnsembleOptimizer(opt=STLSQ(), **params) def test_ssr_criteria(data_lorenz): diff --git a/test/test_pysindy.py b/test/test_pysindy.py index 31d8ee757..9a4630166 100644 --- a/test/test_pysindy.py +++ b/test/test_pysindy.py @@ -30,6 +30,7 @@ from pysindy.feature_library import PolynomialLibrary from pysindy.feature_library import 
WeakPDELibrary from pysindy.optimizers import ConstrainedSR3 +from pysindy.optimizers import EnsembleOptimizer from pysindy.optimizers import SR3 from pysindy.optimizers import STLSQ @@ -636,210 +637,6 @@ def test_linear_constraints(data_lorenz): ) -def test_ensemble(data_lorenz): - x, t = data_lorenz - library = PolynomialLibrary().fit(x) - - constraint_rhs = np.ones(2) - constraint_lhs = np.zeros((2, x.shape[1] * library.n_output_features_)) - - target_1, target_2 = 1, 3 - constraint_lhs[0, 3] = target_1 - constraint_lhs[1, library.n_output_features_] = target_2 - - optimizer = ConstrainedSR3( - constraint_lhs=constraint_lhs, constraint_rhs=constraint_rhs - ) - model = SINDy(feature_library=library, optimizer=optimizer).fit( - x, t, ensemble=True, n_models=10, n_subset=len(t) // 2 - ) - assert len(model.coef_list) == 10 - - -def test_ensemble_pdes(data_1d_random_pde): - t, spatial_grid, u, u_dot = data_1d_random_pde - library_functions = [lambda x: x, lambda x: x * x] - library_function_names = [lambda x: x, lambda x: x + x] - pde_lib = PDELibrary( - library_functions=library_functions, - function_names=library_function_names, - derivative_order=4, - spatial_grid=spatial_grid, - include_bias=True, - ) - model = SINDy(feature_library=pde_lib).fit( - u, t, ensemble=True, n_models=10, n_subset=len(t) // 2 - ) - assert len(model.coef_list) == 10 - model = SINDy(feature_library=pde_lib).fit( - u, x_dot=u_dot, ensemble=True, n_models=10, n_subset=len(t) // 2 - ) - assert len(model.coef_list) == 10 - - -def test_ensemble_weak_pdes(data_1d_random_pde): - t, x, u, u_dot = data_1d_random_pde - library_functions = [lambda x: x, lambda x: x * x] - library_function_names = [lambda x: x, lambda x: x + x] - X, T = np.meshgrid(x, t) - XT = np.array([X, T]).T - weak_lib = WeakPDELibrary( - library_functions=library_functions, - function_names=library_function_names, - derivative_order=4, - spatiotemporal_grid=XT, - include_bias=True, - ) - model = 
SINDy(feature_library=weak_lib).fit( - u, t=t, ensemble=True, n_models=2, n_subset=len(t) // 2 - ) - assert len(model.coef_list) == 2 - model = SINDy(feature_library=weak_lib).fit( - u, x_dot=u_dot[:, 0, :], ensemble=True, n_models=2, n_subset=len(t) // 2 - ) - assert len(model.coef_list) == 2 - - -def test_library_ensemble(data_lorenz): - x, t = data_lorenz - library = PolynomialLibrary() - model = SINDy(feature_library=library).fit( - x, t=t, library_ensemble=True, n_models=10 - ) - assert len(model.coef_list) == 10 - - -def test_library_ensemble_pde(data_1d_random_pde): - t, spatial_grid, u, u_dot = data_1d_random_pde - library_functions = [lambda x: x, lambda x: x * x] - library_function_names = [lambda x: x, lambda x: x + x] - pde_lib = PDELibrary( - library_functions=library_functions, - function_names=library_function_names, - derivative_order=4, - spatial_grid=spatial_grid, - include_bias=True, - ) - model = SINDy(feature_library=pde_lib).fit( - u, t=t, library_ensemble=True, n_models=10 - ) - assert len(model.coef_list) == 10 - model = SINDy(feature_library=pde_lib).fit( - u, x_dot=u_dot, library_ensemble=True, n_models=10 - ) - assert len(model.coef_list) == 10 - - -def test_library_ensemble_weak_pde(data_1d_random_pde): - t, x, u, u_dot = data_1d_random_pde - library_functions = [lambda x: x, lambda x: x * x] - library_function_names = [lambda x: x, lambda x: x + x] - X, T = np.meshgrid(x, t) - XT = np.array([X, T]).T - weak_lib = WeakPDELibrary( - library_functions=library_functions, - function_names=library_function_names, - derivative_order=4, - spatiotemporal_grid=XT, - include_bias=True, - ) - model = SINDy(feature_library=weak_lib).fit( - u, t=t, library_ensemble=True, n_models=10 - ) - assert len(model.coef_list) == 10 - u_dot = weak_lib.convert_u_dot_integral(u) - model = SINDy(feature_library=weak_lib).fit( - u, x_dot=u_dot, library_ensemble=True, n_models=10 - ) - assert len(model.coef_list) == 10 - - -def test_both_ensemble(data_lorenz): - x, 
t = data_lorenz - library = PolynomialLibrary() - model = SINDy(feature_library=library).fit( - x, t=t, ensemble=True, library_ensemble=True, n_models=2 - ) - assert len(model.coef_list) == 2 - - -def test_both_ensemble_pde(data_1d_random_pde): - t, spatial_grid, u, u_dot = data_1d_random_pde - library_functions = [lambda x: x, lambda x: x * x] - library_function_names = [lambda x: x, lambda x: x + x] - pde_lib = PDELibrary( - library_functions=library_functions, - function_names=library_function_names, - derivative_order=4, - spatial_grid=spatial_grid, - include_bias=True, - ) - model = SINDy(feature_library=pde_lib).fit( - u, t=t, ensemble=True, library_ensemble=True, n_models=2 - ) - assert len(model.coef_list) == 2 - model = SINDy(feature_library=pde_lib).fit( - u, x_dot=u_dot, ensemble=True, library_ensemble=True, n_models=2 - ) - assert len(model.coef_list) == 2 - - -def test_both_ensemble_weak_pde(data_1d_random_pde): - t, x, u, u_dot = data_1d_random_pde - library_functions = [lambda x: x, lambda x: x * x] - library_function_names = [lambda x: x, lambda x: x + x] - X, T = np.meshgrid(x, t) - XT = np.array([X, T]).T - weak_lib = WeakPDELibrary( - library_functions=library_functions, - function_names=library_function_names, - derivative_order=4, - spatiotemporal_grid=XT, - include_bias=True, - ) - model = SINDy(feature_library=weak_lib).fit( - u, t=t, ensemble=True, library_ensemble=True, n_models=2 - ) - assert len(model.coef_list) == 2 - u_dot = weak_lib.convert_u_dot_integral(u) - model = SINDy(feature_library=weak_lib).fit( - u, x_dot=u_dot, ensemble=True, library_ensemble=True, n_models=2 - ) - assert len(model.coef_list) == 2 - - -@pytest.mark.parametrize( - "params", - [ - dict(ensemble=False, n_models=-1, n_subset=1), - dict(ensemble=False, n_models=0, n_subset=1), - dict(ensemble=False, n_models=1, n_subset=0), - dict(ensemble=False, n_models=1, n_subset=-1), - dict(ensemble=True, n_models=-1, n_subset=1), - dict(ensemble=True, n_models=0, 
n_subset=1), - dict(ensemble=True, n_models=1, n_subset=0), - dict(ensemble=True, n_models=1, n_subset=-1), - dict(ensemble=True, n_models=1, n_subset=0), - ], -) -def test_bad_ensemble_params(data_lorenz, params): - x, t = data_lorenz - library = PolynomialLibrary().fit(x) - - constraint_rhs = np.ones(2) - constraint_lhs = np.zeros((2, x.shape[1] * library.n_output_features_)) - - target_1, target_2 = 1, 3 - constraint_lhs[0, 3] = target_1 - constraint_lhs[1, library.n_output_features_] = target_2 - - optimizer = ConstrainedSR3( - constraint_lhs=constraint_lhs, constraint_rhs=constraint_rhs - ) - with pytest.raises(ValueError): - SINDy(feature_library=library, optimizer=optimizer).fit(x, t, **params) - - def test_data_shapes(): model = SINDy() n = 10 @@ -880,32 +677,34 @@ def test_multiple_trajectories_and_ensemble(diffuse_multiple_trajectories): optimizer = STLSQ(threshold=0.1, alpha=1e-5, normalize_columns=False) model = SINDy(feature_library=pde_lib, optimizer=optimizer, feature_names=["u"]) - model.fit(u, multiple_trajectories=True, t=t, ensemble=False) + model.fit(u, multiple_trajectories=True, t=t) print(model.coefficients(), model.coefficients()[0][-1]) assert abs(model.coefficients()[0][-1] - 1) < 1e-2 assert np.all(model.coefficients()[0][:-1] == 0) model = SINDy(feature_library=weak_lib, optimizer=optimizer, feature_names=["u"]) - model.fit(u, multiple_trajectories=True, t=t, ensemble=False) + model.fit(u, multiple_trajectories=True, t=t) assert abs(model.coefficients()[0][-1] - 1) < 1e-2 assert np.all(model.coefficients()[0][:-1] == 0) + optimizer = EnsembleOptimizer(opt=optimizer, bagging=True, n_subset=len(t)) + model = SINDy(feature_library=pde_lib, optimizer=optimizer, feature_names=["u"]) - model.fit(u, multiple_trajectories=True, t=t, ensemble=True, n_subset=len(t)) + model.fit(u, multiple_trajectories=True, t=t) assert abs(model.coefficients()[0][-1] - 1) < 1e-2 assert np.all(model.coefficients()[0][:-1] == 0) model = 
SINDy(feature_library=weak_lib, optimizer=optimizer, feature_names=["u"]) - model.fit(u, multiple_trajectories=True, t=t, ensemble=True, n_subset=len(t)) + model.fit(u, multiple_trajectories=True, t=t) assert abs(model.coefficients()[0][-1] - 1) < 1e-2 assert np.all(model.coefficients()[0][:-1] == 0) model = SINDy(feature_library=pde_lib, optimizer=optimizer, feature_names=["u"]) - model.fit(u, multiple_trajectories=True, t=t, ensemble=True) + model.fit(u, multiple_trajectories=True, t=t) assert abs(model.coefficients()[0][-1] - 1) < 1e-2 assert np.all(model.coefficients()[0][:-1] == 0) model = SINDy(feature_library=weak_lib, optimizer=optimizer, feature_names=["u"]) - model.fit(u, multiple_trajectories=True, t=t, ensemble=True) + model.fit(u, multiple_trajectories=True, t=t) assert abs(model.coefficients()[0][-1] - 1) < 1e-2 assert np.all(model.coefficients()[0][:-1] == 0)