Skip to content

Commit

Permalink
[tests][python-package] change boston dataset to synthetic dataset in…
Browse files Browse the repository at this point in the history
… tests that don't check score (#4895)

* change boston dataset to synthetic dataset in tests that don't evaluate score

* format imports
  • Loading branch information
jmoralez committed Dec 20, 2021
1 parent 8e729af commit 8a34b1a
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 16 deletions.
18 changes: 9 additions & 9 deletions tests/python_package_test/test_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

import lightgbm as lgb

from .utils import load_boston, load_breast_cancer, load_digits, load_iris
from .utils import load_boston, load_breast_cancer, load_digits, load_iris, make_synthetic_regression

decreasing_generator = itertools.count(0, -1)

Expand Down Expand Up @@ -731,7 +731,7 @@ def test_continue_train():


def test_continue_train_reused_dataset():
X, y = load_boston(return_X_y=True)
X, y = make_synthetic_regression()
params = {
'objective': 'regression',
'verbose': -1
Expand Down Expand Up @@ -791,7 +791,7 @@ def test_continue_train_multiclass():


def test_cv():
X_train, y_train = load_boston(return_X_y=True)
X_train, y_train = make_synthetic_regression()
params = {'verbose': -1}
lgb_train = lgb.Dataset(X_train, y_train)
# shuffle = False, override metric in params
Expand Down Expand Up @@ -887,7 +887,7 @@ def test_cvbooster():


def test_feature_name():
X_train, y_train = load_boston(return_X_y=True)
X_train, y_train = make_synthetic_regression()
params = {'verbose': -1}
lgb_train = lgb.Dataset(X_train, y_train)
feature_names = [f'f_{i}' for i in range(X_train.shape[-1])]
Expand Down Expand Up @@ -917,7 +917,7 @@ def test_feature_name_with_non_ascii():

def test_save_load_copy_pickle():
def train_and_predict(init_model=None, return_model=False):
X, y = load_boston(return_X_y=True)
X, y = make_synthetic_regression()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
params = {
'objective': 'regression',
Expand Down Expand Up @@ -2102,7 +2102,7 @@ def test_default_objective_and_metric():

@pytest.mark.skipif(psutil.virtual_memory().available / 1024 / 1024 / 1024 < 3, reason='not enough RAM')
def test_model_size():
X, y = load_boston(return_X_y=True)
X, y = make_synthetic_regression()
data = lgb.Dataset(X, y)
bst = lgb.train({'verbose': -1}, data, num_boost_round=2)
y_pred = bst.predict(X)
Expand Down Expand Up @@ -2515,7 +2515,7 @@ def test_dataset_params_with_reference():

def test_extra_trees():
# check extra trees increases regularization
X, y = load_boston(return_X_y=True)
X, y = make_synthetic_regression()
lgb_x = lgb.Dataset(X, label=y)
params = {'objective': 'regression',
'num_leaves': 32,
Expand All @@ -2534,7 +2534,7 @@ def test_extra_trees():

def test_path_smoothing():
# check path smoothing increases regularization
X, y = load_boston(return_X_y=True)
X, y = make_synthetic_regression()
lgb_x = lgb.Dataset(X, label=y)
params = {'objective': 'regression',
'num_leaves': 32,
Expand Down Expand Up @@ -2804,7 +2804,7 @@ def inner_test(X, y, params, early_stopping_rounds):
np.testing.assert_allclose(pred4, pred6)

# test for regression
X, y = load_boston(return_X_y=True)
X, y = make_synthetic_regression()
params = {
'objective': 'regression',
'verbose': -1,
Expand Down
15 changes: 8 additions & 7 deletions tests/python_package_test/test_sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@

import lightgbm as lgb

from .utils import load_boston, load_breast_cancer, load_digits, load_iris, load_linnerud, make_ranking
from .utils import (load_boston, load_breast_cancer, load_digits, load_iris, load_linnerud, make_ranking,
make_synthetic_regression)

sk_version = parse_version(sk_version)
if sk_version < parse_version("0.23"):
Expand Down Expand Up @@ -184,7 +185,7 @@ def test_eval_at_aliases():

@pytest.mark.parametrize("custom_objective", [True, False])
def test_objective_aliases(custom_objective):
X, y = load_boston(return_X_y=True)
X, y = make_synthetic_regression()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
if custom_objective:
obj = custom_dummy_obj
Expand Down Expand Up @@ -440,7 +441,7 @@ def test_regressor_chain():


def test_clone_and_property():
X, y = load_boston(return_X_y=True)
X, y = make_synthetic_regression()
gbm = lgb.LGBMRegressor(n_estimators=10, verbose=-1)
gbm.fit(X, y)

Expand All @@ -458,7 +459,7 @@ def test_clone_and_property():


def test_joblib():
X, y = load_boston(return_X_y=True)
X, y = make_synthetic_regression()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
gbm = lgb.LGBMRegressor(n_estimators=10, objective=custom_asymmetric_obj,
verbose=-1, importance_type='split')
Expand Down Expand Up @@ -499,7 +500,7 @@ def test_non_serializable_objects_in_callbacks(tmp_path):
with pytest.raises(Exception, match="This class in not picklable"):
joblib.dump(unpicklable_callback, tmp_path / 'tmp.joblib')

X, y = load_boston(return_X_y=True)
X, y = make_synthetic_regression()
gbm = lgb.LGBMRegressor(n_estimators=5)
gbm.fit(X, y, callbacks=[unpicklable_callback])
assert gbm.booster_.attr('attr_set_inside_callback') == '40'
Expand Down Expand Up @@ -757,7 +758,7 @@ def test_predict_with_params_from_init():


def test_evaluate_train_set():
X, y = load_boston(return_X_y=True)
X, y = make_synthetic_regression()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
gbm = lgb.LGBMRegressor(n_estimators=10, verbose=-1)
gbm.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_test, y_test)])
Expand Down Expand Up @@ -1332,7 +1333,7 @@ def test_training_succeeds_when_data_is_dataframe_and_label_is_column_array(task
X, y = load_iris(return_X_y=True)
model_factory = lgb.LGBMClassifier
elif task == 'regression':
X, y = load_boston(return_X_y=True)
X, y = make_synthetic_regression()
model_factory = lgb.LGBMRegressor
X = pd.DataFrame(X)
y_col_array = y.reshape(-1, 1)
Expand Down
5 changes: 5 additions & 0 deletions tests/python_package_test/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,3 +109,8 @@ def make_ranking(n_samples=100, n_features=20, n_informative=5, gmax=2,
X[:, j] = bias + coef * y_vec

return X, y_vec, group_id_vec


@lru_cache(maxsize=None)
def make_synthetic_regression(n_samples=100, n_features=4, n_informative=2, random_state=42):
    """Return a small, reproducible synthetic regression dataset for tests.

    Thin cached wrapper around ``sklearn.datasets.make_regression``. The
    defaults reproduce the dataset this helper has always produced
    (4 features, 2 of them informative, seed 42), so existing callers such as
    ``X, y = make_synthetic_regression()`` are unaffected.

    Parameters
    ----------
    n_samples : int, optional (default=100)
        Number of rows to generate.
    n_features : int, optional (default=4)
        Total number of feature columns.
    n_informative : int, optional (default=2)
        Number of features actually used to build the target.
    random_state : int, optional (default=42)
        Seed forwarded to ``make_regression`` so the data is deterministic.

    Returns
    -------
    The value returned by ``sklearn.datasets.make_regression`` for these
    arguments (callers unpack it as ``X, y``).

    Notes
    -----
    Results are memoized with ``lru_cache``: repeated calls with the same
    arguments return the *same* objects, not fresh copies. Callers must not
    modify the returned arrays in place, otherwise the change leaks into every
    other test that requests the same dataset. All arguments must be hashable.
    """
    # Pass everything by keyword so the call does not depend on the positional
    # order of make_regression's parameters.
    return sklearn.datasets.make_regression(
        n_samples=n_samples,
        n_features=n_features,
        n_informative=n_informative,
        random_state=random_state,
    )

0 comments on commit 8a34b1a

Please sign in to comment.