From adc41eeb8ce69396970b7c79ba9a218d8a7e20bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= Date: Fri, 22 Oct 2021 12:23:30 +0200 Subject: [PATCH] Replace #507 + fix bug with XGBoost converter when base_score is None (#510) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Replace #507 * fix xgboost converter with xgboost 1.5.0 * fix nan values * update catboost test * update ci Signed-off-by: xavier dupré Co-authored-by: xavier dupré Co-authored-by: Karthikeyan Singaravelan --- .azure-pipelines/win32-conda-CI.yml | 5 + README.md | 2 +- .../xgboost/operator_converters/XGBoost.py | 2 + tests/catboost/test_CatBoost_converter.py | 22 +++- tests/xgboost/test_xgboost_converters.py | 107 ++++-------------- 5 files changed, 48 insertions(+), 90 deletions(-) diff --git a/.azure-pipelines/win32-conda-CI.yml b/.azure-pipelines/win32-conda-CI.yml index cab7d7449..6792cd937 100644 --- a/.azure-pipelines/win32-conda-CI.yml +++ b/.azure-pipelines/win32-conda-CI.yml @@ -77,6 +77,11 @@ jobs: python -m pip install -r requirements-dev.txt displayName: 'Install dependencies-dev' + - script: | + call activate py$(python.version) + python -m pip install --upgrade scikit-learn + displayName: 'Install scikit-learn' + - script: | call activate py$(python.version) python -m pip install %COREML_PATH% diff --git a/README.md b/README.md index f302a4e5a..02ec43e3c 100644 --- a/README.md +++ b/README.md @@ -76,7 +76,7 @@ input2 = Input(shape=(D,)) mapped1_2 = sub_model1(input1) mapped2_2 = sub_model2(input2) sub_sum = Add()([mapped1_2, mapped2_2]) -keras_model = Model(inputs=[input1, input2], output=sub_sum) +keras_model = Model(inputs=[input1, input2], outputs=sub_sum) # Convert it! The target_opset parameter is optional. onnx_model = onnxmltools.convert_keras(keras_model, target_opset=7) diff --git a/onnxmltools/convert/xgboost/operator_converters/XGBoost.py b/onnxmltools/convert/xgboost/operator_converters/XGBoost.py index 3c82d1742..58185ff6d 100644 --- a/onnxmltools/convert/xgboost/operator_converters/XGBoost.py +++ b/onnxmltools/convert/xgboost/operator_converters/XGBoost.py @@ -33,6 +33,8 @@ def common_members(xgb_node, inputs): params = XGBConverter.get_xgb_params(xgb_node) objective = params["objective"] base_score = params["base_score"] + if base_score is None: + base_score = 0.5 booster = xgb_node.get_booster() # The json format was available in October 2017. # XGBoost 0.7 was the first version released with it. diff --git a/tests/catboost/test_CatBoost_converter.py b/tests/catboost/test_CatBoost_converter.py index 007205b8a..2dfc99c8d 100644 --- a/tests/catboost/test_CatBoost_converter.py +++ b/tests/catboost/test_CatBoost_converter.py @@ -4,16 +4,25 @@ Tests for CatBoostRegressor and CatBoostClassifier converter. """ import unittest -import numpy import warnings -import catboost - -from sklearn.datasets import make_regression, make_classification +from distutils.version import StrictVersion +import numpy +try: + import sklearn + from sklearn.datasets import make_regression, make_classification +except (ImportError, FileNotFoundError): + sklearn = None +try: + import catboost +except (ImportError, FileNotFoundError): + catboost = None from onnxmltools.convert import convert_catboost from onnxmltools.utils import dump_data_and_model, dump_single_regression, dump_multiple_classification class TestCatBoost(unittest.TestCase): + + @unittest.skipIf(catboost is None or sklearn is None, reason="catboost not imported") def test_catboost_regressor(self): X, y = make_regression(n_samples=100, n_features=4, random_state=0) catboost_model = catboost.CatBoostRegressor(task_type='CPU', loss_function='RMSE', @@ -26,11 +35,11 @@ def test_catboost_regressor(self): self.assertTrue(catboost_onnx is not None) dump_data_and_model(X.astype(numpy.float32), catboost_model, catboost_onnx, basename="CatBoostReg-Dec4") + @unittest.skipIf(catboost is None or sklearn is None, reason="catboost not imported") def test_catboost_bin_classifier(self): import onnxruntime - from distutils.version import StrictVersion - if StrictVersion(onnxruntime.__version__) >= StrictVersion('1.3.0'): + if StrictVersion('.'.join(onnxruntime.__version__.split('.')[:2])) >= StrictVersion('1.3.0'): X, y = make_classification(n_samples=100, n_features=4, random_state=0) catboost_model = catboost.CatBoostClassifier(task_type='CPU', loss_function='CrossEntropy', n_estimators=10, verbose=0) @@ -45,6 +54,7 @@ def test_catboost_bin_classifier(self): warnings.warn('Converted CatBoost models for binary classification work with onnxruntime version 1.3.0 or ' 'a newer one') + @unittest.skipIf(catboost is None or sklearn is None, reason="catboost not imported") def test_catboost_multi_classifier(self): X, y = make_classification(n_samples=10, n_informative=8, n_classes=3, random_state=0) catboost_model = catboost.CatBoostClassifier(task_type='CPU', loss_function='MultiClass', diff --git a/tests/xgboost/test_xgboost_converters.py b/tests/xgboost/test_xgboost_converters.py index 40a0d136f..1fcbe302f 100644 --- a/tests/xgboost/test_xgboost_converters.py +++ b/tests/xgboost/test_xgboost_converters.py @@ -53,13 +53,8 @@ def test_xgb_regressor(self): self.assertTrue(conv_model is not None) dump_data_and_model( x_test.astype("float32"), - xgb, - conv_model, - basename="SklearnXGBRegressor-Dec3", - allow_failure="StrictVersion(" - "onnx.__version__)" - "< StrictVersion('1.3.0')", - ) + xgb, conv_model, + basename="SklearnXGBRegressor-Dec3") def test_xgb_classifier(self): xgb, x_test = _fit_classification_model(XGBClassifier(), 2) @@ -68,14 +63,8 @@ def test_xgb_classifier(self): target_opset=TARGET_OPSET) self.assertTrue(conv_model is not None) dump_data_and_model( - x_test, - xgb, - conv_model, - basename="SklearnXGBClassifier", - allow_failure="StrictVersion(" - "onnx.__version__)" - "< StrictVersion('1.3.0')", - ) + x_test, xgb, conv_model, + basename="SklearnXGBClassifier") def test_xgb_classifier_uint8(self): xgb, x_test = _fit_classification_model( @@ -85,14 +74,8 @@ def test_xgb_classifier_uint8(self): target_opset=TARGET_OPSET) self.assertTrue(conv_model is not None) dump_data_and_model( - x_test, - xgb, - conv_model, - basename="SklearnXGBClassifier", - allow_failure="StrictVersion(" - "onnx.__version__)" - "< StrictVersion('1.3.0')", - ) + x_test, xgb, conv_model, + basename="SklearnXGBClassifier") def test_xgb_classifier_multi(self): xgb, x_test = _fit_classification_model(XGBClassifier(), 3) @@ -101,14 +84,8 @@ def test_xgb_classifier_multi(self): target_opset=TARGET_OPSET) self.assertTrue(conv_model is not None) dump_data_and_model( - x_test, - xgb, - conv_model, - basename="SklearnXGBClassifierMulti", - allow_failure="StrictVersion(" - "onnx.__version__)" - "< StrictVersion('1.3.0')", - ) + x_test, xgb, conv_model, + basename="SklearnXGBClassifierMulti") def test_xgb_classifier_multi_reglog(self): xgb, x_test = _fit_classification_model( @@ -118,14 +95,8 @@ def test_xgb_classifier_multi_reglog(self): target_opset=TARGET_OPSET) self.assertTrue(conv_model is not None) dump_data_and_model( - x_test, - xgb, - conv_model, - basename="SklearnXGBClassifierMultiRegLog", - allow_failure="StrictVersion(" - "onnx.__version__)" - "< StrictVersion('1.3.0')", - ) + x_test, xgb, conv_model, + basename="SklearnXGBClassifierMultiRegLog") def test_xgb_classifier_reglog(self): xgb, x_test = _fit_classification_model( @@ -135,14 +106,8 @@ def test_xgb_classifier_reglog(self): target_opset=TARGET_OPSET) self.assertTrue(conv_model is not None) dump_data_and_model( - x_test, - xgb, - conv_model, - basename="SklearnXGBClassifierRegLog", - allow_failure="StrictVersion(" - "onnx.__version__)" - "< StrictVersion('1.3.0')", - ) + x_test, xgb, conv_model, + basename="SklearnXGBClassifierRegLog") def test_xgb_classifier_multi_str_labels(self): xgb, x_test = _fit_classification_model( @@ -152,14 +117,8 @@ def test_xgb_classifier_multi_str_labels(self): target_opset=TARGET_OPSET) self.assertTrue(conv_model is not None) dump_data_and_model( - x_test, - xgb, - conv_model, - basename="SklearnXGBClassifierMultiStrLabels", - allow_failure="StrictVersion(" - "onnx.__version__)" - "< StrictVersion('1.3.0')", - ) + x_test, xgb, conv_model, + basename="SklearnXGBClassifierMultiStrLabels") def test_xgb_classifier_multi_discrete_int_labels(self): iris = load_iris() @@ -180,13 +139,8 @@ def test_xgb_classifier_multi_discrete_int_labels(self): self.assertTrue(conv_model is not None) dump_data_and_model( x_test.astype("float32"), - xgb, - conv_model, - basename="SklearnXGBClassifierMultiDiscreteIntLabels", - allow_failure="StrictVersion(" - "onnx.__version__)" - "< StrictVersion('1.3.0')", - ) + xgb, conv_model, + basename="SklearnXGBClassifierMultiDiscreteIntLabels") def test_xgboost_booster_classifier_bin(self): x, y = make_classification(n_classes=2, n_features=5, @@ -202,9 +156,7 @@ def test_xgboost_booster_classifier_bin(self): [('input', FloatTensorType([None, x.shape[1]]))], target_opset=TARGET_OPSET) dump_data_and_model(x_test.astype(np.float32), - model, model_onnx, - allow_failure="StrictVersion(onnx.__version__) < StrictVersion('1.3.0')", - basename="XGBBoosterMCl") + model, model_onnx, basename="XGBBoosterMCl") def test_xgboost_booster_classifier_multiclass_softprob(self): x, y = make_classification(n_classes=3, n_features=5, @@ -221,9 +173,7 @@ def test_xgboost_booster_classifier_multiclass_softprob(self): [('input', FloatTensorType([None, x.shape[1]]))], target_opset=TARGET_OPSET) dump_data_and_model(x_test.astype(np.float32), - model, model_onnx, - allow_failure="StrictVersion(onnx.__version__) < StrictVersion('1.3.0')", - basename="XGBBoosterMClSoftProb") + model, model_onnx, basename="XGBBoosterMClSoftProb") def test_xgboost_booster_classifier_multiclass_softmax(self): x, y = make_classification(n_classes=3, n_features=5, @@ -240,9 +190,7 @@ def test_xgboost_booster_classifier_multiclass_softmax(self): [('input', FloatTensorType([None, x.shape[1]]))], target_opset=TARGET_OPSET) dump_data_and_model(x_test.astype(np.float32), - model, model_onnx, - allow_failure="StrictVersion(onnx.__version__) < StrictVersion('1.3.0')", - basename="XGBBoosterMClSoftMax") + model, model_onnx, basename="XGBBoosterMClSoftMax") def test_xgboost_booster_classifier_reg(self): x, y = make_classification(n_classes=2, n_features=5, @@ -259,9 +207,7 @@ def test_xgboost_booster_classifier_reg(self): [('input', FloatTensorType([None, x.shape[1]]))], target_opset=TARGET_OPSET) dump_data_and_model(x_test.astype(np.float32), - model, model_onnx, - allow_failure="StrictVersion(onnx.__version__) < StrictVersion('1.3.0')", - basename="XGBBoosterReg") + model, model_onnx, basename="XGBBoosterReg") def test_xgboost_10(self): this = os.path.abspath(os.path.dirname(__file__)) @@ -279,9 +225,9 @@ def test_xgboost_10(self): } train_df = pandas.read_csv(train) - X_train, y_train = train_df.drop('label', axis=1).values, train_df['label'].values + X_train, y_train = train_df.drop('label', axis=1).values, train_df['label'].fillna(0).values test_df = pandas.read_csv(test) - X_test, y_test = test_df.drop('label', axis=1).values, test_df['label'].values + X_test, y_test = test_df.drop('label', axis=1).values, test_df['label'].fillna(0).values regressor = XGBRegressor(verbose=0, objective='reg:squarederror', **param_distributions) regressor.fit(X_train, y_train) @@ -292,9 +238,7 @@ def test_xgboost_10(self): target_opset=TARGET_OPSET) dump_data_and_model( - X_test.astype(np.float32), - regressor, model_onnx, - allow_failure="StrictVersion(onnx.__version__) < StrictVersion('1.3.0')", + X_test.astype(np.float32), regressor, model_onnx, basename="XGBBoosterRegBug") def test_xgboost_classifier_i5450(self): @@ -315,9 +259,7 @@ def test_xgboost_classifier_i5450(self): bst = clr.get_booster() bst.dump_model('dump.raw.txt') dump_data_and_model( - X_test.astype(np.float32) + 1e-5, - clr, onx, - allow_failure="StrictVersion(onnx.__version__) < StrictVersion('1.3.0')", + X_test.astype(np.float32) + 1e-5, clr, onx, basename="XGBClassifierIris") def test_xgboost_example_mnist(self): @@ -342,7 +284,6 @@ def test_xgboost_example_mnist(self): dump_data_and_model( X_test.astype(np.float32), clf, onnx_model, - allow_failure="StrictVersion(onnx.__version__) < StrictVersion('1.3.0')", basename="XGBoostExample") def test_xgb_empty_tree(self):