Skip to content

Commit

Permalink
Replace #507 + fix bug with XGBoost converter when base_score is None (#510)
Browse files Browse the repository at this point in the history

* Replace #507
* fix xgboost converter with xgboost 1.5.0
* fix nan values
* update catboost test
* update ci

Signed-off-by: xavier dupré <xavier.dupre@gmail.com>

Co-authored-by: xavier dupré <xavier.dupre@gmail.com>
Co-authored-by: Karthikeyan Singaravelan <tir.karthi@gmail.com>
  • Loading branch information
3 people authored Oct 22, 2021
1 parent 4a44958 commit adc41ee
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 90 deletions.
5 changes: 5 additions & 0 deletions .azure-pipelines/win32-conda-CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,11 @@ jobs:
python -m pip install -r requirements-dev.txt
displayName: 'Install dependencies-dev'
- script: |
call activate py$(python.version)
python -m pip install --upgrade scikit-learn
displayName: 'Install scikit-learn'
- script: |
call activate py$(python.version)
python -m pip install %COREML_PATH%
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ input2 = Input(shape=(D,))
mapped1_2 = sub_model1(input1)
mapped2_2 = sub_model2(input2)
sub_sum = Add()([mapped1_2, mapped2_2])
keras_model = Model(inputs=[input1, input2], output=sub_sum)
keras_model = Model(inputs=[input1, input2], outputs=sub_sum)

# Convert it! The target_opset parameter is optional.
onnx_model = onnxmltools.convert_keras(keras_model, target_opset=7)
Expand Down
2 changes: 2 additions & 0 deletions onnxmltools/convert/xgboost/operator_converters/XGBoost.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ def common_members(xgb_node, inputs):
params = XGBConverter.get_xgb_params(xgb_node)
objective = params["objective"]
base_score = params["base_score"]
if base_score is None:
base_score = 0.5
booster = xgb_node.get_booster()
# The json format was available in October 2017.
# XGBoost 0.7 was the first version released with it.
Expand Down
22 changes: 16 additions & 6 deletions tests/catboost/test_CatBoost_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,25 @@
Tests for CatBoostRegressor and CatBoostClassifier converter.
"""
import unittest
import numpy
import warnings
import catboost

from sklearn.datasets import make_regression, make_classification
from distutils.version import StrictVersion
import numpy
try:
import sklearn
from sklearn.datasets import make_regression, make_classification
except (ImportError, FileNotFoundError):
sklearn = None
try:
import catboost
except (ImportError, FileNotFoundError):
catboost = None
from onnxmltools.convert import convert_catboost
from onnxmltools.utils import dump_data_and_model, dump_single_regression, dump_multiple_classification


class TestCatBoost(unittest.TestCase):

@unittest.skipIf(catboost is None or sklearn is None, reason="catboost not imported")
def test_catboost_regressor(self):
X, y = make_regression(n_samples=100, n_features=4, random_state=0)
catboost_model = catboost.CatBoostRegressor(task_type='CPU', loss_function='RMSE',
Expand All @@ -26,11 +35,11 @@ def test_catboost_regressor(self):
self.assertTrue(catboost_onnx is not None)
dump_data_and_model(X.astype(numpy.float32), catboost_model, catboost_onnx, basename="CatBoostReg-Dec4")

@unittest.skipIf(catboost is None or sklearn is None, reason="catboost not imported")
def test_catboost_bin_classifier(self):
import onnxruntime
from distutils.version import StrictVersion

if StrictVersion(onnxruntime.__version__) >= StrictVersion('1.3.0'):
if StrictVersion('.'.join(onnxruntime.__version__.split('.')[:2])) >= StrictVersion('1.3.0'):
X, y = make_classification(n_samples=100, n_features=4, random_state=0)
catboost_model = catboost.CatBoostClassifier(task_type='CPU', loss_function='CrossEntropy',
n_estimators=10, verbose=0)
Expand All @@ -45,6 +54,7 @@ def test_catboost_bin_classifier(self):
warnings.warn('Converted CatBoost models for binary classification work with onnxruntime version 1.3.0 or '
'a newer one')

@unittest.skipIf(catboost is None or sklearn is None, reason="catboost not imported")
def test_catboost_multi_classifier(self):
X, y = make_classification(n_samples=10, n_informative=8, n_classes=3, random_state=0)
catboost_model = catboost.CatBoostClassifier(task_type='CPU', loss_function='MultiClass',
Expand Down
107 changes: 24 additions & 83 deletions tests/xgboost/test_xgboost_converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,8 @@ def test_xgb_regressor(self):
self.assertTrue(conv_model is not None)
dump_data_and_model(
x_test.astype("float32"),
xgb,
conv_model,
basename="SklearnXGBRegressor-Dec3",
allow_failure="StrictVersion("
"onnx.__version__)"
"< StrictVersion('1.3.0')",
)
xgb, conv_model,
basename="SklearnXGBRegressor-Dec3")

def test_xgb_classifier(self):
xgb, x_test = _fit_classification_model(XGBClassifier(), 2)
Expand All @@ -68,14 +63,8 @@ def test_xgb_classifier(self):
target_opset=TARGET_OPSET)
self.assertTrue(conv_model is not None)
dump_data_and_model(
x_test,
xgb,
conv_model,
basename="SklearnXGBClassifier",
allow_failure="StrictVersion("
"onnx.__version__)"
"< StrictVersion('1.3.0')",
)
x_test, xgb, conv_model,
basename="SklearnXGBClassifier")

def test_xgb_classifier_uint8(self):
xgb, x_test = _fit_classification_model(
Expand All @@ -85,14 +74,8 @@ def test_xgb_classifier_uint8(self):
target_opset=TARGET_OPSET)
self.assertTrue(conv_model is not None)
dump_data_and_model(
x_test,
xgb,
conv_model,
basename="SklearnXGBClassifier",
allow_failure="StrictVersion("
"onnx.__version__)"
"< StrictVersion('1.3.0')",
)
x_test, xgb, conv_model,
basename="SklearnXGBClassifier")

def test_xgb_classifier_multi(self):
xgb, x_test = _fit_classification_model(XGBClassifier(), 3)
Expand All @@ -101,14 +84,8 @@ def test_xgb_classifier_multi(self):
target_opset=TARGET_OPSET)
self.assertTrue(conv_model is not None)
dump_data_and_model(
x_test,
xgb,
conv_model,
basename="SklearnXGBClassifierMulti",
allow_failure="StrictVersion("
"onnx.__version__)"
"< StrictVersion('1.3.0')",
)
x_test, xgb, conv_model,
basename="SklearnXGBClassifierMulti")

def test_xgb_classifier_multi_reglog(self):
xgb, x_test = _fit_classification_model(
Expand All @@ -118,14 +95,8 @@ def test_xgb_classifier_multi_reglog(self):
target_opset=TARGET_OPSET)
self.assertTrue(conv_model is not None)
dump_data_and_model(
x_test,
xgb,
conv_model,
basename="SklearnXGBClassifierMultiRegLog",
allow_failure="StrictVersion("
"onnx.__version__)"
"< StrictVersion('1.3.0')",
)
x_test, xgb, conv_model,
basename="SklearnXGBClassifierMultiRegLog")

def test_xgb_classifier_reglog(self):
xgb, x_test = _fit_classification_model(
Expand All @@ -135,14 +106,8 @@ def test_xgb_classifier_reglog(self):
target_opset=TARGET_OPSET)
self.assertTrue(conv_model is not None)
dump_data_and_model(
x_test,
xgb,
conv_model,
basename="SklearnXGBClassifierRegLog",
allow_failure="StrictVersion("
"onnx.__version__)"
"< StrictVersion('1.3.0')",
)
x_test, xgb, conv_model,
basename="SklearnXGBClassifierRegLog")

def test_xgb_classifier_multi_str_labels(self):
xgb, x_test = _fit_classification_model(
Expand All @@ -152,14 +117,8 @@ def test_xgb_classifier_multi_str_labels(self):
target_opset=TARGET_OPSET)
self.assertTrue(conv_model is not None)
dump_data_and_model(
x_test,
xgb,
conv_model,
basename="SklearnXGBClassifierMultiStrLabels",
allow_failure="StrictVersion("
"onnx.__version__)"
"< StrictVersion('1.3.0')",
)
x_test, xgb, conv_model,
basename="SklearnXGBClassifierMultiStrLabels")

def test_xgb_classifier_multi_discrete_int_labels(self):
iris = load_iris()
Expand All @@ -180,13 +139,8 @@ def test_xgb_classifier_multi_discrete_int_labels(self):
self.assertTrue(conv_model is not None)
dump_data_and_model(
x_test.astype("float32"),
xgb,
conv_model,
basename="SklearnXGBClassifierMultiDiscreteIntLabels",
allow_failure="StrictVersion("
"onnx.__version__)"
"< StrictVersion('1.3.0')",
)
xgb, conv_model,
basename="SklearnXGBClassifierMultiDiscreteIntLabels")

def test_xgboost_booster_classifier_bin(self):
x, y = make_classification(n_classes=2, n_features=5,
Expand All @@ -202,9 +156,7 @@ def test_xgboost_booster_classifier_bin(self):
[('input', FloatTensorType([None, x.shape[1]]))],
target_opset=TARGET_OPSET)
dump_data_and_model(x_test.astype(np.float32),
model, model_onnx,
allow_failure="StrictVersion(onnx.__version__) < StrictVersion('1.3.0')",
basename="XGBBoosterMCl")
model, model_onnx, basename="XGBBoosterMCl")

def test_xgboost_booster_classifier_multiclass_softprob(self):
x, y = make_classification(n_classes=3, n_features=5,
Expand All @@ -221,9 +173,7 @@ def test_xgboost_booster_classifier_multiclass_softprob(self):
[('input', FloatTensorType([None, x.shape[1]]))],
target_opset=TARGET_OPSET)
dump_data_and_model(x_test.astype(np.float32),
model, model_onnx,
allow_failure="StrictVersion(onnx.__version__) < StrictVersion('1.3.0')",
basename="XGBBoosterMClSoftProb")
model, model_onnx, basename="XGBBoosterMClSoftProb")

def test_xgboost_booster_classifier_multiclass_softmax(self):
x, y = make_classification(n_classes=3, n_features=5,
Expand All @@ -240,9 +190,7 @@ def test_xgboost_booster_classifier_multiclass_softmax(self):
[('input', FloatTensorType([None, x.shape[1]]))],
target_opset=TARGET_OPSET)
dump_data_and_model(x_test.astype(np.float32),
model, model_onnx,
allow_failure="StrictVersion(onnx.__version__) < StrictVersion('1.3.0')",
basename="XGBBoosterMClSoftMax")
model, model_onnx, basename="XGBBoosterMClSoftMax")

def test_xgboost_booster_classifier_reg(self):
x, y = make_classification(n_classes=2, n_features=5,
Expand All @@ -259,9 +207,7 @@ def test_xgboost_booster_classifier_reg(self):
[('input', FloatTensorType([None, x.shape[1]]))],
target_opset=TARGET_OPSET)
dump_data_and_model(x_test.astype(np.float32),
model, model_onnx,
allow_failure="StrictVersion(onnx.__version__) < StrictVersion('1.3.0')",
basename="XGBBoosterReg")
model, model_onnx, basename="XGBBoosterReg")

def test_xgboost_10(self):
this = os.path.abspath(os.path.dirname(__file__))
Expand All @@ -279,9 +225,9 @@ def test_xgboost_10(self):
}

train_df = pandas.read_csv(train)
X_train, y_train = train_df.drop('label', axis=1).values, train_df['label'].values
X_train, y_train = train_df.drop('label', axis=1).values, train_df['label'].fillna(0).values
test_df = pandas.read_csv(test)
X_test, y_test = test_df.drop('label', axis=1).values, test_df['label'].values
X_test, y_test = test_df.drop('label', axis=1).values, test_df['label'].fillna(0).values

regressor = XGBRegressor(verbose=0, objective='reg:squarederror', **param_distributions)
regressor.fit(X_train, y_train)
Expand All @@ -292,9 +238,7 @@ def test_xgboost_10(self):
target_opset=TARGET_OPSET)

dump_data_and_model(
X_test.astype(np.float32),
regressor, model_onnx,
allow_failure="StrictVersion(onnx.__version__) < StrictVersion('1.3.0')",
X_test.astype(np.float32), regressor, model_onnx,
basename="XGBBoosterRegBug")

def test_xgboost_classifier_i5450(self):
Expand All @@ -315,9 +259,7 @@ def test_xgboost_classifier_i5450(self):
bst = clr.get_booster()
bst.dump_model('dump.raw.txt')
dump_data_and_model(
X_test.astype(np.float32) + 1e-5,
clr, onx,
allow_failure="StrictVersion(onnx.__version__) < StrictVersion('1.3.0')",
X_test.astype(np.float32) + 1e-5, clr, onx,
basename="XGBClassifierIris")

def test_xgboost_example_mnist(self):
Expand All @@ -342,7 +284,6 @@ def test_xgboost_example_mnist(self):

dump_data_and_model(
X_test.astype(np.float32), clf, onnx_model,
allow_failure="StrictVersion(onnx.__version__) < StrictVersion('1.3.0')",
basename="XGBoostExample")

def test_xgb_empty_tree(self):
Expand Down

0 comments on commit adc41ee

Please sign in to comment.