Fix the converter when parameter best_ntree_limit is used #429

Merged · 3 commits · Jan 7, 2021
16 changes: 14 additions & 2 deletions onnxmltools/convert/xgboost/operator_converters/XGBoost.py
@@ -188,6 +188,12 @@ def convert(scope, operator, container):
         attr_pairs = XGBRegressorConverter._get_default_tree_attribute_pairs()
         attr_pairs['base_values'] = [base_score]
+
+        bst = xgb_node.get_booster()
+        best_ntree_limit = getattr(bst, 'best_ntree_limit', len(js_trees))
+        if best_ntree_limit < len(js_trees):
+            js_trees = js_trees[:best_ntree_limit]
+
         XGBConverter.fill_tree_attributes(js_trees, attr_pairs, [1 for _ in js_trees], False)

         # add nodes
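When a model is trained with early stopping, XGBoost records the winning iteration on the booster as best_ntree_limit and scores with only that many trees, even though the serialized model keeps every tree it grew. The hunk above makes the converter honour that limit: the parsed tree list is truncated before the tree attributes are filled, so the exported ONNX model uses the same trees as xgboost's own predict. A standalone sketch of the idea (a hypothetical helper, not converter code; it only assumes a fitted xgboost.Booster):

    import xgboost

    def trees_used_at_predict_time(booster: xgboost.Booster) -> list:
        # One dump string per tree, in boosting order.
        trees = booster.get_dump(dump_format='json')
        # Boosters trained without early stopping carry no best_ntree_limit,
        # so fall back to the full tree count, as the converter does.
        limit = getattr(booster, 'best_ntree_limit', len(trees))
        return trees[:limit] if limit < len(trees) else trees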
@@ -222,13 +228,19 @@ def convert(scope, operator, container):
         objective, base_score, js_trees = XGBConverter.common_members(xgb_node, inputs)

         params = XGBConverter.get_xgb_params(xgb_node)

         attr_pairs = XGBClassifierConverter._get_default_tree_attribute_pairs()
         XGBConverter.fill_tree_attributes(js_trees, attr_pairs, [1 for _ in js_trees], True)
+        ncl = (max(attr_pairs['class_treeids']) + 1) // params['n_estimators']
+
+        bst = xgb_node.get_booster()
+        best_ntree_limit = getattr(bst, 'best_ntree_limit', len(js_trees)) * ncl
+        if best_ntree_limit < len(js_trees):
+            js_trees = js_trees[:best_ntree_limit]
+            attr_pairs = XGBClassifierConverter._get_default_tree_attribute_pairs()
+            XGBConverter.fill_tree_attributes(js_trees, attr_pairs, [1 for _ in js_trees], True)

         if len(attr_pairs['class_treeids']) == 0:
             raise RuntimeError("XGBoost model is empty.")
-        ncl = (max(attr_pairs['class_treeids']) + 1) // params['n_estimators']
         if ncl <= 1:
             ncl = 2
         # See https://github.com/dmlc/xgboost/blob/master/src/common/math.h#L23.
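The classifier needs one extra step that the regressor does not: a multi-class booster grows ncl trees per boosting round (one per class), and the converter treats best_ntree_limit as a round count, so the limit is scaled by ncl before slicing. For example, with n_estimators=2 and three classes the dump holds six trees; if early stopping keeps only the first round, 1 × 3 = 3 trees survive. Truncation also forces the two follow-ups visible in the hunk: attr_pairs is rebuilt, because class_treeids was filled from the full tree list, and ncl is now computed before the slice, since recomputing it afterwards would divide the truncated tree count by the original n_estimators and yield the wrong class count. A sketch of the scaling under the same assumptions as before (hypothetical helper; n_classes plays the role of ncl):

    import xgboost

    def classifier_trees_used(booster: xgboost.Booster, n_classes: int) -> list:
        trees = booster.get_dump(dump_format='json')
        # best_ntree_limit counts rounds; each round contributes one tree per class.
        rounds = getattr(booster, 'best_ntree_limit', len(trees))
        limit = rounds * n_classes
        return trees[:limit] if limit < len(trees) else trees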
62 changes: 31 additions & 31 deletions tests/xgboost/test_xgboost_converters.py
@@ -14,6 +14,7 @@
 from onnxmltools.convert import convert_xgboost
 from onnxmltools.convert.common.data_types import FloatTensorType
 from onnxmltools.utils import dump_data_and_model
+from onnxruntime import InferenceSession


 def _fit_classification_model(model, n_classes, is_str=False, dtype=None):
@@ -31,8 +32,6 @@ def _fit_classification_model(model, n_classes, is_str=False, dtype=None):

 class TestXGBoostModels(unittest.TestCase):

-    @unittest.skipIf(sys.version_info[0] == 2,
-                     reason="xgboost converter not tested on python 2")
     def test_xgb_regressor(self):
         iris = load_diabetes()
         x = iris.data
@@ -42,7 +41,7 @@ def test_xgb_regressor(self):
         xgb = XGBRegressor()
         xgb.fit(x_train, y_train)
         conv_model = convert_xgboost(
-            xgb, initial_types=[('input', FloatTensorType(shape=['None', 'None']))])
+            xgb, initial_types=[('input', FloatTensorType(shape=[None, None]))])
         self.assertTrue(conv_model is not None)
         dump_data_and_model(
             x_test.astype("float32"),
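(The shape change repeated throughout these tests is not cosmetic: FloatTensorType treats a string as a named symbolic dimension, so shape=['None', 'None'] declared dimensions literally named "None", while the None literal declares an anonymous dynamic dimension, which is what the tests intend.)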
@@ -54,12 +53,10 @@ def test_xgb_regressor(self):
             "< StrictVersion('1.3.0')",
         )

-    @unittest.skipIf(sys.version_info[0] == 2,
-                     reason="xgboost converter not tested on python 2")
     def test_xgb_classifier(self):
         xgb, x_test = _fit_classification_model(XGBClassifier(), 2)
         conv_model = convert_xgboost(
-            xgb, initial_types=[('input', FloatTensorType(shape=['None', 'None']))])
+            xgb, initial_types=[('input', FloatTensorType(shape=[None, None]))])
         self.assertTrue(conv_model is not None)
         dump_data_and_model(
             x_test,
@@ -71,8 +68,6 @@ def test_xgb_classifier(self):
             "< StrictVersion('1.3.0')",
         )

-    @unittest.skipIf(sys.version_info[0] == 2,
-                     reason="xgboost converter not tested on python 2")
     def test_xgb_classifier_uint8(self):
         xgb, x_test = _fit_classification_model(
             XGBClassifier(), 2, dtype=np.uint8)
@@ -89,12 +84,10 @@ def test_xgb_classifier_uint8(self):
             "< StrictVersion('1.3.0')",
         )

-    @unittest.skipIf(sys.version_info[0] == 2,
-                     reason="xgboost converter not tested on python 2")
     def test_xgb_classifier_multi(self):
         xgb, x_test = _fit_classification_model(XGBClassifier(), 3)
         conv_model = convert_xgboost(
-            xgb, initial_types=[('input', FloatTensorType(shape=['None', 'None']))])
+            xgb, initial_types=[('input', FloatTensorType(shape=[None, None]))])
         self.assertTrue(conv_model is not None)
         dump_data_and_model(
             x_test,
@@ -106,13 +99,11 @@ def test_xgb_classifier_multi(self):
             "< StrictVersion('1.3.0')",
         )

-    @unittest.skipIf(sys.version_info[0] == 2,
-                     reason="xgboost converter not tested on python 2")
     def test_xgb_classifier_multi_reglog(self):
         xgb, x_test = _fit_classification_model(
             XGBClassifier(objective='reg:logistic'), 4)
         conv_model = convert_xgboost(
-            xgb, initial_types=[('input', FloatTensorType(shape=['None', 'None']))])
+            xgb, initial_types=[('input', FloatTensorType(shape=[None, None]))])
         self.assertTrue(conv_model is not None)
         dump_data_and_model(
             x_test,
@@ -124,13 +115,11 @@ def test_xgb_classifier_multi_reglog(self):
             "< StrictVersion('1.3.0')",
         )

-    @unittest.skipIf(sys.version_info[0] == 2,
-                     reason="xgboost converter not tested on python 2")
     def test_xgb_classifier_reglog(self):
         xgb, x_test = _fit_classification_model(
             XGBClassifier(objective='reg:logistic'), 2)
         conv_model = convert_xgboost(
-            xgb, initial_types=[('input', FloatTensorType(shape=['None', 'None']))])
+            xgb, initial_types=[('input', FloatTensorType(shape=[None, None]))])
         self.assertTrue(conv_model is not None)
         dump_data_and_model(
             x_test,
@@ -142,13 +131,11 @@ def test_xgb_classifier_reglog(self):
             "< StrictVersion('1.3.0')",
         )

-    @unittest.skipIf(sys.version_info[0] == 2,
-                     reason="xgboost converter not tested on python 2")
     def test_xgb_classifier_multi_str_labels(self):
         xgb, x_test = _fit_classification_model(
             XGBClassifier(n_estimators=4), 5, is_str=True)
         conv_model = convert_xgboost(
-            xgb, initial_types=[('input', FloatTensorType(shape=['None', 'None']))])
+            xgb, initial_types=[('input', FloatTensorType(shape=[None, None]))])
         self.assertTrue(conv_model is not None)
         dump_data_and_model(
             x_test,
@@ -160,8 +147,6 @@ def test_xgb_classifier_multi_str_labels(self):
             "< StrictVersion('1.3.0')",
         )

-    @unittest.skipIf(sys.version_info[0] == 2,
-                     reason="xgboost converter not tested on python 2")
     def test_xgb_classifier_multi_discrete_int_labels(self):
         iris = load_iris()
         x = iris.data[:, :2]
@@ -176,7 +161,7 @@ def test_xgb_classifier_multi_discrete_int_labels(self):
         xgb = XGBClassifier(n_estimators=3)
         xgb.fit(x_train, y_train)
         conv_model = convert_xgboost(
-            xgb, initial_types=[('input', FloatTensorType(shape=['None', 'None']))])
+            xgb, initial_types=[('input', FloatTensorType(shape=[None, None]))])
         self.assertTrue(conv_model is not None)
         dump_data_and_model(
             x_test.astype("float32"),
@@ -188,8 +173,6 @@ def test_xgb_classifier_multi_discrete_int_labels(self):
             "< StrictVersion('1.3.0')",
         )

-    @unittest.skipIf(sys.version_info[0] == 2,
-                     reason="xgboost converter not tested on python 2")
     def test_xgboost_booster_classifier_bin(self):
         x, y = make_classification(n_classes=2, n_features=5,
                                    n_samples=100,
@@ -207,8 +190,6 @@ def test_xgboost_booster_classifier_bin(self):
             allow_failure="StrictVersion(onnx.__version__) < StrictVersion('1.3.0')",
             basename="XGBBoosterMCl")

-    @unittest.skipIf(sys.version_info[0] == 2,
-                     reason="xgboost converter not tested on python 2")
     def test_xgboost_booster_classifier_multiclass(self):
         x, y = make_classification(n_classes=3, n_features=5,
                                    n_samples=100,
@@ -227,8 +208,6 @@ def test_xgboost_booster_classifier_multiclass(self):
             allow_failure="StrictVersion(onnx.__version__) < StrictVersion('1.3.0')",
             basename="XGBBoosterMCl")

-    @unittest.skipIf(sys.version_info[0] == 2,
-                     reason="xgboost converter not tested on python 2")
     def test_xgboost_booster_classifier_reg(self):
         x, y = make_classification(n_classes=2, n_features=5,
                                    n_samples=100,
@@ -247,8 +226,6 @@ def test_xgboost_booster_classifier_reg(self):
             allow_failure="StrictVersion(onnx.__version__) < StrictVersion('1.3.0')",
             basename="XGBBoosterReg")

-    @unittest.skipIf(sys.version_info[0] == 2,
-                     reason="xgboost converter not tested on python 2")
     def test_xgboost_10(self):
         this = os.path.abspath(os.path.dirname(__file__))
         train = os.path.join(this, "input_fail_train.csv")
@@ -282,6 +259,29 @@ def test_xgboost_10(self):
             allow_failure="StrictVersion(onnx.__version__) < StrictVersion('1.3.0')",
             basename="XGBBoosterRegBug")

+    def test_xgboost_classifier_i5450(self):
+        iris = load_iris()
+        X, y = iris.data, iris.target
+        X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=10)
+        clr = XGBClassifier(objective="multi:softmax", max_depth=1, n_estimators=2)
+        clr.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=40)
+        initial_type = [('float_input', FloatTensorType([None, 4]))]
+        onx = convert_xgboost(clr, initial_types=initial_type)
+        sess = InferenceSession(onx.SerializeToString())
+        input_name = sess.get_inputs()[0].name
+        label_name = sess.get_outputs()[1].name
+        predict_list = [1., 20., 466., 0.]
+        predict_array = np.array(predict_list).reshape((1, -1)).astype(np.float32)
+        pred_onx = sess.run([label_name], {input_name: predict_array})[0]
+        pred_xgboost = clr.predict_proba(predict_array)
+        bst = clr.get_booster()
+        bst.dump_model('dump.raw.txt')
+        dump_data_and_model(
+            X_test.astype(np.float32) + 1e-5,
+            clr, onx,
+            allow_failure="StrictVersion(onnx.__version__) < StrictVersion('1.3.0')",
+            basename="XGBClassifierIris")

     def test_xgboost_example_mnist(self):
         """
         Train a simple xgboost model and store associated artefacts.
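The otherwise unused pred_onx / pred_xgboost pair in test_xgboost_classifier_i5450 hints at the property this test guards: once early stopping sets best_ntree_limit, onnxruntime's probabilities must match predict_proba, which the old converter broke by exporting every tree. A hypothetical explicit comparison, reusing the names defined in that test (the probability output is a ZipMap, one class-to-probability dict per row, so it is flattened before comparing):

    probs_onnx = sess.run([label_name], {input_name: predict_array})[0]
    probs_xgb = clr.predict_proba(predict_array)
    # Flatten [{class: prob, ...}] into a (1, n_classes) array.
    as_array = np.array([[row[k] for k in sorted(row)] for row in probs_onnx])
    np.testing.assert_allclose(probs_xgb, as_array, rtol=1e-4)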