From 7b7e1e54b50824a22aae8937adc64a419e83999d Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Wed, 16 Feb 2022 04:11:00 +0300 Subject: [PATCH] document rounding behavior of floating point numbers in categorical features --- docs/Advanced-Topics.rst | 1 + python-package/lightgbm/basic.py | 2 ++ python-package/lightgbm/engine.py | 2 ++ python-package/lightgbm/sklearn.py | 1 + 4 files changed, 6 insertions(+) diff --git a/docs/Advanced-Topics.rst b/docs/Advanced-Topics.rst index 8ef239d22de4..b2ccc99fe656 100644 --- a/docs/Advanced-Topics.rst +++ b/docs/Advanced-Topics.rst @@ -25,6 +25,7 @@ Categorical Feature Support - Categorical features must be encoded as non-negative integers (``int``) less than ``Int32.MaxValue`` (2147483647). It is best to use a contiguous range of integers started from zero. + Floating point numbers in categorical features will be rounded towards 0. - Use ``min_data_per_group``, ``cat_smooth`` to deal with over-fitting (when ``#data`` is small or ``#category`` is large). diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index 823c620de4eb..b164def5c997 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -1159,6 +1159,7 @@ def __init__(self, data, label=None, reference=None, Large values could be memory consuming. Consider using consecutive integers starting from zero. All negative values in categorical features will be treated as missing values. The output cannot be monotonically constrained with respect to a categorical feature. + Floating point numbers in categorical features will be rounded towards 0. params : dict or None, optional (default=None) Other parameters for Dataset. free_raw_data : bool, optional (default=True) @@ -3563,6 +3564,7 @@ def refit( Large values could be memory consuming. Consider using consecutive integers starting from zero. All negative values in categorical features will be treated as missing values. The output cannot be monotonically constrained with respect to a categorical feature. + Floating point numbers in categorical features will be rounded towards 0. dataset_params : dict or None, optional (default=None) Other parameters for Dataset ``data``. free_raw_data : bool, optional (default=True) diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py index bffc2cc7c436..5405b534383c 100644 --- a/python-package/lightgbm/engine.py +++ b/python-package/lightgbm/engine.py @@ -109,6 +109,7 @@ def train( Large values could be memory consuming. Consider using consecutive integers starting from zero. All negative values in categorical features will be treated as missing values. The output cannot be monotonically constrained with respect to a categorical feature. + Floating point numbers in categorical features will be rounded towards 0. keep_training_booster : bool, optional (default=False) Whether the returned Booster will be used to keep training. If False, the returned value will be converted into _InnerPredictor before returning. @@ -463,6 +464,7 @@ def cv(params, train_set, num_boost_round=100, Large values could be memory consuming. Consider using consecutive integers starting from zero. All negative values in categorical features will be treated as missing values. The output cannot be monotonically constrained with respect to a categorical feature. + Floating point numbers in categorical features will be rounded towards 0. fpreproc : callable or None, optional (default=None) Preprocessing function that takes (dtrain, dtest, params) and returns transformed versions of those. diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py index fa1769897736..800bfcb5079e 100644 --- a/python-package/lightgbm/sklearn.py +++ b/python-package/lightgbm/sklearn.py @@ -262,6 +262,7 @@ def __call__(self, preds, dataset): Large values could be memory consuming. Consider using consecutive integers starting from zero. All negative values in categorical features will be treated as missing values. The output cannot be monotonically constrained with respect to a categorical feature. + Floating point numbers in categorical features will be rounded towards 0. callbacks : list of callable, or None, optional (default=None) List of callback functions that are applied at each iteration. See Callbacks in Python API for more information.