diff --git a/CHANGELOG.md b/CHANGELOG.md index 085bc7b..45c3816 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## Head + +### Features + +- Add support for monotonic constraints. + ## 1.6.0 2023-09-27 ### Breaking Changes @@ -9,7 +15,7 @@ ### Features - Compatibility with Tensorflow 2.14.0 -- Contrib: Training preprocessing jointly on the input features, labels and +- Contrib: Training preprocessing jointly on the input features, labels and weights ### Fix diff --git a/tensorflow_decision_forests/keras/BUILD b/tensorflow_decision_forests/keras/BUILD index d8dd04d..8acf5c8 100644 --- a/tensorflow_decision_forests/keras/BUILD +++ b/tensorflow_decision_forests/keras/BUILD @@ -134,8 +134,10 @@ py_test( "//tensorflow_decision_forests/component/model_plotter", "//tensorflow_decision_forests/tensorflow:core", "@ydf//yggdrasil_decision_forests/dataset:synthetic_dataset_py_proto", + "@ydf//yggdrasil_decision_forests/learner:abstract_learner_py_proto", "@ydf//yggdrasil_decision_forests/learner/decision_tree:decision_tree_py_proto", "@ydf//yggdrasil_decision_forests/learner/random_forest:random_forest_py_proto", + "@ydf//yggdrasil_decision_forests/model:abstract_model_py_proto", ], ) diff --git a/tensorflow_decision_forests/keras/__init__.py b/tensorflow_decision_forests/keras/__init__.py index 049c5a7..7da378c 100644 --- a/tensorflow_decision_forests/keras/__init__.py +++ b/tensorflow_decision_forests/keras/__init__.py @@ -60,6 +60,7 @@ FeatureUsage = core.FeatureUsage AdvancedArguments = core.AdvancedArguments MultiTaskItem = core.MultiTaskItem +Monotonic = core.Monotonic # Learning algorithm (called Models in Keras). 
diff --git a/tensorflow_decision_forests/keras/core.py b/tensorflow_decision_forests/keras/core.py index c58832f..f11739b 100644 --- a/tensorflow_decision_forests/keras/core.py +++ b/tensorflow_decision_forests/keras/core.py @@ -46,11 +46,12 @@ import copy from datetime import datetime # pylint: disable=g-importing-member +import enum import functools import inspect import os import tempfile -from typing import Optional, List, Dict, Any, Tuple, NamedTuple, Set, Union +from typing import Optional, List, Dict, Any, Tuple, NamedTuple, Set, Union, Literal import tensorflow as tf @@ -59,10 +60,10 @@ from tensorflow_decision_forests.component.inspector import inspector as inspector_lib from tensorflow_decision_forests.component.tuner import tuner as tuner_lib from tensorflow_decision_forests.keras import core_inference +from tensorflow_decision_forests.tensorflow import cc_logging from tensorflow_decision_forests.tensorflow import core as tf_core from tensorflow_decision_forests.tensorflow import tf1_compatibility from tensorflow_decision_forests.tensorflow import tf_logging -from tensorflow_decision_forests.tensorflow import cc_logging from tensorflow_decision_forests.tensorflow.ops.inference import api as tf_op from tensorflow_decision_forests.tensorflow.ops.training import op as training_op from yggdrasil_decision_forests.dataset import data_spec_pb2 @@ -125,6 +126,58 @@ # pylint: enable=protected-access +class Monotonic(enum.Enum): + """Monotonic constraint between a feature and the model output.""" + + INCREASING = 1 + DECREASING = 2 + + +# Maps the integer encoding of monotonic constraints (as commonly used by +# decision forest libraries) to Monotonic values; 0 means unconstrained. +_INTEGER_MONOTONIC_MAP = { + 0: None, + 1: Monotonic.INCREASING, + -1: Monotonic.DECREASING, +} + +# Various ways for a user to specify a monotonic constraint.
+MonotonicConstraint = Optional[Union[Monotonic, Literal[-1, 0, +1]]] + + +def _normalize_monotonic_constraint( + constraint: MonotonicConstraint, +) -> Optional[Monotonic]: + """Normalizes monotonic constraints provided by the user. + + Args: + constraint: User constraint: None, 0, +1, -1 or a Monotonic value. + + Returns: + Normalized monotonic constraint, or None if the feature is unconstrained. + + Raises: + ValueError: If the input is not a valid monotonic constraint (e.g. a bool). + """ + + if isinstance(constraint, int) and not isinstance(constraint, bool): + if constraint not in _INTEGER_MONOTONIC_MAP: + raise ValueError( + "monotonic argument provided as integer should be one of" + f" {list(_INTEGER_MONOTONIC_MAP)!r}. Got {constraint!r} instead" + ) + constraint = _INTEGER_MONOTONIC_MAP[constraint] + + if constraint is None or isinstance(constraint, Monotonic): + return constraint + + raise ValueError( + "Unexpected monotonic value. monotonic value can be 0, +1, -1, None," + " Monotonic.INCREASING, or Monotonic.DECREASING. Got" + f" {constraint!r} instead" + ) + + class FeatureUsage(object): """Semantic and hyper-parameters for a single feature. @@ -184,6 +237,13 @@ class FeatureUsage(object): missing values in the training dataset. If the algorithm used to handle missing values is not "GLOBAL_IMPUTATION" (default algorithm), this value is ignored. + monotonic: Monotonic constraints between the feature and the model output. + Use `None` (default) for a feature without monotonic constraint. + `Monotonic.INCREASING` ensures the model is monotonically increasing with + the feature. `Monotonic.DECREASING` ensures the model is monotonically + decreasing with the feature. Alternatively, you can also use `0`, `+1` + and `-1` to respectively define a non-constrained, monotonically + increasing, and monotonically decreasing feature.
""" def __init__( @@ -194,10 +254,19 @@ def __init__( max_vocab_count: Optional[int] = None, min_vocab_frequency: Optional[int] = None, override_global_imputation_value: Optional[str] = None, + monotonic: MonotonicConstraint = None, ): self._name = name self._semantic = semantic self._guide = data_spec_pb2.ColumnGuide() + self._monotonic = _normalize_monotonic_constraint(monotonic) + + if monotonic and semantic and semantic != FeatureSemantic.NUMERICAL: + raise ValueError( + f"Feature {name!r} with monotonic constraint is expected to have" + " semantic=NUMERICAL or semantic=None (default). Got" + f" semantic={semantic!r} instead." + ) # Check matching between hyper-parameters and semantic. if semantic != FeatureSemantic.DISCRETIZED_NUMERICAL: @@ -273,6 +342,10 @@ def semantic(self) -> FeatureSemantic: def name(self) -> str: return self._name + @property + def monotonic(self) -> Optional[Monotonic]: + return self._monotonic + class HyperParameterTemplate(NamedTuple): """Named and versionned set of hyper-parameters. @@ -2056,17 +2129,8 @@ def _build_guide( return guide - def _train_model(self, cluster_coordinator=None): - """Effectively train the model.""" - - if self._normalized_input_feature_keys is None: - raise Exception("The training graph was not built.") - - train_model_path = self._temp_directory - model_path = os.path.join(train_model_path, "model") - - # Create the dataspec guide. 
- guide = self._build_guide() + def _effective_training_config(self) -> abstract_learner_pb2.TrainingConfig: + """Assembles the training config to use for training.""" training_config = copy.deepcopy( self._advanced_arguments.yggdrasil_training_config @@ -2081,6 +2145,39 @@ def _train_model(self, cluster_coordinator=None): feature_regex = tf_core.normalize_inputs_regexp(feature_key, False) training_config.features.append(feature_regex) + # Monotonic constraints + for feature in self._features: + if not feature.monotonic: + continue + + proto_direction = ( + abstract_learner_pb2.MonotonicConstraint.INCREASING + if feature.monotonic == Monotonic.INCREASING + else abstract_learner_pb2.MonotonicConstraint.DECREASING + ) + + training_config.monotonic_constraints.append( + abstract_learner_pb2.MonotonicConstraint( + feature=tf_core.normalize_inputs_regexp(feature.name, False), + direction=proto_direction, + ) + ) + return training_config + + def _train_model(self, cluster_coordinator=None): + """Effectively train the model.""" + + if self._normalized_input_feature_keys is None: + raise Exception("The training graph was not built.") + + train_model_path = self._temp_directory + model_path = os.path.join(train_model_path, "model") + + # Create the dataspec guide. 
+ guide = self._build_guide() + + training_config = self._effective_training_config() + # Deployment configuration deployment_config = copy.deepcopy( self._advanced_arguments.yggdrasil_deployment_config diff --git a/tensorflow_decision_forests/keras/keras_test.py b/tensorflow_decision_forests/keras/keras_test.py index ff43376..85442d6 100644 --- a/tensorflow_decision_forests/keras/keras_test.py +++ b/tensorflow_decision_forests/keras/keras_test.py @@ -40,8 +40,10 @@ from tensorflow_decision_forests.keras import core from tensorflow_decision_forests.tensorflow import core as tf_core from yggdrasil_decision_forests.dataset import synthetic_dataset_pb2 +from yggdrasil_decision_forests.learner import abstract_learner_pb2 from yggdrasil_decision_forests.learner.decision_tree import decision_tree_pb2 from yggdrasil_decision_forests.learner.random_forest import random_forest_pb2 +from yggdrasil_decision_forests.model import abstract_model_pb2 layers = tf.keras.layers models = tf.keras.models @@ -2888,6 +2890,109 @@ def test_no_active_features(self): model.fit(tf_dataset) self.assertAllEqual(model.predict(tf_dataset), [[1], [1], [1], [1]]) + def test_monotonic_constraints(self): + dataset = adult_dataset() + model = keras.GradientBoostedTreesModel( + features=[ + keras.FeatureUsage("age", monotonic=+1), + keras.FeatureUsage("hours_per_week", monotonic=-1), + keras.FeatureUsage("education_num", monotonic=+1), + ], + exclude_non_specified_features=True, + use_hessian_gain=True, + ) + + self._check_adult_model( + model=model, + dataset=dataset, + minimum_accuracy=0.790, + check_serialization=True, + ) + + # Need to be called after dataset ingestion + self.assertProtoEquals( + abstract_learner_pb2.TrainingConfig( + learner="GRADIENT_BOOSTED_TREES", + features=["^age$", "^education_num$", "^hours_per_week$"], + label="^__LABEL$", + task=abstract_model_pb2.Task.CLASSIFICATION, + metadata=abstract_model_pb2.Metadata(framework="TF Keras"), +
monotonic_constraints=[ + abstract_learner_pb2.MonotonicConstraint( + feature="^age$", + direction=abstract_learner_pb2.MonotonicConstraint.INCREASING, + ), + abstract_learner_pb2.MonotonicConstraint( + feature="^hours_per_week$", + direction=abstract_learner_pb2.MonotonicConstraint.DECREASING, + ), + abstract_learner_pb2.MonotonicConstraint( + feature="^education_num$", + direction=abstract_learner_pb2.MonotonicConstraint.INCREASING, + ), + ], + ), + model._effective_training_config(), + ) + + def test_monotonic_normalize_value(self): + self.assertEqual( + keras.FeatureUsage("f", monotonic=+1).monotonic, + keras.Monotonic.INCREASING, + ) + self.assertEqual( + keras.FeatureUsage("f", monotonic=-1).monotonic, + keras.Monotonic.DECREASING, + ) + self.assertIsNone(keras.FeatureUsage("f", monotonic=0).monotonic) + + def test_monotonic_bad_value(self): + with self.assertRaisesRegex( + ValueError, + "monotonic argument provided as integer should be one of \\[0, 1," + " -1\\]\\. Got 5 instead", + ): + keras.GradientBoostedTreesModel( + features=[keras.FeatureUsage("f1", monotonic=+5)] + ) + + def test_monotonic_bad_semantic(self): + with self.assertRaisesRegex( + ValueError, + "Feature 'f1' with monotonic constraint is expected to have" + " semantic=NUMERICAL", + ): + keras.GradientBoostedTreesModel( + features=[ + keras.FeatureUsage( + "f1", keras.FeatureSemantic.CATEGORICAL, monotonic=+1 + ) + ] + ) + + def test_monotonic_non_compatible_learner(self): + model = keras.CartModel(features=[keras.FeatureUsage("f", monotonic=+1)]) + pd_dataset = pd.DataFrame({"f": [0, 1], "l": [0, 1]}) + tf_dataset = keras.pd_dataframe_to_tf_dataset(pd_dataset, label="l") + with self.assertRaisesRegex( + tf.errors.UnknownError, + "The learner CART does not support monotonic constraints", + ): + model.fit(tf_dataset) + + def test_monotonic_non_compatible_options(self): + model = keras.GradientBoostedTreesModel( + features=[keras.FeatureUsage("f", monotonic=+1)] + ) + pd_dataset = pd.DataFrame({"f": [0, 1], "l": [0, 1]}) +
tf_dataset = keras.pd_dataframe_to_tf_dataset(pd_dataset, label="l") + with self.assertRaisesRegex( + tf.errors.UnknownError, + "Gradient Boosted Trees does not support monotonic constraints with" + " use_hessian_gain=false", + ): + model.fit(tf_dataset) + if __name__ == "__main__": tf.test.main()