Add support for monotonic constraints.
PiperOrigin-RevId: 569229662
achoum authored and copybara-github committed Sep 28, 2023
1 parent 63d3975 commit 9818b8f
Showing 5 changed files with 225 additions and 14 deletions.
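
For context, here is a minimal sketch of the user-facing API this commit introduces, pieced together from keras_test.py further down. The toy dataframe, its values, and the hyper-parameter choices are illustrative assumptions, not part of the diff; constraints can be passed either as `Monotonic` enum values or as the integers `+1`, `-1`, and `0`.

# Sketch only: toy data and illustrative values; see keras_test.py below.
import pandas as pd
import tensorflow_decision_forests as tfdf

dataset = pd.DataFrame({
    "age": [25, 32, 47, 51] * 5,
    "hours_per_week": [40, 50, 35, 20] * 5,
    "income": [0, 1, 1, 0] * 5,
})
tf_dataset = tfdf.keras.pd_dataframe_to_tf_dataset(dataset, label="income")

model = tfdf.keras.GradientBoostedTreesModel(
    features=[
        # The model output must not decrease as "age" increases.
        tfdf.keras.FeatureUsage("age", monotonic=+1),
        # Equivalent enum form; output must not increase with "hours_per_week".
        tfdf.keras.FeatureUsage(
            "hours_per_week", monotonic=tfdf.keras.Monotonic.DECREASING
        ),
    ],
    exclude_non_specified_features=True,
    use_hessian_gain=True,  # required for monotonic constraints with GBT
)
model.fit(tf_dataset)

As the tests below confirm, gradient boosted trees only accept monotonic constraints when `use_hessian_gain` is enabled.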
8 changes: 7 additions & 1 deletion CHANGELOG.md
@@ -1,5 +1,11 @@
# Changelog

## Head

### Features

- Add support for monotonic constraints.

## 1.6.0 2023-09-27

### Breaking Changes
@@ -9,7 +15,7 @@
### Features

- Compatibility with Tensorflow 2.14.0
- Contrib: Training preprocessing jointly on the input features, labels and
weights

### Fix
2 changes: 2 additions & 0 deletions tensorflow_decision_forests/keras/BUILD
@@ -134,8 +134,10 @@ py_test(
"//tensorflow_decision_forests/component/model_plotter",
"//tensorflow_decision_forests/tensorflow:core",
"@ydf//yggdrasil_decision_forests/dataset:synthetic_dataset_py_proto",
"@ydf//yggdrasil_decision_forests/learner:abstract_learner_py_proto",
"@ydf//yggdrasil_decision_forests/learner/decision_tree:decision_tree_py_proto",
"@ydf//yggdrasil_decision_forests/learner/random_forest:random_forest_py_proto",
"@ydf//yggdrasil_decision_forests/model:abstract_model_py_proto",
],
)

1 change: 1 addition & 0 deletions tensorflow_decision_forests/keras/__init__.py
@@ -60,6 +60,7 @@
FeatureUsage = core.FeatureUsage
AdvancedArguments = core.AdvancedArguments
MultiTaskItem = core.MultiTaskItem
Monotonic = core.Monotonic

# Learning algorithm (called Models in Keras).

123 changes: 110 additions & 13 deletions tensorflow_decision_forests/keras/core.py
@@ -46,11 +46,12 @@

import copy
from datetime import datetime # pylint: disable=g-importing-member
import enum
import functools
import inspect
import os
import tempfile
from typing import Optional, List, Dict, Any, Tuple, NamedTuple, Set, Union
from typing import Optional, List, Dict, Any, Tuple, NamedTuple, Set, Union, Literal

import tensorflow as tf

@@ -59,10 +60,10 @@
from tensorflow_decision_forests.component.inspector import inspector as inspector_lib
from tensorflow_decision_forests.component.tuner import tuner as tuner_lib
from tensorflow_decision_forests.keras import core_inference
from tensorflow_decision_forests.tensorflow import cc_logging
from tensorflow_decision_forests.tensorflow import core as tf_core
from tensorflow_decision_forests.tensorflow import tf1_compatibility
from tensorflow_decision_forests.tensorflow import tf_logging
from tensorflow_decision_forests.tensorflow import cc_logging
from tensorflow_decision_forests.tensorflow.ops.inference import api as tf_op
from tensorflow_decision_forests.tensorflow.ops.training import op as training_op
from yggdrasil_decision_forests.dataset import data_spec_pb2
@@ -125,6 +126,58 @@
# pylint: enable=protected-access


class Monotonic(enum.Enum):
  """Monotonic constraint between a feature and the model output."""

  INCREASING = 1
  DECREASING = 2


# Map between integer monotonic constraints (as commonly used by decision
# forests libraries) and Monotonic enum values.
_INTEGER_MONOTONIC_MAP = {
    0: None,
    1: Monotonic.INCREASING,
    -1: Monotonic.DECREASING,
}

# The various ways a user can specify a monotonic constraint.
MonotonicConstraint = Optional[Union[Monotonic, Literal[-1, 0, +1]]]


def _normalize_monotonic_constraint(
    constraint: MonotonicConstraint,
) -> Optional[Monotonic]:
  """Normalizes monotonic constraints provided by the user.

  Args:
    constraint: User monotonic constraint.

  Returns:
    Normalized monotonic constraint.

  Raises:
    ValueError: If the user input is not a valid monotonic constraint.
  """

  if isinstance(constraint, int):
    if constraint not in _INTEGER_MONOTONIC_MAP:
      raise ValueError(
          "monotonic argument provided as integer should be one of"
          f" {list(_INTEGER_MONOTONIC_MAP)!r}. Got {constraint!r} instead"
      )
    constraint = _INTEGER_MONOTONIC_MAP[constraint]

  if constraint is None or isinstance(constraint, Monotonic):
    return constraint

  raise ValueError(
      "Unexpected monotonic value. monotonic value can be 0, +1, -1, None,"
      " Monotonic.INCREASING, or Monotonic.DECREASING. Got"
      f" {constraint!r} instead"
  )
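
# For illustration (not part of this commit's diff): given the definitions
# above, normalization behaves as follows:
#   _normalize_monotonic_constraint(+1)                    -> Monotonic.INCREASING
#   _normalize_monotonic_constraint(-1)                    -> Monotonic.DECREASING
#   _normalize_monotonic_constraint(0)                     -> None
#   _normalize_monotonic_constraint(None)                  -> None
#   _normalize_monotonic_constraint(Monotonic.DECREASING)  -> Monotonic.DECREASING
#   _normalize_monotonic_constraint(5)                     -> raises ValueError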


class FeatureUsage(object):
  """Semantic and hyper-parameters for a single feature.
@@ -184,6 +237,13 @@ class FeatureUsage(object):
      missing values in the training dataset. If the algorithm used to handle
      missing values is not "GLOBAL_IMPUTATION" (default algorithm), this value
      is ignored.
    monotonic: Monotonic constraint between the feature and the model output.
      Use `None` (default) for a non-constrained feature.
      `Monotonic.INCREASING` ensures the model is monotonically increasing
      with the feature. `Monotonic.DECREASING` ensures the model is
      monotonically decreasing with the feature. Alternatively, you can use
      `0`, `+1`, and `-1` to respectively specify a non-constrained,
      monotonically increasing, and monotonically decreasing feature.
  """

  def __init__(
@@ -194,10 +254,19 @@ def __init__(
      max_vocab_count: Optional[int] = None,
      min_vocab_frequency: Optional[int] = None,
      override_global_imputation_value: Optional[str] = None,
      monotonic: MonotonicConstraint = None,
  ):
    self._name = name
    self._semantic = semantic
    self._guide = data_spec_pb2.ColumnGuide()
    self._monotonic = _normalize_monotonic_constraint(monotonic)

    if monotonic and semantic and semantic != FeatureSemantic.NUMERICAL:
      raise ValueError(
          f"Feature {name!r} with monotonic constraint is expected to have"
          " semantic=NUMERICAL or semantic=None (default). Got"
          f" semantic={semantic!r} instead."
      )

    # Check matching between hyper-parameters and semantic.
    if semantic != FeatureSemantic.DISCRETIZED_NUMERICAL:
@@ -273,6 +342,10 @@ def semantic(self) -> FeatureSemantic:
  def name(self) -> str:
    return self._name

  @property
  def monotonic(self) -> Optional[Monotonic]:
    return self._monotonic


class HyperParameterTemplate(NamedTuple):
  """Named and versioned set of hyper-parameters.
@@ -2056,17 +2129,8 @@ def _build_guide(

    return guide

  def _train_model(self, cluster_coordinator=None):
    """Effectively train the model."""

    if self._normalized_input_feature_keys is None:
      raise Exception("The training graph was not built.")

    train_model_path = self._temp_directory
    model_path = os.path.join(train_model_path, "model")

    # Create the dataspec guide.
    guide = self._build_guide()
  def _effective_training_config(self) -> abstract_learner_pb2.TrainingConfig:
    """Assembles the training config to use for training."""

    training_config = copy.deepcopy(
        self._advanced_arguments.yggdrasil_training_config
@@ -2081,6 +2145,39 @@ def _train_model(self, cluster_coordinator=None):
      feature_regex = tf_core.normalize_inputs_regexp(feature_key, False)
      training_config.features.append(feature_regex)

    # Monotonic constraints.
    for feature in self._features:
      if not feature.monotonic:
        continue

      proto_direction = (
          abstract_learner_pb2.MonotonicConstraint.INCREASING
          if feature.monotonic == Monotonic.INCREASING
          else abstract_learner_pb2.MonotonicConstraint.DECREASING
      )

      training_config.monotonic_constraints.append(
          abstract_learner_pb2.MonotonicConstraint(
              feature=tf_core.normalize_inputs_regexp(feature.name, False),
              direction=proto_direction,
          )
      )
    return training_config

  def _train_model(self, cluster_coordinator=None):
    """Effectively train the model."""

    if self._normalized_input_feature_keys is None:
      raise Exception("The training graph was not built.")

    train_model_path = self._temp_directory
    model_path = os.path.join(train_model_path, "model")

    # Create the dataspec guide.
    guide = self._build_guide()

    training_config = self._effective_training_config()

    # Deployment configuration
    deployment_config = copy.deepcopy(
        self._advanced_arguments.yggdrasil_deployment_config
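
Net effect of the core.py changes above: each monotonic `FeatureUsage` becomes a `MonotonicConstraint` message on the Yggdrasil `TrainingConfig`, with the feature name anchored into a regular expression by `tf_core.normalize_inputs_regexp`. A minimal sketch of one such message, mirroring the expectation asserted in keras_test.py below:

from yggdrasil_decision_forests.learner import abstract_learner_pb2

# Sketch: the message appended for FeatureUsage("age", monotonic=+1).
constraint = abstract_learner_pb2.MonotonicConstraint(
    feature="^age$",  # anchored regexp form of the feature name
    direction=abstract_learner_pb2.MonotonicConstraint.INCREASING,
)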
105 changes: 105 additions & 0 deletions tensorflow_decision_forests/keras/keras_test.py
@@ -40,8 +40,10 @@
from tensorflow_decision_forests.keras import core
from tensorflow_decision_forests.tensorflow import core as tf_core
from yggdrasil_decision_forests.dataset import synthetic_dataset_pb2
from yggdrasil_decision_forests.learner import abstract_learner_pb2
from yggdrasil_decision_forests.learner.decision_tree import decision_tree_pb2
from yggdrasil_decision_forests.learner.random_forest import random_forest_pb2
from yggdrasil_decision_forests.model import abstract_model_pb2

layers = tf.keras.layers
models = tf.keras.models
@@ -2888,6 +2890,109 @@ def test_no_active_features(self):
    model.fit(tf_dataset)
    self.assertAllEqual(model.predict(tf_dataset), [[1], [1], [1], [1]])

  def test_monotonic_constraints(self):
    dataset = adult_dataset()
    model = keras.GradientBoostedTreesModel(
        features=[
            keras.FeatureUsage("age", monotonic=+1),
            keras.FeatureUsage("hours_per_week", monotonic=-1),
            keras.FeatureUsage("education_num", monotonic=+1),
        ],
        exclude_non_specified_features=True,
        use_hessian_gain=True,
    )

    self._check_adult_model(
        model=model,
        dataset=dataset,
        minimum_accuracy=0.790,
        check_serialization=True,
    )

    # Needs to be called after dataset ingestion.
    self.assertProtoEquals(
        model._effective_training_config(),
        abstract_learner_pb2.TrainingConfig(
            learner="GRADIENT_BOOSTED_TREES",
            features=["^age$", "^education_num$", "^hours_per_week$"],
            label="^__LABEL$",
            task=abstract_model_pb2.Task.CLASSIFICATION,
            metadata=abstract_model_pb2.Metadata(framework="TF Keras"),
            monotonic_constraints=[
                abstract_learner_pb2.MonotonicConstraint(
                    feature="^age$",
                    direction=abstract_learner_pb2.MonotonicConstraint.INCREASING,
                ),
                abstract_learner_pb2.MonotonicConstraint(
                    feature="^hours_per_week$",
                    direction=abstract_learner_pb2.MonotonicConstraint.DECREASING,
                ),
                abstract_learner_pb2.MonotonicConstraint(
                    feature="^education_num$",
                    direction=abstract_learner_pb2.MonotonicConstraint.INCREASING,
                ),
            ],
        ),
    )

  def test_monotonic_normalize_value(self):
    self.assertEqual(
        keras.FeatureUsage("f", monotonic=+1).monotonic,
        keras.Monotonic.INCREASING,
    )
    self.assertEqual(
        keras.FeatureUsage("f", monotonic=-1).monotonic,
        keras.Monotonic.DECREASING,
    )
    self.assertIsNone(keras.FeatureUsage("f", monotonic=0).monotonic)

  def test_monotonic_bad_value(self):
    with self.assertRaisesRegex(
        ValueError,
        "monotonic argument provided as integer should be one of \\[0, 1,"
        " -1\\]\\. Got 5 instead",
    ):
      keras.GradientBoostedTreesModel(
          features=[keras.FeatureUsage("f1", monotonic=+5)]
      )

  def test_monotonic_bad_semantic(self):
    with self.assertRaisesRegex(
        ValueError,
        "Feature 'f1' with monotonic constraint is expected to have"
        " semantic=NUMERICAL",
    ):
      keras.GradientBoostedTreesModel(
          features=[
              keras.FeatureUsage(
                  "f1", keras.FeatureSemantic.CATEGORICAL, monotonic=+1
              )
          ]
      )

  def test_monotonic_non_compatible_learner(self):
    model = keras.CartModel(features=[keras.FeatureUsage("f", monotonic=+1)])
    pd_dataset = pd.DataFrame({"f": [0, 1], "l": [0, 1]})
    tf_dataset = keras.pd_dataframe_to_tf_dataset(pd_dataset, label="l")
    with self.assertRaisesRegex(
        tf.errors.UnknownError,
        "The learner CART does not support monotonic constraints",
    ):
      model.fit(tf_dataset)

  def test_monotonic_non_compatible_options(self):
    model = keras.GradientBoostedTreesModel(
        features=[keras.FeatureUsage("f", monotonic=+1)]
    )
    pd_dataset = pd.DataFrame({"f": [0, 1], "l": [0, 1]})
    tf_dataset = keras.pd_dataframe_to_tf_dataset(pd_dataset, label="l")
    with self.assertRaisesRegex(
        tf.errors.UnknownError,
        "Gradient Boosted Trees does not support monotonic constraints with"
        " use_hessian_gain=false",
    ):
      model.fit(tf_dataset)


if __name__ == "__main__":
  tf.test.main()

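The last two tests show that compatibility is enforced by the Yggdrasil learner at `fit()` time rather than at model construction: CART rejects monotonic constraints outright, and gradient boosted trees require hessian gain. A hedged sketch of the working configuration implied by `test_monotonic_non_compatible_options`, using illustrative toy data:

# Sketch: toy data; enabling use_hessian_gain avoids the UnknownError
# raised by the default GBT configuration.
import pandas as pd
from tensorflow_decision_forests import keras

pd_dataset = pd.DataFrame({"f": list(range(20)), "l": [0, 1] * 10})
tf_dataset = keras.pd_dataframe_to_tf_dataset(pd_dataset, label="l")
model = keras.GradientBoostedTreesModel(
    features=[keras.FeatureUsage("f", monotonic=+1)],
    use_hessian_gain=True,
)
model.fit(tf_dataset)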