From bbe17433b086f85478657d52973a1d6413aa672f Mon Sep 17 00:00:00 2001
From: Sebastian Ament <sebastianament@meta.com>
Date: Mon, 15 Apr 2024 14:51:56 -0700
Subject: [PATCH] Global Sensitivity Analysis for Categorical Features (#2357)

Summary:

Sobol sensitivity analysis and the sensitivity plots currently do not support categorical features. In particular, the plots error out when categorical features are present because the sensitivity analysis (both first order and gradient-based) does not support them.

This commit adds support for categorical features by
- introducing a uniformly random integer-valued point distribution for categorical and ordinal features in `SobolSensitivity`, in line with the usual integer-valued encoding,
- ignoring the sign of the derivative-based sensitivity analysis of the categorical features, since the `CategoricalKernel` is non-differentiable, and the "direction" of categorical features is not well defined,
- adding a separate case for categorical features to the sensitivity analysis plot, stating that the categorical features "affect" but don't "increase" or "decrease" the metric.

Note that the results for the categorical features are still first order or total Sobol indices, so all the results in the plot are on the same scale.

Differential Revision: D56070326
---
 ax/plot/feature_importances.py                |  31 +++++-
 ax/utils/sensitivity/sobol_measures.py        |  39 +++++++
 .../sensitivity/tests/test_sensitivity.py     | 100 +++++++++++++-----
 3 files changed, 137 insertions(+), 33 deletions(-)

diff --git a/ax/plot/feature_importances.py b/ax/plot/feature_importances.py
index 67440a79eec..37143521baf 100644
--- a/ax/plot/feature_importances.py
+++ b/ax/plot/feature_importances.py
@@ -12,6 +12,7 @@
 import numpy as np
 import pandas as pd
 import plotly.graph_objs as go
+from ax.core.parameter import ChoiceParameter
 from ax.exceptions.core import NoDataError
 from ax.modelbridge import ModelBridge
 from ax.plot.base import AxPlotConfig, AxPlotTypes
@@ -143,6 +144,14 @@ def plot_feature_importance_by_feature_plotly(
         }
     traces = []
     dropdown = []
+    categorical_features = []
+    if model is not None:
+        categorical_features = [
+            name
+            for name, par in model.model_space.parameters.items()
+            if isinstance(par, ChoiceParameter)
+        ]
+
     for i, metric_name in enumerate(sorted(sensitivity_values.keys())):
         importances = sensitivity_values[metric_name]
         factor_col = "Factor"
@@ -157,7 +166,11 @@ def plot_feature_importance_by_feature_plotly(
                         factor_col: factor,
                         importance_col: np.asarray(importance)[0],
                         importance_col_se: np.asarray(importance)[2],
-                        sign_col: np.sign(np.asarray(importance)[0]).astype(int),
+                        sign_col: (
+                            0
+                            if factor in categorical_features
+                            else 2 * (np.asarray(importance)[0] >= 0).astype(int) - 1
+                        ),
                     }
                     for factor, importance in importances.items()
                 ]
@@ -172,7 +185,11 @@ def plot_feature_importance_by_feature_plotly(
                     {
                         factor_col: factor,
                         importance_col: importance,
-                        sign_col: np.sign(importance).astype(int),
+                        sign_col: (
+                            0
+                            if factor in categorical_features
+                            else 2 * (importance >= 0).astype(int) - 1
+                        ),
                     }
                     for factor, importance in importances.items()
                 ]
@@ -183,9 +200,13 @@ def plot_feature_importance_by_feature_plotly(
         if relative:
             df[importance_col] = df[importance_col].div(df[importance_col].sum())
 
-        colors = {-1: "darkorange", 1: "steelblue"}
-        names = {-1: "Decreases metric", 1: "Increases metric"}
-        legend_counter = {-1: 0, 1: 0}
+        colors = {-1: "darkorange", 0: "gray", 1: "steelblue"}
+        names = {
+            -1: "Decreases metric",
+            0: "Affects metric (categorical choice)",
+            1: "Increases metric",
+        }
+        legend_counter = {-1: 0, 0: 0, 1: 0}
         all_positive = all(df[sign_col] >= 0)
         for _, row in df.iterrows():
             traces.append(
diff --git a/ax/utils/sensitivity/sobol_measures.py b/ax/utils/sensitivity/sobol_measures.py
index d188cff6714..78aec5a66b9 100644
--- a/ax/utils/sensitivity/sobol_measures.py
+++ b/ax/utils/sensitivity/sobol_measures.py
@@ -6,6 +6,7 @@
 # pyre-strict
 
 from copy import deepcopy
+from functools import partial
 
 from typing import Any, Callable, Dict, List, Optional, Union
 
@@ -36,6 +37,7 @@ def __init__(
         second_order: bool = False,
         num_bootstrap_samples: int = 1,
         bootstrap_array: bool = False,
+        discrete_features: Optional[List[int]] = None,
     ) -> None:
         r"""Computes three types of Sobol indices:
         first order indices, total indices and second order indices (if specified ).
@@ -51,6 +53,9 @@ def __init__(
                 to be specified.
             bootstrap_array: If true, all the num_bootstrap_samples extimated indices
                 are returned instead of their mean and Var.
+            discrete_features: If specified, the inputs associated with the indices in
+                this list are generated using an integer-valued uniform distribution,
+                rather than the default (pseudo-)random continuous uniform distribution.
         """
         self.input_function = input_function
         self.dim: int = bounds.shape[-1]
@@ -71,6 +76,16 @@ def __init__(
         else:
             self.A = unnormalize(torch.rand(num_mc_samples, self.dim), bounds=bounds)
             self.B = unnormalize(torch.rand(num_mc_samples, self.dim), bounds=bounds)
+
+        # uniform integral distribution for discrete features
+        if discrete_features is not None:
+            all_low = bounds[0, discrete_features].to(dtype=torch.int).tolist()
+            all_high = (bounds[1, discrete_features]).to(dtype=torch.int).tolist()
+            for i, low, high in zip(discrete_features, all_low, all_high):
+                randint = partial(torch.randint, low=low, high=high + 1)
+                self.A[:, i] = randint(size=self.A.shape[:-1])
+                self.B[:, i] = randint(size=self.B.shape[:-1])
+
         # pyre-fixme[4]: Attribute must be annotated.
         self.A_B_ABi = self.generate_all_input_matrix().to(torch.double)
 
@@ -395,6 +410,7 @@ def __init__(
             [torch.Tensor, torch.Tensor], torch.Tensor
         ] = GaussianLinkMean,
         mini_batch_size: int = 128,
+        discrete_features: Optional[List[int]] = None,
     ) -> None:
         r"""Computes three types of Sobol indices:
         first order indices, total indices and second order indices (if specified ).
@@ -411,6 +427,9 @@ def __init__(
                 to be specified.
             mini_batch_size: The size of the mini-batches used while evaluating the
                 model posterior. Increasing this will increase the memory usage.
+            discrete_features: If specified, the inputs associated with the indices in
+                this list are generated using an integer-valued uniform distribution,
+                rather than the default (pseudo-)random continuous uniform distribution.
         """
         self.model = model
         self.second_order = second_order
@@ -438,6 +457,7 @@ def input_function(x: Tensor) -> Tensor:
             second_order=self.second_order,
             input_qmc=self.input_qmc,
             num_bootstrap_samples=self.num_bootstrap_samples,
+            discrete_features=discrete_features,
         )
         self.sensitivity.evalute_function()
 
@@ -486,6 +506,7 @@ def __init__(
         input_qmc: bool = False,
         gp_sample_qmc: bool = False,
         num_bootstrap_samples: int = 1,
+        discrete_features: Optional[List[int]] = None,
     ) -> None:
         r"""Computes three types of Sobol indices:
         first order indices, total indices and second order indices (if specified ).
@@ -502,6 +523,9 @@ def __init__(
                 SobolQMCNormalSampler.
             num_bootstrap_samples: If bootstrap is true, the number of bootstraps has
                 to be specified.
+            discrete_features: If specified, the inputs associated with the indices in
+                this list are generated using an integer-valued uniform distribution,
+                rather than the default (pseudo-)random continuous uniform distribution.
         """
         self.model = model
         self.second_order = second_order
@@ -519,6 +543,7 @@ def __init__(
             input_qmc=self.input_qmc,
             num_bootstrap_samples=self.num_bootstrap_samples,
             bootstrap_array=True,
+            discrete_features=discrete_features,
         )
         # TODO: Ideally, we would reduce the memory consumption here as well
         # but this is a tricky since it uses joint posterior sampling.
@@ -717,6 +742,7 @@ def compute_sobol_indices_from_model_list(
     model_list: List[Model],
     bounds: Tensor,
     order: str = "first",
+    discrete_features: Optional[List[int]] = None,
     **sobol_kwargs: Any,
 ) -> Tensor:
     """
@@ -728,6 +754,9 @@ def compute_sobol_indices_from_model_list(
         bounds: A 2 x d Tensor of lower and upper bounds of the domain of the models.
         order: A string specifying the order of the Sobol indices to be computed.
             Supports "first" and "total" and defaults to "first".
+        discrete_features: If specified, the inputs associated with the indices in
+            this list are generated using an integer-valued uniform distribution,
+            rather than the default (pseudo-)random continuous uniform distribution.
         sobol_kwargs: keyword arguments passed on to SobolSensitivityGPMean.
 
     Returns:
@@ -739,6 +768,7 @@ def compute_sobol_indices_from_model_list(
         sens_class = SobolSensitivityGPMean(
             model=model,
             bounds=bounds,
+            discrete_features=discrete_features,
             **sobol_kwargs,
         )
         indices.append(method(sens_class))
@@ -789,6 +819,7 @@ def ax_parameter_sens(
         model_list=model_list,
         bounds=bounds,
         order=order,
+        discrete_features=digest.categorical_features + digest.ordinal_features,
         **sobol_kwargs,
     )
     if signed:
@@ -797,6 +828,14 @@ def ax_parameter_sens(
             bounds=bounds,
             **sobol_kwargs,
         )
+        # categorical features don't have a direction, so we set the derivative to 1.0
+        # in order not to zero out their sensitivity. We treat categorical features
+        # separately in the sensitivity analysis plot as well, to make clear that they
+        # are affecting the metric, but neither increasing nor decreasing. Note that the
+        # ordinal variables have a well defined direction, so we do not need to treat
+        # them differently here.
+        for i in digest.categorical_features:
+            ind_deriv[:, i] = 1.0
         ind *= torch.sign(ind_deriv)
     return _array_with_string_indices_to_dict(
         rows=metrics, cols=digest.feature_names, A=ind.numpy()
diff --git a/ax/utils/sensitivity/tests/test_sensitivity.py b/ax/utils/sensitivity/tests/test_sensitivity.py
index 7fd2c95d8f1..5e965457b0a 100644
--- a/ax/utils/sensitivity/tests/test_sensitivity.py
+++ b/ax/utils/sensitivity/tests/test_sensitivity.py
@@ -6,6 +6,7 @@
 # pyre-strict
 
 
+import copy
 import math
 from typing import cast
 from unittest.mock import patch, PropertyMock
@@ -270,35 +271,52 @@ def test_SobolGPMean(self) -> None:
                             )
         # Test with signed
         model_bridge = get_modelbridge(modular=True)
-        # Unsigned
+
+        # adding a categorical feature
+        cat_model_bridge = copy.deepcopy(model_bridge)
+        digest = cat_model_bridge.model.search_space_digest
+        digest.categorical_features = [0]
+
         sobol_kwargs = {"input_qmc": True, "num_mc_samples": 10}
-        ind_dict = ax_parameter_sens(
-            model_bridge,  # pyre-ignore
-            order="total",
-            signed=False,
-            **sobol_kwargs,  # pyre-ignore
-        )
-        ind_deriv = compute_derivatives_from_model_list(
-            model_list=[model_bridge.model.surrogate.model],
-            bounds=torch.tensor(model_bridge.model.search_space_digest.bounds).T,
-            **sobol_kwargs,
-        )
-        ind_dict_signed = ax_parameter_sens(
-            model_bridge,  # pyre-ignore
-            order="total",
-            # signed=True
-            **sobol_kwargs,  # pyre-ignore
-        )
-        for i, pname in enumerate(["x1", "x2"]):
-            self.assertEqual(
-                torch.sign(ind_deriv[0, i]).item(),
-                math.copysign(1, ind_dict_signed["branin"][pname]),
-            )
-            self.assertAlmostEqual(
-                (torch.sign(ind_deriv[0, i]) * ind_dict["branin"][pname]).item(),
-                ind_dict_signed["branin"][pname],
-            )  # signed
-            self.assertTrue(ind_dict["branin"][pname] >= 0)  # unsigned
+        seed = 1234
+        for bridge in [model_bridge, cat_model_bridge]:
+            with self.subTest(model_bridge=bridge):
+                torch.manual_seed(seed)
+                # Unsigned
+                ind_dict = ax_parameter_sens(
+                    bridge,  # pyre-ignore
+                    order="total",
+                    signed=False,
+                    **sobol_kwargs,  # pyre-ignore
+                )
+                ind_deriv = compute_derivatives_from_model_list(
+                    model_list=[bridge.model.surrogate.model],
+                    bounds=torch.tensor(bridge.model.search_space_digest.bounds).T,
+                    **sobol_kwargs,
+                )
+                torch.manual_seed(seed)  # reset seed to keep discrete features the same
+                cat_indices = bridge.model.search_space_digest.categorical_features
+                ind_dict_signed = ax_parameter_sens(
+                    bridge,  # pyre-ignore
+                    order="total",
+                    # signed=True
+                    **sobol_kwargs,  # pyre-ignore
+                )
+                for i, pname in enumerate(["x1", "x2"]):
+                    if i in cat_indices:  # special case for categorical features
+                        expected_sign = 1
+                    else:
+                        expected_sign = torch.sign(ind_deriv[0, i]).item()
+
+                    self.assertEqual(
+                        expected_sign,
+                        math.copysign(1, ind_dict_signed["branin"][pname]),
+                    )
+                    self.assertAlmostEqual(
+                        (expected_sign * ind_dict["branin"][pname]).item(),
+                        ind_dict_signed["branin"][pname],
+                    )  # signed
+                    self.assertTrue(ind_dict["branin"][pname] >= 0)  # unsigned
 
     def test_SobolGPSampling(self) -> None:
         bounds = torch.tensor([(0.0, 1.0) for _ in range(2)]).t()
@@ -349,6 +367,32 @@ def test_SobolGPSampling(self) -> None:
             total_order = sensitivity_sampling.total_order_indices()
             second_order = sensitivity_sampling.second_order_indices()
 
+        discrete_feature = 0
+        sensitivity_sampling_discrete = SobolSensitivityGPSampling(
+            self.model,
+            num_mc_samples=10,
+            num_gp_samples=10,
+            bounds=bounds,
+            second_order=True,
+            discrete_features=[discrete_feature],
+        )
+        sens = sensitivity_sampling_discrete.sensitivity
+        A = sens.A
+        B = sens.B
+        Arnd = A.round()
+        Brnd = B.round()
+        # testing that the discrete feature is integer valued
+        self.assertTrue(
+            torch.allclose(Arnd[:, discrete_feature], A[:, discrete_feature])
+        )
+        self.assertTrue(
+            torch.allclose(Brnd[:, discrete_feature], B[:, discrete_feature])
+        )
+
+        # testing that the other features are not integer valued
+        self.assertFalse(torch.allclose(Arnd, A))
+        self.assertFalse(torch.allclose(Brnd, B))
+
     def test_DerivativeGp(self) -> None:
         test_x = torch.rand(2, 2)
         posterior = posterior_derivative(self.model, test_x, kernel_type="matern")