Added a sample-around-instance option for continuous features in LIME tabular.
marcotcr · May 25, 2018 · 6946cbe · 6946cbe
1 parent 6808f82
commit 6946cbe
Showing 1 changed file with 10 additions and 1 deletion.
diff --git a/lime/lime_tabular.py b/lime/lime_tabular.py
@@ -109,6 +109,7 @@ def __init__(self,
                  feature_selection='auto',
                  discretize_continuous=True,
                  discretizer='quartile',
+                 sample_around_instance=False,
                  random_state=None):
         """Init function.
 
@@ -140,13 +141,18 @@ def __init__(self,
             discretizer: only matters if discretize_continuous is True. Options
                 are 'quartile', 'decile', 'entropy' or a BaseDiscretizer
                 instance.
+            sample_around_instance: if True, will sample continuous features
+                in perturbed samples from a normal centered at the instance
+                being explained. Otherwise, the normal is centered on the mean
+                of the feature data.
             random_state: an integer or numpy.RandomState that will be used to
                 generate random numbers. If None, the random state will be
                 initialized using the internal numpy seed.
         """
         self.random_state = check_random_state(random_state)
         self.mode = mode
         self.categorical_names = categorical_names or {}
+        self.sample_around_instance = sample_around_instance
 
         if categorical_features is None:
             categorical_features = []
@@ -402,7 +408,10 @@ def __data_inverse(self,
             data = self.random_state.normal(
                     0, 1, num_samples * data_row.shape[0]).reshape(
                     num_samples, data_row.shape[0])
-            data = data * self.scaler.scale_ + self.scaler.mean_
+            if self.sample_around_instance:
+                data = data * self.scaler.scale_ + data_row
+            else:
+                data = data * self.scaler.scale_ + self.scaler.mean_
             categorical_features = self.categorical_features
             first_row = data_row
         else: