Add 2d array support to DRLearner
kbattocchi committed Jan 10, 2020
1 parent 8370694 commit 848cd8c
Showing 3 changed files with 101 additions and 99 deletions.
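For context, a minimal usage sketch (not part of the diff) of what this commit enables: the DRLearner estimators can now be fit on an outcome array of shape (n, 1) in addition to shape (n,). The synthetic data below is illustrative only; per the updated tests, the effect retains the outcome's trailing dimension.

```python
import numpy as np
from econml.drlearner import LinearDRLearner

np.random.seed(0)
n = 500
X = np.random.normal(size=(n, 2))
T = np.random.binomial(1, 0.5, size=n)                               # binary (discrete) treatment
Y = (T * (1 + X[:, 0]) + np.random.normal(size=n)).reshape(n, 1)     # 2d outcome, shape (n, 1)

est = LinearDRLearner()
est.fit(Y, T, X=X)          # before this commit, a (n, 1) outcome required Y.ravel()
te = est.effect(X)          # per the new tests, the (n, 1) outcome shape is preserved
print(te.shape)
```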
47 changes: 25 additions & 22 deletions econml/drlearner.py
@@ -29,15 +29,14 @@

import numpy as np
from warnings import warn

from sklearn.base import clone
from sklearn.linear_model import LogisticRegressionCV, LinearRegression, LassoCV
from econml.utilities import inverse_onehot, check_high_dimensional
from econml.sklearn_extensions.linear_model import WeightedLassoCV, DebiasedLasso
from econml.utilities import inverse_onehot, check_high_dimensional, StatsModelsLinearRegression
from econml.sklearn_extensions.linear_model import WeightedLassoCVWrapper, DebiasedLasso
from econml.sklearn_extensions.ensemble import SubsampledHonestForest
from sklearn.base import clone
from econml._ortho_learner import _OrthoLearner
from econml.cate_estimator import StatsModelsCateEstimatorDiscreteMixin, DebiasedLassoCateEstimatorDiscreteMixin
from econml.utilities import StatsModelsLinearRegression
from sklearn.preprocessing import PolynomialFeatures
from econml.inference import GenericModelFinalInferenceDiscrete


@@ -243,7 +242,7 @@ class takes as input the parameter ``model_regressor``, which is an arbitrary sc
"""

def __init__(self, model_propensity=LogisticRegressionCV(cv=3, solver='lbfgs', multi_class='auto'),
model_regression=WeightedLassoCV(cv=3),
model_regression=WeightedLassoCVWrapper(cv=3),
model_final=StatsModelsLinearRegression(),
multitask_model_final=False,
featurizer=None,
@@ -258,8 +257,9 @@ def _combine(self, X, W):
return np.hstack([arr for arr in [X, W] if arr is not None])

def fit(self, Y, T, X=None, W=None, *, sample_weight=None):
# TODO Allow for non-vector y, i.e. of shape (n, 1)
assert np.ndim(Y) == 1, "Can only accept single dimensional outcomes Y! Use Y.ravel()."
if Y.ndim > 1 and Y.shape[1] > 1:
raise ValueError("The outcome matrix must be of shape ({0}, ) or ({0}, 1), "
"instead got {1}.".format(len(X), Y.shape))
if (X is None) and (W is None):
raise AttributeError("At least one of X or W has to not be None!")
if np.any(np.all(T == 0, axis=0)) or (not np.any(np.all(T == 0, axis=1))):
@@ -274,16 +274,17 @@ def fit(self, Y, T, X=None, W=None, *, sample_weight=None):
def predict(self, Y, T, X=None, W=None, *, sample_weight=None):
XW = self._combine(X, W)
propensities = self._model_propensity.predict_proba(XW)
n = T.shape[0]
Y_pred = np.zeros((T.shape[0], T.shape[1] + 1))
T_counter = np.zeros(T.shape)
Y_pred[:, 0] = self._model_regression.predict(np.hstack([XW, T_counter]))
Y_pred[:, 0] += (Y - Y_pred[:, 0]) * np.all(T == 0, axis=1) / propensities[:, 0]
Y_pred[:, 0] = self._model_regression.predict(np.hstack([XW, T_counter])).reshape(n)
Y_pred[:, 0] += (Y.reshape(n) - Y_pred[:, 0]) * np.all(T == 0, axis=1) / propensities[:, 0]
for t in np.arange(T.shape[1]):
T_counter = np.zeros(T.shape)
T_counter[:, t] = 1
Y_pred[:, t + 1] = self._model_regression.predict(np.hstack([XW, T_counter]))
Y_pred[:, t + 1] += (Y - Y_pred[:, t + 1]) * (T[:, t] == 1) / propensities[:, t + 1]
return Y_pred
Y_pred[:, t + 1] = self._model_regression.predict(np.hstack([XW, T_counter])).reshape(n)
Y_pred[:, t + 1] += (Y.reshape(n) - Y_pred[:, t + 1]) * (T[:, t] == 1) / propensities[:, t + 1]
return Y_pred.reshape(Y.shape + (T.shape[1] + 1,))

class ModelFinal:
# Coding Remark: The reasoning around the multitask_model_final could have been simplified if
@@ -303,11 +304,11 @@ def fit(self, Y, T, X=None, W=None, *, nuisances, sample_weight=None, sample_var
X = self._featurizer.fit_transform(X)
filtered_kwargs = _filter_none_kwargs(sample_weight=sample_weight, sample_var=sample_var)
if self._multitask_model_final:
self.model_cate = self._model_final.fit(X, Y_pred[:, 1:] - Y_pred[:, [0]], **filtered_kwargs)
self.model_cate = self._model_final.fit(X, Y_pred[..., 1:] - Y_pred[..., [0]], **filtered_kwargs)
else:
self.models_cate = [clone(self._model_final, safe=False).fit(X, Y_pred[:, t] - Y_pred[:, 0],
self.models_cate = [clone(self._model_final, safe=False).fit(X, Y_pred[..., t] - Y_pred[..., 0],
**filtered_kwargs)
for t in np.arange(1, Y_pred.shape[1])]
for t in np.arange(1, Y_pred.shape[-1])]
return self

def predict(self, X=None):
@@ -316,19 +317,21 @@ def predict(self, X=None):
if self._multitask_model_final:
return self.model_cate.predict(X)
else:
return np.array([mdl.predict(X) for mdl in self.models_cate]).T
preds = np.array([mdl.predict(X) for mdl in self.models_cate])
return np.moveaxis(preds, 0, -1) # move treatment dim to end

def score(self, Y, T, X=None, W=None, *, nuisances, sample_weight=None, sample_var=None):
if (X is not None) and (self._featurizer is not None):
X = self._featurizer.transform(X)
Y_pred, = nuisances
if self._multitask_model_final:
return np.mean(np.average((Y_pred[:, 1:] - Y_pred[:, [0]] - self.model_cate.predict(X))**2,
return np.mean(np.average((Y_pred[..., 1:] - Y_pred[..., [0]] - self.model_cate.predict(X))**2,
weights=sample_weight, axis=0))
else:
return np.mean([np.average((Y_pred[:, t] - Y_pred[:, 0] - self.models_cate[t - 1].predict(X))**2,
return np.mean([np.average((Y_pred[..., t] - Y_pred[..., 0] -
self.models_cate[t - 1].predict(X))**2,
weights=sample_weight, axis=0)
for t in np.arange(1, Y_pred.shape[1])])
for t in np.arange(1, Y_pred.shape[-1])])

self._multitask_model_final = multitask_model_final
super().__init__(ModelNuisance(model_propensity, model_regression),
@@ -615,7 +618,7 @@ class LinearDRLearner(StatsModelsCateEstimatorDiscreteMixin, DRLearner):

def __init__(self,
model_propensity=LogisticRegressionCV(cv=3, solver='lbfgs', multi_class='auto'),
model_regression=WeightedLassoCV(cv=3),
model_regression=WeightedLassoCVWrapper(cv=3),
featurizer=None,
fit_cate_intercept=True,
n_splits=2, random_state=None):
@@ -806,7 +809,7 @@ class SparseLinearDRLearner(DebiasedLassoCateEstimatorDiscreteMixin, DRLearner):

def __init__(self,
model_propensity=LogisticRegressionCV(cv=3, solver='lbfgs', multi_class='auto'),
model_regression=WeightedLassoCV(cv=3),
model_regression=WeightedLassoCVWrapper(cv=3),
featurizer=None,
fit_cate_intercept=True,
alpha='auto',
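A note on the `.reshape(n)` calls added to `ModelNuisance.predict` above: they keep the doubly robust pseudo-outcome arithmetic one-dimensional even when `Y` arrives with shape (n, 1), so the residual term does not broadcast to an (n, n) matrix. A standalone numpy sketch of that pseudo-outcome for a single treatment level (function and argument names are illustrative, not econml API):

```python
import numpy as np

def dr_pseudo_outcome(y, is_t, reg_pred, prop_t):
    """Doubly robust pseudo-outcome for one treatment level t.

    y        : outcome, shape (n,) or (n, 1)
    is_t     : indicator 1{T == t}, shape (n,)
    reg_pred : regression prediction E[Y | X, W, T=t], shape (n,) or (n, 1)
    prop_t   : propensity P(T = t | X, W), shape (n,)
    """
    n = y.shape[0]
    # flatten to (n,) so an (n, 1) outcome doesn't broadcast against (n,) predictions
    y, reg_pred = y.reshape(n), reg_pred.reshape(n)
    return reg_pred + is_t * (y - reg_pred) / prop_t
```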
12 changes: 6 additions & 6 deletions econml/inference.py
@@ -221,19 +221,19 @@ def const_marginal_effect_interval(self, X, *, alpha=0.1):
if (X is not None) and (self.featurizer is not None):
X = self.featurizer.fit_transform(X)
preds = np.array([mdl.predict_interval(X, alpha=alpha) for mdl in self.fitted_models_final])
return tuple([preds[:, 0, :].T, preds[:, 1, :].T])
return tuple(np.moveaxis(preds, [0, 1], [-1, 0])) # send treatment to the end, pull bounds to the front

def effect_interval(self, X, *, T0, T1, alpha=0.1):
X, T0, T1 = self._est._expand_treatments(X, T0, T1)
if np.any(np.any(T0 > 0, axis=1)):
raise AttributeError("Can only calculate intervals of effects with respect to baseline treatment!")
ind = (T1 @ np.arange(1, T1.shape[1] + 1)).astype(int)
ind = inverse_onehot(T1)
lower, upper = self.const_marginal_effect_interval(X, alpha=alpha)
lower = np.hstack([np.zeros((lower.shape[0], 1)), lower])
upper = np.hstack([np.zeros((upper.shape[0], 1)), upper])
lower = np.concatenate([np.zeros(lower.shape[0:-1] + (1,)), lower], -1)
upper = np.concatenate([np.zeros(upper.shape[0:-1] + (1,)), upper], -1)
if X is None: # Then const_marginal_effect_interval will return a single row
lower, upper = np.tile(lower, (T0.shape[0], 1)), np.tile(upper, (T0.shape[0], 1))
return lower[np.arange(T0.shape[0]), ind], upper[np.arange(T0.shape[0]), ind]
lower, upper = np.repeat(lower, T0.shape[0], axis=0), np.repeat(upper, T0.shape[0], axis=0)
return lower[np.arange(T0.shape[0]), ..., ind], upper[np.arange(T0.shape[0]), ..., ind]


class LinearModelFinalInferenceDiscrete(GenericModelFinalInferenceDiscrete):
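As a side note on the `effect_interval` change above: `inverse_onehot` recovers integer treatment indices from one-hot rows with the baseline column dropped, which is exactly what the replaced matrix expression computed. A small standalone check (plain numpy, not the econml implementation):

```python
import numpy as np

T1 = np.array([[0, 0],    # baseline treatment        -> index 0
               [1, 0],    # first non-baseline level  -> index 1
               [0, 1]])   # second non-baseline level -> index 2

old_ind = (T1 @ np.arange(1, T1.shape[1] + 1)).astype(int)   # expression removed in this commit
new_ind = T1.argmax(axis=1) + T1.any(axis=1)                 # same result as inverse_onehot(T1) here
assert np.array_equal(old_ind, new_ind)                      # both give [0, 1, 2]
```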
141 changes: 70 additions & 71 deletions econml/tests/test_drlearner.py
@@ -70,77 +70,76 @@ def make_random(is_discrete, d):
else:
return np.random.normal(size=sz)

d_y = 0
is_discrete = True
for d_t in [0, 1]:
for d_x in [2, None]:
for d_w in [2, None]:
with self.subTest(d_t=d_t, d_x=d_x, d_w=d_w):
W, X, Y, T = [make_random(is_discrete, d)
for is_discrete, d in [(False, d_w),
(False, d_x),
(False, d_y),
(is_discrete, d_t)]]

if (X is None) and (W is None):
continue
d_t_final = 2 if is_discrete else d_t

effect_shape = (n,) + ((d_y,) if d_y > 0 else ())
marginal_effect_shape = ((n,) +
((d_y,) if d_y > 0 else ()) +
((d_t_final,) if d_t_final > 0 else ()))

# since T isn't passed to const_marginal_effect, defaults to one row if X is None
const_marginal_effect_shape = ((n if d_x else 1,) +
((d_y,) if d_y > 0 else ()) +
((d_t_final,) if d_t_final > 0 else()))

# TODO: add stratification to bootstrap so that we can use it even with discrete treatments
infs = [None, 'statsmodels']

est = LinearDRLearner(model_regression=Lasso(),
model_propensity=LogisticRegression(C=1000, solver='lbfgs',
multi_class='auto'))

for inf in infs:
with self.subTest(d_w=d_w, d_x=d_x, d_y=d_y, d_t=d_t,
is_discrete=is_discrete, est=est, inf=inf):
est.fit(Y, T, X, W, inference=inf)
# make sure we can call the marginal_effect and effect methods
const_marg_eff = est.const_marginal_effect(X)
marg_eff = est.marginal_effect(T, X)
self.assertEqual(shape(marg_eff), marginal_effect_shape)
self.assertEqual(shape(const_marg_eff), const_marginal_effect_shape)

np.testing.assert_array_equal(
marg_eff if d_x else marg_eff[0:1], const_marg_eff)

T0 = np.full_like(T, 'a')
eff = est.effect(X, T0=T0, T1=T)
self.assertEqual(shape(eff), effect_shape)
if inf is not None:
const_marg_eff_int = est.const_marginal_effect_interval(X)
marg_eff_int = est.marginal_effect_interval(T, X)
self.assertEqual(shape(marg_eff_int),
(2,) + marginal_effect_shape)
self.assertEqual(shape(const_marg_eff_int),
(2,) + const_marginal_effect_shape)
self.assertEqual(shape(est.effect_interval(X, T0=T0, T1=T)),
(2,) + effect_shape)

est.score(Y, T, X, W)

# make sure we can call effect with implied scalar treatments, no matter the
# dimensions of T, and also that we warn when there are multiple treatments
if d_t > 1:
cm = self.assertWarns(Warning)
else:
cm = ExitStack() # ExitStack can be used as a "do nothing" ContextManager
with cm:
effect_shape2 = (n if d_x else 1,) + ((d_y,) if d_y > 0 else())
eff = est.effect(X, T0='a', T1='b')
self.assertEqual(shape(eff), effect_shape2)
for d_y in [0, 1]:
is_discrete = True
for d_t in [0, 1]:
for d_x in [2, None]:
for d_w in [2, None]:
with self.subTest(d_t=d_t, d_x=d_x, d_w=d_w):
W, X, Y, T = [make_random(is_discrete, d)
for is_discrete, d in [(False, d_w),
(False, d_x),
(False, d_y),
(is_discrete, d_t)]]

if (X is None) and (W is None):
continue
d_t_final = 2 if is_discrete else d_t

effect_shape = (n,) + ((d_y,) if d_y > 0 else ())
marginal_effect_shape = ((n,) +
((d_y,) if d_y > 0 else ()) +
((d_t_final,) if d_t_final > 0 else ()))

# since T isn't passed to const_marginal_effect, defaults to one row if X is None
const_marginal_effect_shape = ((n if d_x else 1,) +
((d_y,) if d_y > 0 else ()) +
((d_t_final,) if d_t_final > 0 else()))

# TODO: add stratification to bootstrap so that we can use it even with discrete treatments
infs = [None, 'statsmodels']

est = LinearDRLearner(model_propensity=LogisticRegression(C=1000, solver='lbfgs',
multi_class='auto'))

for inf in infs:
with self.subTest(d_w=d_w, d_x=d_x, d_y=d_y, d_t=d_t,
is_discrete=is_discrete, est=est, inf=inf):
est.fit(Y, T, X, W, inference=inf)
# make sure we can call the marginal_effect and effect methods
const_marg_eff = est.const_marginal_effect(X)
marg_eff = est.marginal_effect(T, X)
self.assertEqual(shape(marg_eff), marginal_effect_shape)
self.assertEqual(shape(const_marg_eff), const_marginal_effect_shape)

np.testing.assert_array_equal(
marg_eff if d_x else marg_eff[0:1], const_marg_eff)

T0 = np.full_like(T, 'a')
eff = est.effect(X, T0=T0, T1=T)
self.assertEqual(shape(eff), effect_shape)
if inf is not None:
const_marg_eff_int = est.const_marginal_effect_interval(X)
marg_eff_int = est.marginal_effect_interval(T, X)
self.assertEqual(shape(marg_eff_int),
(2,) + marginal_effect_shape)
self.assertEqual(shape(const_marg_eff_int),
(2,) + const_marginal_effect_shape)
self.assertEqual(shape(est.effect_interval(X, T0=T0, T1=T)),
(2,) + effect_shape)

est.score(Y, T, X, W)

# make sure we can call effect with implied scalar treatments, no matter the
# dimensions of T, and also that we warn when there are multiple treatments
if d_t > 1:
cm = self.assertWarns(Warning)
else:
cm = ExitStack() # ExitStack can be used as a "do nothing" ContextManager
with cm:
effect_shape2 = (n if d_x else 1,) + ((d_y,) if d_y > 0 else())
eff = est.effect(X, T0='a', T1='b')
self.assertEqual(shape(eff), effect_shape2)

def test_can_use_vectors(self):
"""
