Add 2d response array support to DRLearner #209

Merged: 2 commits, Jan 13, 2020
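What the change enables, as a minimal usage sketch — the data, model choices, and shapes below are illustrative assumptions, not code from this PR, and assume the post-merge `DRLearner` API:

```python
import numpy as np
from sklearn.linear_model import LinearRegression, LogisticRegression
from econml.drlearner import DRLearner

# Hypothetical data: binary treatment, outcome kept as a 2D (n, 1) array.
n = 500
X = np.random.normal(size=(n, 2))
T = np.random.binomial(1, 0.5, size=n)
Y = (2.0 * T + X[:, 0] + np.random.normal(size=n)).reshape(n, 1)

est = DRLearner(model_propensity=LogisticRegression(),
                model_regression=LinearRegression(),
                model_final=LinearRegression())
est.fit(Y, T, X=X)       # before this PR, Y had to be passed as Y.ravel()
eff = est.effect(X)      # effect output now carries the Y dimension: (n, 1)
```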
2 changes: 2 additions & 0 deletions README.md
@@ -88,6 +88,8 @@ Such questions arise frequently in customer segmentation (what is the effect of

# News

**January 10, 2020:** Release v0.6.1, see release notes [here](https://github.com/Microsoft/EconML/releases/tag/v0.6.1)

**December 6, 2019:** Release v0.6, see release notes [here](https://github.com/Microsoft/EconML/releases/tag/v0.6)

**November 21, 2019:** Release v0.5, see release notes [here](https://github.com/Microsoft/EconML/releases/tag/v0.5).
56 changes: 33 additions & 23 deletions econml/drlearner.py
@@ -29,15 +29,14 @@

import numpy as np
from warnings import warn

from sklearn.base import clone
from sklearn.linear_model import LogisticRegressionCV, LinearRegression, LassoCV
from econml.utilities import inverse_onehot, check_high_dimensional
from econml.sklearn_extensions.linear_model import WeightedLassoCV, DebiasedLasso
from econml.utilities import inverse_onehot, check_high_dimensional, StatsModelsLinearRegression
from econml.sklearn_extensions.linear_model import WeightedLassoCVWrapper, DebiasedLasso
from econml.sklearn_extensions.ensemble import SubsampledHonestForest
from sklearn.base import clone
from econml._ortho_learner import _OrthoLearner
from econml.cate_estimator import StatsModelsCateEstimatorDiscreteMixin, DebiasedLassoCateEstimatorDiscreteMixin
from econml.utilities import StatsModelsLinearRegression
from sklearn.preprocessing import PolynomialFeatures
from econml.inference import GenericModelFinalInferenceDiscrete


@@ -243,7 +242,7 @@ class takes as input the parameter ``model_regressor``, which is an arbitrary sc
"""

def __init__(self, model_propensity=LogisticRegressionCV(cv=3, solver='lbfgs', multi_class='auto'),
model_regression=WeightedLassoCV(cv=3),
model_regression=WeightedLassoCVWrapper(cv=3),
model_final=StatsModelsLinearRegression(),
multitask_model_final=False,
featurizer=None,
@@ -258,8 +257,9 @@ def _combine(self, X, W):
return np.hstack([arr for arr in [X, W] if arr is not None])

def fit(self, Y, T, X=None, W=None, *, sample_weight=None):
# TODO Allow for non-vector y, i.e. of shape (n, 1)
assert np.ndim(Y) == 1, "Can only accept single dimensional outcomes Y! Use Y.ravel()."
if Y.ndim != 1 and (Y.ndim != 2 or Y.shape[1] != 1):
raise ValueError("The outcome matrix must be of shape ({0}, ) or ({0}, 1), "
"instead got {1}.".format(len(X), Y.shape))
if (X is None) and (W is None):
raise AttributeError("At least one of X or W has to not be None!")
if np.any(np.all(T == 0, axis=0)) or (not np.any(np.all(T == 0, axis=1))):
@@ -274,16 +274,17 @@ def fit(self, Y, T, X=None, W=None, *, sample_weight=None):
def predict(self, Y, T, X=None, W=None, *, sample_weight=None):
XW = self._combine(X, W)
propensities = self._model_propensity.predict_proba(XW)
n = T.shape[0]
Y_pred = np.zeros((T.shape[0], T.shape[1] + 1))
T_counter = np.zeros(T.shape)
Y_pred[:, 0] = self._model_regression.predict(np.hstack([XW, T_counter]))
Y_pred[:, 0] += (Y - Y_pred[:, 0]) * np.all(T == 0, axis=1) / propensities[:, 0]
Y_pred[:, 0] = self._model_regression.predict(np.hstack([XW, T_counter])).reshape(n)
Y_pred[:, 0] += (Y.reshape(n) - Y_pred[:, 0]) * np.all(T == 0, axis=1) / propensities[:, 0]
for t in np.arange(T.shape[1]):
T_counter = np.zeros(T.shape)
T_counter[:, t] = 1
Y_pred[:, t + 1] = self._model_regression.predict(np.hstack([XW, T_counter]))
Y_pred[:, t + 1] += (Y - Y_pred[:, t + 1]) * (T[:, t] == 1) / propensities[:, t + 1]
return Y_pred
Y_pred[:, t + 1] = self._model_regression.predict(np.hstack([XW, T_counter])).reshape(n)
Y_pred[:, t + 1] += (Y.reshape(n) - Y_pred[:, t + 1]) * (T[:, t] == 1) / propensities[:, t + 1]
return Y_pred.reshape(Y.shape + (T.shape[1] + 1,))

class ModelFinal:
# Coding Remark: The reasoning around the multitask_model_final could have been simplified if
@@ -299,36 +300,45 @@ def __init__(self, model_final, featurizer, multitask_model_final):

def fit(self, Y, T, X=None, W=None, *, nuisances, sample_weight=None, sample_var=None):
Y_pred, = nuisances
self.d_y = Y_pred.shape[1:-1] # track whether there's a Y dimension (must be a singleton)
if (X is not None) and (self._featurizer is not None):
X = self._featurizer.fit_transform(X)
filtered_kwargs = _filter_none_kwargs(sample_weight=sample_weight, sample_var=sample_var)
if self._multitask_model_final:
self.model_cate = self._model_final.fit(X, Y_pred[:, 1:] - Y_pred[:, [0]], **filtered_kwargs)
ys = Y_pred[..., 1:] - Y_pred[..., [0]] # subtract control results from each other arm
if self.d_y: # need to squeeze out singleton so that we fit on 2D array
ys = ys.squeeze(1)
self.model_cate = self._model_final.fit(X, ys, **filtered_kwargs)
else:
self.models_cate = [clone(self._model_final, safe=False).fit(X, Y_pred[:, t] - Y_pred[:, 0],
self.models_cate = [clone(self._model_final, safe=False).fit(X, Y_pred[..., t] - Y_pred[..., 0],
**filtered_kwargs)
for t in np.arange(1, Y_pred.shape[1])]
for t in np.arange(1, Y_pred.shape[-1])]
return self

def predict(self, X=None):
if (X is not None) and (self._featurizer is not None):
X = self._featurizer.transform(X)
if self._multitask_model_final:
return self.model_cate.predict(X)
pred = self.model_cate.predict(X)
if self.d_y: # need to reintroduce singleton Y dimension
return pred[:, np.newaxis, :]
return pred
else:
return np.array([mdl.predict(X) for mdl in self.models_cate]).T
preds = np.array([mdl.predict(X) for mdl in self.models_cate])
return np.moveaxis(preds, 0, -1) # move treatment dim to end

def score(self, Y, T, X=None, W=None, *, nuisances, sample_weight=None, sample_var=None):
if (X is not None) and (self._featurizer is not None):
X = self._featurizer.transform(X)
Y_pred, = nuisances
if self._multitask_model_final:
return np.mean(np.average((Y_pred[:, 1:] - Y_pred[:, [0]] - self.model_cate.predict(X))**2,
return np.mean(np.average((Y_pred[..., 1:] - Y_pred[..., [0]] - self.model_cate.predict(X))**2,
weights=sample_weight, axis=0))
else:
return np.mean([np.average((Y_pred[:, t] - Y_pred[:, 0] - self.models_cate[t - 1].predict(X))**2,
return np.mean([np.average((Y_pred[..., t] - Y_pred[..., 0] -
self.models_cate[t - 1].predict(X))**2,
weights=sample_weight, axis=0)
for t in np.arange(1, Y_pred.shape[1])])
for t in np.arange(1, Y_pred.shape[-1])])

self._multitask_model_final = multitask_model_final
super().__init__(ModelNuisance(model_propensity, model_regression),
@@ -615,7 +625,7 @@ class LinearDRLearner(StatsModelsCateEstimatorDiscreteMixin, DRLearner):

def __init__(self,
model_propensity=LogisticRegressionCV(cv=3, solver='lbfgs', multi_class='auto'),
model_regression=WeightedLassoCV(cv=3),
model_regression=WeightedLassoCVWrapper(cv=3),
featurizer=None,
fit_cate_intercept=True,
n_splits=2, random_state=None):
@@ -806,7 +816,7 @@ class SparseLinearDRLearner(DebiasedLassoCateEstimatorDiscreteMixin, DRLearner):

def __init__(self,
model_propensity=LogisticRegressionCV(cv=3, solver='lbfgs', multi_class='auto'),
model_regression=WeightedLassoCV(cv=3),
model_regression=WeightedLassoCVWrapper(cv=3),
featurizer=None,
fit_cate_intercept=True,
alpha='auto',
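The crux of the drlearner.py changes: `ModelNuisance.predict` now flattens an (n, 1) outcome before building the doubly robust pseudo-outcomes and restores the Y dimension on return, while `ModelFinal` tracks that singleton dimension via `d_y`. A standalone numpy sketch of the reshape convention, with stand-in values that are not EconML code:

```python
import numpy as np

# Stand-in shapes mirroring ModelNuisance.predict: n samples, d_t
# non-control arms, outcome passed as a 2D (n, 1) array.
n, d_t = 4, 2
Y = np.arange(n, dtype=float).reshape(n, 1)

# Pseudo-outcomes are built on the flattened outcome, one column per arm.
Y_flat = Y.reshape(n)
Y_pred = np.zeros((n, d_t + 1))

# Doubly robust correction for the control arm, schematically: regression
# prediction plus the inverse-propensity-weighted residual of the units
# actually observed under control.
reg_pred = np.full(n, 0.5)               # stand-in regression predictions
propensity = np.full(n, 1 / (d_t + 1))   # stand-in control propensities
is_control = np.array([1., 0., 1., 0.])  # stand-in indicator of T == 0
Y_pred[:, 0] = reg_pred + (Y_flat - reg_pred) * is_control / propensity

# On return the Y dimension is restored, yielding (n, d_y, d_t + 1).
out = Y_pred.reshape(Y.shape + (d_t + 1,))
print(out.shape)  # (4, 1, 3)
```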
12 changes: 6 additions & 6 deletions econml/inference.py
@@ -221,19 +221,19 @@ def const_marginal_effect_interval(self, X, *, alpha=0.1):
if (X is not None) and (self.featurizer is not None):
X = self.featurizer.fit_transform(X)
preds = np.array([mdl.predict_interval(X, alpha=alpha) for mdl in self.fitted_models_final])
return tuple([preds[:, 0, :].T, preds[:, 1, :].T])
return tuple(np.moveaxis(preds, [0, 1], [-1, 0])) # send treatment to the end, pull bounds to the front

def effect_interval(self, X, *, T0, T1, alpha=0.1):
X, T0, T1 = self._est._expand_treatments(X, T0, T1)
if np.any(np.any(T0 > 0, axis=1)):
raise AttributeError("Can only calculate intervals of effects with respect to baseline treatment!")
ind = (T1 @ np.arange(1, T1.shape[1] + 1)).astype(int)
ind = inverse_onehot(T1)
lower, upper = self.const_marginal_effect_interval(X, alpha=alpha)
lower = np.hstack([np.zeros((lower.shape[0], 1)), lower])
upper = np.hstack([np.zeros((upper.shape[0], 1)), upper])
lower = np.concatenate([np.zeros(lower.shape[0:-1] + (1,)), lower], -1)
upper = np.concatenate([np.zeros(upper.shape[0:-1] + (1,)), upper], -1)
if X is None: # Then const_marginal_effect_interval will return a single row
lower, upper = np.tile(lower, (T0.shape[0], 1)), np.tile(upper, (T0.shape[0], 1))
return lower[np.arange(T0.shape[0]), ind], upper[np.arange(T0.shape[0]), ind]
lower, upper = np.repeat(lower, T0.shape[0], axis=0), np.repeat(upper, T0.shape[0], axis=0)
return lower[np.arange(T0.shape[0]), ..., ind], upper[np.arange(T0.shape[0]), ..., ind]


class LinearModelFinalInferenceDiscrete(GenericModelFinalInferenceDiscrete):
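The `np.moveaxis` and `np.concatenate` edits above generalize the interval bookkeeping from vector outcomes to an optional Y dimension: each per-treatment model's `predict_interval` returns a (lower, upper) pair, so stacking over treatments gives shape (d_t, 2, n, d_y); moving the bounds axis to the front and the treatment axis to the end yields per-bound arrays of shape (n, d_y, d_t). A standalone numpy sketch with assumed shapes:

```python
import numpy as np

d_t, n, d_y = 3, 5, 1
# Stand-in for np.array([mdl.predict_interval(X, alpha=alpha) ...]):
preds = np.zeros((d_t, 2, n, d_y))

# Pull the (lower, upper) axis to the front, send treatments to the end.
lower, upper = tuple(np.moveaxis(preds, [0, 1], [-1, 0]))
print(lower.shape, upper.shape)  # (5, 1, 3) (5, 1, 3)
```

Relatedly, `inverse_onehot(T1)` recovers the same arm indices as the old `(T1 @ np.arange(1, T1.shape[1] + 1)).astype(int)` expression, just more readably.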
106 changes: 55 additions & 51 deletions econml/tests/test_drlearner.py
@@ -70,12 +70,11 @@ def make_random(is_discrete, d):
else:
return np.random.normal(size=sz)

d_y = 0
is_discrete = True
for d_t in [0, 1]:
for d_x in [2, None]:
for d_w in [2, None]:
with self.subTest(d_t=d_t, d_x=d_x, d_w=d_w):
for d_y in [0, 1]:
is_discrete = True
for d_t in [0, 1]:
for d_x in [2, None]:
for d_w in [2, None]:
W, X, Y, T = [make_random(is_discrete, d)
for is_discrete, d in [(False, d_w),
(False, d_x),
@@ -96,51 +95,56 @@ def make_random(is_discrete, d):
((d_y,) if d_y > 0 else ()) +
((d_t_final,) if d_t_final > 0 else()))

# TODO: add stratification to bootstrap so that we can use it even with discrete treatments
infs = [None, 'statsmodels']

est = LinearDRLearner(model_regression=Lasso(),
model_propensity=LogisticRegression(C=1000, solver='lbfgs',
multi_class='auto'))

for inf in infs:
with self.subTest(d_w=d_w, d_x=d_x, d_y=d_y, d_t=d_t,
is_discrete=is_discrete, est=est, inf=inf):
est.fit(Y, T, X, W, inference=inf)
# make sure we can call the marginal_effect and effect methods
const_marg_eff = est.const_marginal_effect(X)
marg_eff = est.marginal_effect(T, X)
self.assertEqual(shape(marg_eff), marginal_effect_shape)
self.assertEqual(shape(const_marg_eff), const_marginal_effect_shape)

np.testing.assert_array_equal(
marg_eff if d_x else marg_eff[0:1], const_marg_eff)

T0 = np.full_like(T, 'a')
eff = est.effect(X, T0=T0, T1=T)
self.assertEqual(shape(eff), effect_shape)
if inf is not None:
const_marg_eff_int = est.const_marginal_effect_interval(X)
marg_eff_int = est.marginal_effect_interval(T, X)
self.assertEqual(shape(marg_eff_int),
(2,) + marginal_effect_shape)
self.assertEqual(shape(const_marg_eff_int),
(2,) + const_marginal_effect_shape)
self.assertEqual(shape(est.effect_interval(X, T0=T0, T1=T)),
(2,) + effect_shape)

est.score(Y, T, X, W)

# make sure we can call effect with implied scalar treatments, no matter the
# dimensions of T, and also that we warn when there are multiple treatments
if d_t > 1:
cm = self.assertWarns(Warning)
else:
cm = ExitStack() # ExitStack can be used as a "do nothing" ContextManager
with cm:
effect_shape2 = (n if d_x else 1,) + ((d_y,) if d_y > 0 else())
eff = est.effect(X, T0='a', T1='b')
self.assertEqual(shape(eff), effect_shape2)
for est in [LinearDRLearner(model_propensity=LogisticRegression(C=1000, solver='lbfgs',
multi_class='auto')),
DRLearner(model_propensity=LogisticRegression(multi_class='auto'),
model_regression=LinearRegression(),
model_final=StatsModelsLinearRegression(),
multitask_model_final=True)]:

# TODO: add stratification to bootstrap so that we can use it even with discrete treatments
infs = [None]
if isinstance(est, LinearDRLearner):
infs.append('statsmodels')

for inf in infs:
with self.subTest(d_w=d_w, d_x=d_x, d_y=d_y, d_t=d_t,
is_discrete=is_discrete, est=est, inf=inf):
est.fit(Y, T, X, W, inference=inf)
# make sure we can call the marginal_effect and effect methods
const_marg_eff = est.const_marginal_effect(X)
marg_eff = est.marginal_effect(T, X)
self.assertEqual(shape(marg_eff), marginal_effect_shape)
self.assertEqual(shape(const_marg_eff), const_marginal_effect_shape)

np.testing.assert_array_equal(
marg_eff if d_x else marg_eff[0:1], const_marg_eff)

T0 = np.full_like(T, 'a')
eff = est.effect(X, T0=T0, T1=T)
self.assertEqual(shape(eff), effect_shape)
if inf is not None:
const_marg_eff_int = est.const_marginal_effect_interval(X)
marg_eff_int = est.marginal_effect_interval(T, X)
self.assertEqual(shape(marg_eff_int),
(2,) + marginal_effect_shape)
self.assertEqual(shape(const_marg_eff_int),
(2,) + const_marginal_effect_shape)
self.assertEqual(shape(est.effect_interval(X, T0=T0, T1=T)),
(2,) + effect_shape)

est.score(Y, T, X, W)

# make sure we can call effect with implied scalar treatments, no matter the
# dimensions of T, and also that we warn when there are multiple treatments
if d_t > 1:
cm = self.assertWarns(Warning)
else:
cm = ExitStack() # ExitStack can be used as a "do nothing" ContextManager
with cm:
effect_shape2 = (n if d_x else 1,) + ((d_y,) if d_y > 0 else())
eff = est.effect(X, T0='a', T1='b')
self.assertEqual(shape(eff), effect_shape2)

def test_can_use_vectors(self):
"""
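For reference, the shape conventions the updated test asserts, instantiated on assumed values (n samples, d_y outcome columns, d_t_final non-control treatment arms; dimensions absent from the input are dropped):

```python
n, d_y, d_t_final = 100, 1, 2
effect_shape = (n,) + ((d_y,) if d_y > 0 else ())
marginal_effect_shape = ((n,) +
                         ((d_y,) if d_y > 0 else ()) +
                         ((d_t_final,) if d_t_final > 0 else ()))
print(effect_shape)           # (100, 1)
print(marginal_effect_shape)  # (100, 1, 2)
```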
4 changes: 2 additions & 2 deletions setup.cfg
@@ -7,11 +7,11 @@ max-line-length=119
convention=numpy

[build_sphinx]
version = 0.6
version = 0.6.1

[metadata]
name = econml
version = 0.6
version = 0.6.1
author = Microsoft Corporation
description = This package contains several methods for calculating Conditional Average Treatment Effects
long_description = file: README.md