Skip to content

Commit

Permalink
Add scores.
Browse files Browse the repository at this point in the history
  • Loading branch information
Miruna Oprescu committed Apr 10, 2021
1 parent 1070aea commit 5f6da40
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 16 deletions.
89 changes: 83 additions & 6 deletions econml/dml/dynamic_dml.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,6 @@ def predict(self, Y, T, X=None, W=None, sample_weight=None, groups=None):
return Y_res, T_res

def score(self, Y, T, X=None, W=None, sample_weight=None, groups=None):
# TODO: implement scores
# TODO: fix correctness?
assert Y.shape[0] % self.n_periods == 0, \
"Length of training data should be an integer multiple of time periods."
inds_score = np.arange(Y.shape[0])[np.arange(Y.shape[0]) % self.n_periods == 0]
Expand Down Expand Up @@ -147,7 +145,7 @@ def __init__(self, model_final, n_periods):
self._model_final_trained = {k: clone(self._model_final, safe=False) for k in np.arange(n_periods)}

def fit(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, sample_var=None):
# TODO: handle sample weight, sample var
# NOTE: sample weight, sample var are not passed in
Y_res, T_res = nuisances
self._d_y = Y.shape[1:]
for kappa in np.arange(self.n_periods):
Expand Down Expand Up @@ -186,8 +184,29 @@ def predict(self, X=None):
return preds

def score(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None, sample_var=None):
    """
    Compute one mean squared error per fitted final model on residualized data.

    The final models are visited in fitting order ``kappa = 0 .. n_periods - 1``,
    where model ``kappa`` corresponds to period ``n_periods - 1 - kappa``. For each
    one, the residual outcome of that period is first adjusted by subtracting the
    predictions of the previously visited models (``tau < kappa``), and the MSE
    between the adjusted outcome and the prediction of model ``kappa`` is stored.

    Parameters
    ----------
    Y : array with first dimension n
        Outcomes; only ``Y.shape[0]`` is used here (n must be a multiple of
        ``n_periods``).
    T, W, Z, sample_var
        Not used by this method.
    X : array with first dimension n, or None
        Features; rows are selected with the period filters before being passed
        to ``predict_with_res``.
    nuisances : tuple (Y_res, T_res)
        Residuals from the first stage. ``T_res`` is indexed as
        ``T_res[rows, ..., period]``, i.e. its last axis is indexed by period.
    sample_weight : array of length n, or None
        Per-sample weights. They are restricted to the rows of the period being
        scored before averaging. Weights whose length differs from n are passed
        to ``np.average`` unchanged.

    Returns
    -------
    scores : ndarray of shape (n_periods,)
        ``scores[kappa]`` is the (optionally weighted) MSE of final model ``kappa``.
    """
    assert Y.shape[0] % self.n_periods == 0, \
        "Length of training data should be an integer multiple of time periods."
    Y_res, T_res = nuisances
    n_samples = Y.shape[0]
    if sample_weight is not None:
        sample_weight = np.asarray(sample_weight)

    scores = np.full((self.n_periods, ), np.nan)
    for kappa in np.arange(self.n_periods):
        period = self.n_periods - 1 - kappa
        period_filter = self.period_filter_gen(period, n_samples)
        Y_adj = Y_res[period_filter].copy()
        if kappa > 0:
            # Remove the part of the outcome already explained by the models
            # fitted before this one.
            Y_adj -= np.sum(
                [self._model_final_trained[tau].predict_with_res(
                    X[self.period_filter_gen(self.n_periods - 1 - tau, n_samples)] if X is not None else None,
                    T_res[period_filter, ..., self.n_periods - 1 - tau]
                ) for tau in np.arange(kappa)], axis=0)
        Y_adj_pred = self._model_final_trained[kappa].predict_with_res(
            X[period_filter] if X is not None else None,
            T_res[period_filter, ..., period])
        squared_error = (Y_adj - Y_adj_pred) ** 2
        if sample_weight is not None:
            # The squared errors only cover the rows of this period, so the
            # weights must be restricted to the same rows; full-length weights
            # would not match the averaged axis.
            if sample_weight.shape[0] == n_samples:
                period_weight = sample_weight[period_filter]
            else:
                period_weight = sample_weight
            scores[kappa] = np.mean(np.average(squared_error, weights=period_weight, axis=0))
        else:
            scores[kappa] = np.mean(squared_error)
    return scores

def period_filter_gen(self, p, n):
    """
    Build a boolean mask of length ``n`` that is True at every position whose
    index modulo ``self.n_periods`` equals ``p``.
    """
    position_in_cycle = np.arange(n) % self.n_periods
    return position_in_cycle == p
Expand Down Expand Up @@ -548,12 +567,39 @@ def fit(self, Y, T, X=None, W=None, *, sample_weight=None, sample_var=None, grou
warn("This CATE estimator does not yet support sample weights and sample variance. "
"These inputs will be ignored during fitting.",
UserWarning)
# TODO: support sample_weight, sample_var?
return super().fit(Y, T, X=X, W=W,
sample_weight=None, sample_var=None, groups=groups,
cache_values=cache_values,
inference=inference)

def score(self, Y, T, X=None, W=None):
    """
    Score the fitted CATE model on a new data set.

    Delegates to the parent class's ``score`` with only ``Y``, ``T``, ``X`` and
    ``W``; this override exists so that no instrument ``Z`` can be supplied.
    If model_final does not have a score method, then it raises an
    :exc:`.AttributeError`.

    Parameters
    ----------
    Y: (n, d_y) matrix or vector of length n
        Outcomes for each sample (required: n = n_groups * n_periods)
    T: (n, d_t) matrix or vector of length n
        Treatments for each sample (required: n = n_groups * n_periods)
    X: optional(n, d_x) matrix or None (Default=None)
        Features for each sample (Required: n = n_groups * n_periods)
    W: optional(n, d_w) matrix or None (Default=None)
        Controls for each sample (Required: n = n_groups * n_periods)

    Returns
    -------
    score
        Whatever the parent class's ``score`` returns for the new data.
        NOTE(review): previously documented as a single float MSE, but the
        final-model score in this file builds one MSE per period -- confirm
        which shape callers actually receive.
    """
    # Z is deliberately not part of this signature and is never forwarded.
    parent_score = super().score(Y, T, X=X, W=W)
    return parent_score

def cate_treatment_names(self, treatment_names=None):
"""
Get treatment names for each time period.
Expand Down Expand Up @@ -658,3 +704,34 @@ def model_final(self):
def model_final(self, model):
if model is not None:
raise ValueError("Parameter `model_final` cannot be altered for this estimator!")

@property
def models_y(self):
    """
    Nested list holding the ``_model_y`` attribute of every entry of the parent
    class's ``models_nuisance_``, with the same two-level nesting.
    """
    # NOTE(review): the meaning of the two nesting levels is defined by the
    # parent class and is not visible here -- confirm before relying on it.
    nested_nuisances = super().models_nuisance_
    return [[nuisance._model_y for nuisance in group] for group in nested_nuisances]

@property
def models_t(self):
    """
    Nested list holding the ``_model_t`` attribute of every entry of the parent
    class's ``models_nuisance_``, with the same two-level nesting.
    """
    # NOTE(review): the meaning of the two nesting levels is defined by the
    # parent class and is not visible here -- confirm before relying on it.
    nested_nuisances = super().models_nuisance_
    return [[nuisance._model_t for nuisance in group] for group in nested_nuisances]

@property
def nuisance_scores_y(self):
    """Return the first element (index 0) of ``self.nuisance_scores_``."""
    all_scores = self.nuisance_scores_
    return all_scores[0]

@property
def nuisance_scores_t(self):
    """Return the second element (index 1) of ``self.nuisance_scores_``."""
    all_scores = self.nuisance_scores_
    return all_scores[1]

@property
def residuals_(self):
    """
    A tuple (y_res, T_res, X, W), of the residuals from the first stage estimation
    along with the associated X and W. Samples are not guaranteed to be in the same
    order as the input order.

    Raises
    ------
    AttributeError
        If ``_cached_values`` has not been set on the estimator, or if it is
        ``None``.
    """
    if not hasattr(self, '_cached_values'):
        raise AttributeError("Estimator is not fitted yet!")
    cached = self._cached_values
    if cached is None:
        raise AttributeError("`fit` was called with `cache_values=False`. "
                             "Set to `True` to enable residual storage.")
    # The cached nuisances are the (Y residual, T residual) pair.
    y_residual, t_residual = cached.nuisances
    return y_residual, t_residual, cached.X, cached.W
16 changes: 6 additions & 10 deletions econml/tests/test_dynamic_dml.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,14 +113,11 @@ def make_random(n, is_discrete, d):
np.testing.assert_allclose(
marg_eff if d_x else marg_eff[0:1], const_marg_eff)

# TODO: add score and nuisance scores
"""
assert isinstance(est.score_, float)
for score in est.nuisance_scores_y:
assert isinstance(score, float)
for score in est.nuisance_scores_t:
assert isinstance(score, float)
"""
assert len(est.score_) == n_periods
for score in est.nuisance_scores_y[0]:
assert score.shape == (n_periods, )
for score in est.nuisance_scores_t[0]:
assert score.shape == (n_periods, n_periods)

T0 = np.full_like(T_test, 'a') if is_discrete else np.zeros_like(T_test)
eff = est.effect(X, T0=T0, T1=T_test)
Expand Down Expand Up @@ -238,8 +235,7 @@ def make_random(n, is_discrete, d):
[0], est.intercept__interval()[0], decimal=5)

est.summary()

# TODO: add score to estimator
# TODO: fix score
"""
est.score(Y, T, X, W)
"""
Expand Down

0 comments on commit 5f6da40

Please sign in to comment.