Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable model selection for first stage models #808

Merged
merged 20 commits into from
Nov 11, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 23 additions & 17 deletions econml/_ortho_learner.py
Original file line number Diff line number Diff line change
@@ -45,6 +45,7 @@ class in this module implements the general logic in a very versatile way
from .utilities import (_deprecate_positional, check_input_arrays,
cross_product, filter_none_kwargs,
inverse_onehot, jacify_featurizer, ndim, reshape, shape, transpose)
from .sklearn_extensions.model_selection import ModelSelector

try:
import ray
@@ -100,7 +101,7 @@ def _fit_fold(model, train_idxs, test_idxs, calculate_scores, args, kwargs):
kwargs_train = {key: var[train_idxs] for key, var in kwargs.items()}
kwargs_test = {key: var[test_idxs] for key, var in kwargs.items()}

model.fit(*args_train, **kwargs_train)
model.train(False, *args_train, **kwargs_train)
nuisance_temp = model.predict(*args_test, **kwargs_test)

if not isinstance(nuisance_temp, tuple):
@@ -115,17 +116,18 @@ def _fit_fold(model, train_idxs, test_idxs, calculate_scores, args, kwargs):
return nuisance_temp, model, test_idxs, (score_temp if calculate_scores else None)


def _crossfit(model, folds, use_ray, ray_remote_fun_option, *args, **kwargs):
def _crossfit(model: ModelSelector, folds, use_ray, ray_remote_fun_option, *args, **kwargs):
"""
General crossfit based calculation of nuisance parameters.

Parameters
----------
model : object
An object that supports fit and predict. Fit must accept all the args
and the keyword arguments kwargs. Similarly predict must all accept
all the args as arguments and kwards as keyword arguments. The fit
function estimates a model of the nuisance function, based on the input
model : ModelSelector
An object that has train and predict methods.
The train method must take an 'is_selecting' argument first, and then
accept positional arguments `args` and keyword arguments `kwargs`; the predict method
just takes those `args` and `kwargs`. The train
method selects or estimates a model of the nuisance function, based on the input
data to fit. Predict evaluates the fitted nuisance function on the input
data to predict.
folds : list of tuple or None
@@ -177,7 +179,7 @@ def _crossfit(model, folds, use_ray, ray_remote_fun_option, *args, **kwargs):
class Wrapper:
def __init__(self, model):
self._model = model
def fit(self, X, y, W=None):
def train(self, is_selecting, X, y, W=None):
self._model.fit(X, y)
return self
def predict(self, X, y, W=None):
@@ -202,13 +204,17 @@ def predict(self, X, y, W=None):

"""
model_list = []

kwargs = filter_none_kwargs(**kwargs)
model.train(True, *args, **kwargs)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think in some earlier conversations we were considering giving users the option to do "dirty crossfitting", i.e., picking a good estimator using all of the data before crossfitting. Am I correct in understanding that this PR just does "dirty crossfitting" by default?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, and that's definitely something we could consider making easier for users.

It's possible, though not straightforward, to do non-dirty crossfitting now by wrapping a CV estimator in a FixedModelSelector, which always uses the estimator as-is for both selecting and fitting. However, we could make this more efficient: in that case the selecting step is unnecessary, so we could simply skip it.

I'd propose tabling that for now and implementing that as one of several future enhancements to the model selection logic.


calculate_scores = hasattr(model, 'score')
# remove None arguments
kwargs = filter_none_kwargs(**kwargs)

if folds is None: # skip crossfitting
model_list.append(clone(model, safe=False))
model_list[0].fit(*args, **kwargs)
model_list[0].train(True, *args, **kwargs)
model_list[0].train(False, *args, **kwargs) # fit the selected model
nuisances = model_list[0].predict(*args, **kwargs)
scores = model_list[0].score(*args, **kwargs) if calculate_scores else None

@@ -394,7 +400,7 @@ class ModelNuisance:
def __init__(self, model_t, model_y):
self._model_t = model_t
self._model_y = model_y
def fit(self, Y, T, W=None):
def train(self, is_selecting, Y, T, W=None):
self._model_t.fit(W, T)
self._model_y.fit(W, Y)
return self
@@ -448,7 +454,7 @@ class ModelNuisance:
def __init__(self, model_t, model_y):
self._model_t = model_t
self._model_y = model_y
def fit(self, Y, T, W=None):
def train(self, is_selecting, Y, T, W=None):
self._model_t.fit(W, np.matmul(T, np.arange(1, T.shape[1]+1)))
self._model_y.fit(W, Y)
return self
@@ -532,15 +538,15 @@ def _gen_allowed_missing_vars(self):

@abstractmethod
def _gen_ortho_learner_model_nuisance(self):
""" Must return a fresh instance of a nuisance model
"""Must return a fresh instance of a nuisance model selector

Returns
-------
model_nuisance: estimator
The estimator for fitting the nuisance function. Must implement
`fit` and `predict` methods that both have signatures::
model_nuisance: selector
The selector for fitting the nuisance function. The returned estimator must implement
`train` and `predict` methods that both have signatures::

model_nuisance.fit(Y, T, X=X, W=W, Z=Z,
model_nuisance.train(is_selecting, Y, T, X=X, W=W, Z=Z,
sample_weight=sample_weight)
model_nuisance.predict(Y, T, X=X, W=W, Z=Z,
sample_weight=sample_weight)
50 changes: 29 additions & 21 deletions econml/dml/_rlearner.py
Original file line number Diff line number Diff line change
@@ -29,40 +29,35 @@
import numpy as np
import copy
from warnings import warn

from ..sklearn_extensions.model_selection import ModelSelector
from ..utilities import (shape, reshape, ndim, hstack, filter_none_kwargs, _deprecate_positional)
from sklearn.linear_model import LinearRegression
from sklearn.base import clone
from .._ortho_learner import _OrthoLearner


class _ModelNuisance:
class _ModelNuisance(ModelSelector):
"""
Nuisance model fits the model_y and model_t at fit time and at predict time
calculates the residual Y and residual T based on the fitted models and returns
the residuals as two nuisance parameters.
"""

def __init__(self, model_y, model_t):
def __init__(self, model_y: ModelSelector, model_t: ModelSelector):
self._model_y = model_y
self._model_t = model_t

def fit(self, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None):
def train(self, is_selecting, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None):
assert Z is None, "Cannot accept instrument!"
self._model_t.fit(X, W, T, **filter_none_kwargs(sample_weight=sample_weight, groups=groups))
self._model_y.fit(X, W, Y, **filter_none_kwargs(sample_weight=sample_weight, groups=groups))
self._model_t.train(is_selecting, X, W, T, **filter_none_kwargs(sample_weight=sample_weight, groups=groups))
self._model_y.train(is_selecting, X, W, Y, **filter_none_kwargs(sample_weight=sample_weight, groups=groups))
return self

def score(self, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None):
if hasattr(self._model_y, 'score'):
# note that groups are not passed to score because they are only used for fitting
Y_score = self._model_y.score(X, W, Y, **filter_none_kwargs(sample_weight=sample_weight))
else:
Y_score = None
if hasattr(self._model_t, 'score'):
# note that groups are not passed to score because they are only used for fitting
T_score = self._model_t.score(X, W, T, **filter_none_kwargs(sample_weight=sample_weight))
else:
T_score = None
# note that groups are not passed to score because they are only used for fitting
T_score = self._model_t.score(X, W, T, **filter_none_kwargs(sample_weight=sample_weight))
Y_score = self._model_y.score(X, W, Y, **filter_none_kwargs(sample_weight=sample_weight))
return Y_score, T_score

def predict(self, Y, T, X=None, W=None, Z=None, sample_weight=None, groups=None):
@@ -208,6 +203,7 @@ class _RLearner(_OrthoLearner):
import numpy as np
from sklearn.linear_model import LinearRegression
from econml.dml._rlearner import _RLearner
from econml.sklearn_extensions.model_selection import SingleModelSelector
from sklearn.base import clone
class ModelFirst:
def __init__(self, model):
@@ -217,6 +213,18 @@ def fit(self, X, W, Y, sample_weight=None):
return self
def predict(self, X, W):
return self._model.predict(np.hstack([X, W]))
class ModelSelector(SingleModelSelector):
def __init__(self, model):
self._model = ModelFirst(model)
def train(self, is_selecting, X, W, Y, sample_weight=None):
self._model.fit(X, W, Y, sample_weight=sample_weight)
return self
@property
def best_model(self):
return self._model
@property
def best_score(self):
return 0
class ModelFinal:
def fit(self, X, T, T_res, Y_res, sample_weight=None, freq_weight=None, sample_var=None):
self.model = LinearRegression(fit_intercept=False).fit(X * T_res.reshape(-1, 1),
@@ -226,9 +234,9 @@ def predict(self, X):
return self.model.predict(X)
class RLearner(_RLearner):
def _gen_model_y(self):
return ModelFirst(LinearRegression())
return ModelSelector(LinearRegression())
def _gen_model_t(self):
return ModelFirst(LinearRegression())
return ModelSelector(LinearRegression())
def _gen_rlearner_model_final(self):
return ModelFinal()
np.random.seed(123)
@@ -302,7 +310,7 @@ def _gen_model_y(self):
"""
Returns
-------
model_y: estimator of E[Y | X, W]
model_y: selector for the estimator of E[Y | X, W]
The estimator for fitting the response to the features and controls. Must implement
`fit` and `predict` methods. Unlike sklearn estimators both methods must
take an extra second argument (the controls), i.e. ::
@@ -317,7 +325,7 @@ def _gen_model_t(self):
"""
Returns
-------
model_t: estimator of E[T | X, W]
model_t: selector for the estimator of E[T | X, W]
The estimator for fitting the treatment to the features and controls. Must implement
`fit` and `predict` methods. Unlike sklearn estimators both methods must
take an extra second argument (the controls), i.e. ::
@@ -432,11 +440,11 @@ def rlearner_model_final_(self):

@property
def models_y(self):
return [[mdl._model_y for mdl in mdls] for mdls in super().models_nuisance_]
return [[mdl._model_y.best_model for mdl in mdls] for mdls in super().models_nuisance_]

@property
def models_t(self):
return [[mdl._model_t for mdl in mdls] for mdls in super().models_nuisance_]
return [[mdl._model_t.best_model for mdl in mdls] for mdls in super().models_nuisance_]

@property
def nuisance_scores_y(self):
24 changes: 6 additions & 18 deletions econml/dml/causal_forest.py
Original file line number Diff line number Diff line change
@@ -11,7 +11,7 @@
from sklearn.model_selection import train_test_split
from itertools import product
from .dml import _BaseDML
from .dml import _FirstStageWrapper
from .dml import _make_first_stage_selector
from ..sklearn_extensions.linear_model import WeightedLassoCVWrapper
from ..sklearn_extensions.model_selection import WeightedStratifiedKFold
from ..inference import NormalInferenceResults
@@ -548,10 +548,10 @@ class CausalForestDML(_BaseDML):
est.fit(y, T, X=X, W=None)

>>> est.effect(X[:3])
array([0.76625..., 1.52176..., 0.73679...])
array([0.88518..., 1.25061..., 0.81112...])
>>> est.effect_interval(X[:3])
(array([0.39668..., 1.08245... , 0.16566...]),
array([1.13581..., 1.96107..., 1.30791...]))
(array([0.40163..., 0.75023..., 0.46629...]),
array([1.36873..., 1.75099..., 1.15596...]))

Attributes
----------
@@ -668,22 +668,10 @@ def _gen_featurizer(self):
return clone(self.featurizer, safe=False)

def _gen_model_y(self):
if self.model_y == 'auto':
model_y = WeightedLassoCVWrapper(random_state=self.random_state)
else:
model_y = clone(self.model_y, safe=False)
return _FirstStageWrapper(model_y, True, self._gen_featurizer(), False, self.discrete_treatment)
return _make_first_stage_selector(self.model_y, False, self.random_state)

def _gen_model_t(self):
if self.model_t == 'auto':
if self.discrete_treatment:
model_t = LogisticRegressionCV(cv=WeightedStratifiedKFold(random_state=self.random_state),
random_state=self.random_state)
else:
model_t = WeightedLassoCVWrapper(random_state=self.random_state)
else:
model_t = clone(self.model_t, safe=False)
return _FirstStageWrapper(model_t, False, self._gen_featurizer(), False, self.discrete_treatment)
return _make_first_stage_selector(self.model_t, self.discrete_treatment, self.random_state)

def _gen_model_final(self):
return MultiOutputGRF(CausalForest(n_estimators=self.n_estimators,
Loading