Create panel subpackage
kbattocchi committed Sep 14, 2022
1 parent dff978e commit 221aec3
Showing 11 changed files with 1,158 additions and 966 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -173,7 +173,7 @@ To install from source, see [For Developers](#for-developers) section below.
<summary>Dynamic Double Machine Learning (click to expand)</summary>

```Python
from econml.dynamic.dml import DynamicDML
from econml.panel.dml import DynamicDML
# Use defaults
est = DynamicDML()
# Or specify hyperparameters
```
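The rest of the README snippet is hidden by this hunk; purely as a hedged illustration (not the README's actual text), specifying hyperparameters might look like the following, using only parameters documented on the estimator:

```python
# Hedged illustration only; the README's actual hyperparameter example is not
# shown in this hunk. Parameter names below come from the DynamicDML docstring.
from sklearn.linear_model import LassoCV
from econml.panel.dml import DynamicDML

est = DynamicDML(
    model_y=LassoCV(),         # first-stage model for the outcome
    model_t=LassoCV(),         # first-stage model for the treatment
    cv=3,                      # cross-fitting folds (GroupKFold over units)
    discrete_treatment=False,  # treatments treated as continuous
)
```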
2 changes: 1 addition & 1 deletion doc/reference.rst
@@ -120,7 +120,7 @@ Dynamic Double Machine Learning
.. autosummary::
:toctree: _autosummary

econml.dynamic.dml.DynamicDML
econml.panel.dml.DynamicDML

.. _policy_api:

6 changes: 3 additions & 3 deletions doc/spec/estimation/dynamic_dml.rst
@@ -46,7 +46,7 @@ characteristics :math:`X` of the treated samples, then one can use this method.

.. testcode::

from econml.dynamic.dml import DynamicDML
from econml.panel.dml import DynamicDML
est = DynamicDML()
est.fit(y_dyn, T_dyn, X=X_dyn, W=W_dyn, groups=groups)
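The `y_dyn`, `T_dyn`, `X_dyn`, `W_dyn`, and `groups` arrays are defined by setup code outside this hunk; a hedged sketch of how such panel inputs are typically shaped (one row per unit-period, with `groups` identifying the unit) is:

```python
# Hypothetical setup sketch; the actual y_dyn/T_dyn/X_dyn/W_dyn/groups used in the
# docs are built elsewhere. Shapes follow the long panel layout DynamicDML expects.
import numpy as np

n_units, n_periods = 100, 3
n = n_units * n_periods
groups = np.repeat(np.arange(n_units), n_periods)  # unit id repeated per period
X_dyn = np.random.normal(size=(n, 1))              # heterogeneity features
W_dyn = np.random.normal(size=(n, 2))              # controls
T_dyn = np.random.normal(size=(n, 1))              # treatment per period
y_dyn = np.random.normal(size=(n,))                # outcome per period
```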

@@ -57,7 +57,7 @@ Class Hierarchy Structure
In this library we implement variants of several of the approaches mentioned in the last section. The hierarchy
structure of the implemented CATE estimators is as follows.

.. inheritance-diagram:: econml.dynamic.dml.DynamicDML
.. inheritance-diagram:: econml.panel.dml.DynamicDML
:parts: 1
:private-bases:
:top-classes: econml._OrthoLearner, econml._cate_estimator.LinearModelFinalCateEstimatorMixin
@@ -83,7 +83,7 @@ Below we give a brief description of each of these classes:

.. testcode::

from econml.dynamic.dml import DynamicDML
from econml.panel.dml import DynamicDML
est = DynamicDML()
est.fit(y_dyn, T_dyn, X=X_dyn, W=W_dyn, groups=groups)

154 changes: 152 additions & 2 deletions econml/dynamic/dml/__init__.py
@@ -15,6 +15,156 @@
`<https://arxiv.org/abs/2002.07285>`_, 2021.
"""

from ._dml import DynamicDML
import econml.panel.dml
from econml.utilities import deprecated

__all__ = ["DynamicDML"]

@deprecated("The DynamicDML class has been moved to econml.panel.dml.DynamicDML; "
            "an upcoming release will remove the econml.dynamic package, please update references to the new location")
def DynamicDML(*,
model_y='auto', model_t='auto',
featurizer=None,
fit_cate_intercept=True,
linear_first_stages=False,
discrete_treatment=False,
categories='auto',
cv=2,
mc_iters=None,
mc_agg='mean',
random_state=None):
"""CATE estimator for dynamic treatment effect estimation.
This estimator is an extension of the Double ML approach for treatments assigned sequentially
over time periods.
The estimator is a special case of an :class:`_OrthoLearner` estimator, so it follows the two
stage process, where a set of nuisance functions are estimated in the first stage in a crossfitting
manner and a final stage estimates the CATE model. See the documentation of
:class:`._OrthoLearner` for a description of this two stage process.
Parameters
----------
model_y: estimator or 'auto', optional (default is 'auto')
The estimator for fitting the response to the features. Must implement
`fit` and `predict` methods.
If 'auto' :class:`.WeightedLassoCV`/:class:`.WeightedMultiTaskLassoCV` will be chosen.
model_t: estimator or 'auto', optional (default is 'auto')
The estimator for fitting the treatment to the features.
If estimator, it must implement `fit` and `predict` methods;
If 'auto', :class:`~sklearn.linear_model.LogisticRegressionCV` will be applied for discrete treatment,
and :class:`.WeightedLassoCV`/:class:`.WeightedMultiTaskLassoCV`
will be applied for continuous treatment.
featurizer : :term:`transformer`, optional, default None
Must support fit_transform and transform. Used to create composite features in the final CATE regression.
It is ignored if X is None. The final CATE will be trained on the outcome of featurizer.fit_transform(X).
If featurizer=None, then CATE is trained on X.
fit_cate_intercept : bool, optional, default True
Whether the linear CATE model should have a constant term.
linear_first_stages: bool
Whether the first stage models are linear (in which case we will expand the features passed to
`model_y` accordingly)
discrete_treatment: bool, optional (default is ``False``)
Whether the treatment values should be treated as categorical, rather than continuous, quantities
categories: 'auto' or list, default 'auto'
The categories to use when encoding discrete treatments (or 'auto' to use the unique sorted values).
The first category will be treated as the control treatment.
cv: int, cross-validation generator or an iterable, optional (Default=2)
Determines the cross-validation splitting strategy.
Possible inputs for cv are:
- None, to use the default 3-fold cross-validation,
- integer, to specify the number of folds.
- :term:`CV splitter`
- An iterable yielding (train, test) splits as arrays of indices.
Iterables should make sure a group belongs to a single split.
For integer/None inputs, :class:`~sklearn.model_selection.GroupKFold` is used
Unless an iterable is used, we call `split(X, T, groups)` to generate the splits.
mc_iters: int, optional (default=None)
The number of times to rerun the first stage models to reduce the variance of the nuisances.
mc_agg: {'mean', 'median'}, optional (default='mean')
How to aggregate the nuisance value for each sample across the `mc_iters` monte carlo iterations of
cross-fitting.
random_state: int, :class:`~numpy.random.mtrand.RandomState` instance or None, optional (default=None)
If int, random_state is the seed used by the random number generator;
If :class:`~numpy.random.mtrand.RandomState` instance, random_state is the random number generator;
If None, the random number generator is the :class:`~numpy.random.mtrand.RandomState` instance used
by :mod:`np.random<numpy.random>`.
Examples
--------
A simple example with default models:
.. testcode::
:hide:
import numpy as np
np.set_printoptions(suppress=True)
.. testcode::
from econml.panel.dml import DynamicDML
np.random.seed(123)
n_panels = 100 # number of panels
n_periods = 3 # number of time periods per panel
n = n_panels * n_periods
groups = np.repeat(a=np.arange(n_panels), repeats=n_periods, axis=0)
X = np.random.normal(size=(n, 1))
T = np.random.normal(size=(n, 2))
y = np.random.normal(size=(n, ))
est = DynamicDML()
est.fit(y, T, X=X, W=None, groups=groups, inference="auto")
>>> est.const_marginal_effect(X[:2])
array([[-0.336..., -0.048..., -0.061..., 0.042..., -0.204...,
0.00667271],
[-0.101..., 0.433..., 0.054..., -0.217..., -0.101...,
-0.159...]])
>>> est.effect(X[:2], T0=0, T1=1)
array([-0.601..., -0.091...])
>>> est.effect(X[:2], T0=np.zeros((2, n_periods*T.shape[1])), T1=np.ones((2, n_periods*T.shape[1])))
array([-0.601..., -0.091...])
>>> est.coef_
array([[ 0.112...],
[ 0.231...],
[ 0.055...],
[-0.125...],
[ 0.049...],
[-0.079...]])
>>> est.coef__interval()
(array([[-0.063...],
[-0.009...],
[-0.114...],
[-0.413...],
[-0.117...],
[-0.262...]]), array([[0.289...],
[0.471...],
[0.225...],
[0.163...],
[0.216...],
[0.103...]]))
"""
return econml.panel.dml.DynamicDML(
model_y=model_y, model_t=model_t,
featurizer=featurizer,
fit_cate_intercept=fit_cate_intercept,
linear_first_stages=linear_first_stages,
discrete_treatment=discrete_treatment,
categories=categories,
cv=cv,
mc_iters=mc_iters,
mc_agg=mc_agg,
random_state=random_state)
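A hedged sketch (not part of this diff) of what the shim above gives callers: the old import path keeps working, but construction is routed to the relocated class and is expected to emit a deprecation warning (the exact warning category depends on `econml.utilities.deprecated`):

```python
import warnings

from econml.panel.dml import DynamicDML as PanelDynamicDML     # new location
from econml.dynamic.dml import DynamicDML as LegacyDynamicDML  # deprecated shim

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    est = LegacyDynamicDML()  # arguments are forwarded to econml.panel.dml.DynamicDML

assert isinstance(est, PanelDynamicDML)  # same underlying estimator class
assert len(caught) >= 1                  # the @deprecated wrapper is expected to warn
```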
4 changes: 4 additions & 0 deletions econml/panel/__init__.py
@@ -0,0 +1,4 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

__all__ = ["dml"]
20 changes: 20 additions & 0 deletions econml/panel/dml/__init__.py
@@ -0,0 +1,20 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

"""Double Machine Learning for Dynamic Treatment Effects.
A Double/Orthogonal machine learning approach to estimation of heterogeneous
treatment effect in the dynamic treatment regime. For the theoretical
foundations of these methods see: [dynamicdml]_.
References
----------
.. [dynamicdml] Greg Lewis and Vasilis Syrgkanis.
Double/Debiased Machine Learning for Dynamic Treatment Effects.
`<https://arxiv.org/abs/2002.07285>`_, 2021.
"""

from ._dml import DynamicDML

__all__ = ["DynamicDML"]
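A small hedged sanity check (not part of the diff) of the public surface these two new `__init__.py` files create:

```python
# The expected exports follow directly from the two __init__.py files above.
import econml.panel
import econml.panel.dml

assert econml.panel.__all__ == ["dml"]             # from econml/panel/__init__.py
assert econml.panel.dml.__all__ == ["DynamicDML"]  # from econml/panel/dml/__init__.py
assert econml.panel.dml.DynamicDML.__name__ == "DynamicDML"
```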
2 changes: 1 addition & 1 deletion econml/dynamic/dml/_dml.py → econml/panel/dml/_dml.py
@@ -408,7 +408,7 @@ class DynamicDML(LinearModelFinalCateEstimatorMixin, _OrthoLearner):
.. testcode::
from econml.dynamic.dml import DynamicDML
from econml.panel.dml import DynamicDML
np.random.seed(123)
39 changes: 39 additions & 0 deletions econml/panel/utilities.py
@@ -0,0 +1,39 @@

import numpy as np


def long(x):
"""
Reshape panel data to long format, i.e. (n_units * n_periods, d_x) or (n_units * n_periods,)
Parameters
----------
x : array-like
Panel data in wide format
Returns
-------
arr : array-like
Reshaped panel data in long format"""
n_units = x.shape[0]
n_periods = x.shape[1]
if np.ndim(x) == 2:
return x.reshape(n_units * n_periods)
else:
return x.reshape(n_units * n_periods, -1)


def wide(x):
"""Reshape panel data to wide format, i.e. (n_units, n_periods * d_x) or (n_units, n_periods,)
Parameters
----------
x : array-like
Panel data in long format
Returns
-------
arr : array-like
Reshaped panel data in wide format"""
n_units = x.shape[0]
return x.reshape(n_units, -1)
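A hedged usage sketch (not part of the diff) of the two new helpers, assuming the array layouts described in their docstrings:

```python
import numpy as np
from econml.panel.utilities import long, wide

n_units, n_periods, d_x = 4, 3, 2

# Per-unit layout: one row per unit, one slice per period, d_x features.
X_panel = np.arange(n_units * n_periods * d_x, dtype=float).reshape(n_units, n_periods, d_x)

X_long = long(X_panel)   # -> shape (12, 2): periods stacked within units
X_wide = wide(X_panel)   # -> shape (4, 6): periods and features flattened per unit
assert X_long.shape == (n_units * n_periods, d_x)
assert X_wide.shape == (n_units, n_periods * d_x)

# A 2-D outcome panel (n_units, n_periods) collapses to a 1-D long vector.
y_panel = np.arange(n_units * n_periods, dtype=float).reshape(n_units, n_periods)
assert long(y_panel).shape == (n_units * n_periods,)
```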
4 changes: 2 additions & 2 deletions econml/tests/test_dynamic_dml.py
@@ -8,8 +8,8 @@
from sklearn.preprocessing import OneHotEncoder, FunctionTransformer, PolynomialFeatures
from sklearn.linear_model import (LinearRegression, LassoCV, Lasso, MultiTaskLasso,
MultiTaskLassoCV, LogisticRegression)
from econml.dynamic.dml import DynamicDML
from econml.dynamic.dml._dml import _get_groups_period_filter
from econml.panel.dml import DynamicDML
from econml.panel.dml._dml import _get_groups_period_filter
from econml.inference import BootstrapInference, EmpiricalInferenceResults, NormalInferenceResults
from econml.utilities import shape, hstack, vstack, reshape, cross_product
import econml.tests.utilities # bugfix for assertWarns


6 changes: 3 additions & 3 deletions notebooks/Dynamic Double Machine Learning Examples.ipynb
@@ -94,7 +94,7 @@
"outputs": [],
"source": [
"# Main imports\n",
"from econml.dynamic.dml import DynamicDML\n",
"from econml.panel.dml import DynamicDML\n",
"from econml.tests.dgp import DynamicPanelDGP, add_vlines\n",
"\n",
"# Helper imports\n",
@@ -188,7 +188,7 @@
{
"data": {
"text/plain": [
"<econml.dynamic.dml._dml.DynamicDML at 0x19d2abd6a00>"
"<econml.panel.dml._dml.DynamicDML at 0x19d2abd6a00>"
]
},
"execution_count": 7,
@@ -461,7 +461,7 @@
{
"data": {
"text/plain": [
"<econml.dynamic.dml._dml.DynamicDML at 0x19d2ae7e5e0>"
"<econml.panel.dml._dml.DynamicDML at 0x19d2ae7e5e0>"
]
},
"execution_count": 17,
