From 85904ab92996a659d54f99e89c007349120e8999 Mon Sep 17 00:00:00 2001 From: Keith Battocchi Date: Wed, 25 May 2022 11:46:31 -0400 Subject: [PATCH 1/7] Limit notebook refutation test simulations to avoid timeout --- ...B Testing at An Online Travel Company - EconML + DoWhy.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/CustomerScenarios/Case Study - Recommendation AB Testing at An Online Travel Company - EconML + DoWhy.ipynb b/notebooks/CustomerScenarios/Case Study - Recommendation AB Testing at An Online Travel Company - EconML + DoWhy.ipynb index 22a1fb160..bbf02764f 100644 --- a/notebooks/CustomerScenarios/Case Study - Recommendation AB Testing at An Online Travel Company - EconML + DoWhy.ipynb +++ b/notebooks/CustomerScenarios/Case Study - Recommendation AB Testing at An Online Travel Company - EconML + DoWhy.ipynb @@ -1066,7 +1066,7 @@ } ], "source": [ - "res_random = est_dw.refute_estimate(method_name=\"random_common_cause\", num_simulations=10)\n", + "res_random = est_dw.refute_estimate(method_name=\"random_common_cause\", num_simulations=5)\n", "print(res_random)" ] }, From bb5276d228f478ec0c0f7eafd9686a356a43d561 Mon Sep 17 00:00:00 2001 From: Keith Battocchi Date: Fri, 27 May 2022 12:03:25 -0400 Subject: [PATCH 2/7] Change linear model logic to match sklearn changes --- econml/sklearn_extensions/linear_model.py | 18 +++++++++--------- econml/tests/test_dml.py | 4 ++-- econml/tests/test_drlearner.py | 4 ++-- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/econml/sklearn_extensions/linear_model.py b/econml/sklearn_extensions/linear_model.py index 688b25e95..551175333 100644 --- a/econml/sklearn_extensions/linear_model.py +++ b/econml/sklearn_extensions/linear_model.py @@ -105,7 +105,7 @@ def _fit_weighted_linear_model(self, X, y, sample_weight, check_input=None): X, y, X_offset, y_offset, X_scale = self._preprocess_data( X, y, fit_intercept=self.fit_intercept, normalize=False, copy=self.copy_X, check_input=check_input if check_input is not None else True, - sample_weight=sample_weight, return_mean=True) + sample_weight=sample_weight) # Weight inputs normalized_weights = X.shape[0] * sample_weight / np.sum(sample_weight) sqrt_weights = np.sqrt(normalized_weights) @@ -207,7 +207,7 @@ def __init__(self, alpha=1.0, fit_intercept=True, random_state=None, selection='cyclic'): super().__init__( alpha=alpha, fit_intercept=fit_intercept, - normalize=False, precompute=precompute, copy_X=copy_X, + precompute=precompute, copy_X=copy_X, max_iter=max_iter, tol=tol, warm_start=warm_start, positive=positive, random_state=random_state, selection=selection) @@ -297,11 +297,11 @@ class WeightedMultiTaskLasso(WeightedModelMixin, MultiTaskLasso): """ - def __init__(self, alpha=1.0, fit_intercept=True, normalize=False, + def __init__(self, alpha=1.0, fit_intercept=True, copy_X=True, max_iter=1000, tol=1e-4, warm_start=False, random_state=None, selection='cyclic'): super().__init__( - alpha=alpha, fit_intercept=fit_intercept, normalize=False, + alpha=alpha, fit_intercept=fit_intercept, copy_X=copy_X, max_iter=max_iter, tol=tol, warm_start=warm_start, random_state=random_state, selection=selection) @@ -407,13 +407,13 @@ class WeightedLassoCV(WeightedModelMixin, LassoCV): """ def __init__(self, eps=1e-3, n_alphas=100, alphas=None, fit_intercept=True, - precompute='auto', max_iter=1000, tol=1e-4, normalize=False, + precompute='auto', max_iter=1000, tol=1e-4, copy_X=True, cv=None, verbose=False, n_jobs=None, positive=False, random_state=None, selection='cyclic'): super().__init__( eps=eps, n_alphas=n_alphas, alphas=alphas, - fit_intercept=fit_intercept, normalize=False, + fit_intercept=fit_intercept, precompute=precompute, max_iter=max_iter, tol=tol, copy_X=copy_X, cv=cv, verbose=verbose, n_jobs=n_jobs, positive=positive, random_state=random_state, selection=selection) @@ -518,13 +518,13 @@ class WeightedMultiTaskLassoCV(WeightedModelMixin, MultiTaskLassoCV): """ def __init__(self, eps=1e-3, n_alphas=100, alphas=None, fit_intercept=True, - normalize=False, max_iter=1000, tol=1e-4, + max_iter=1000, tol=1e-4, copy_X=True, cv=None, verbose=False, n_jobs=None, random_state=None, selection='cyclic'): super().__init__( eps=eps, n_alphas=n_alphas, alphas=alphas, - fit_intercept=fit_intercept, normalize=False, + fit_intercept=fit_intercept, max_iter=max_iter, tol=tol, copy_X=copy_X, cv=cv, verbose=verbose, n_jobs=n_jobs, random_state=random_state, selection=selection) @@ -733,7 +733,7 @@ def fit(self, X, y, sample_weight=None, check_input=True): # Center X, y X, y, X_offset, y_offset, X_scale = self._preprocess_data( X, y, fit_intercept=self.fit_intercept, normalize=False, - copy=self.copy_X, check_input=check_input, sample_weight=sample_weight, return_mean=True) + copy=self.copy_X, check_input=check_input, sample_weight=sample_weight) # Calculate quantities that will be used later on. Account for centered data y_pred = self.predict(X) - self.intercept_ diff --git a/econml/tests/test_dml.py b/econml/tests/test_dml.py index c930ca933..6075e5fae 100644 --- a/econml/tests/test_dml.py +++ b/econml/tests/test_dml.py @@ -1143,13 +1143,13 @@ def test_groups(self): # test nested grouping class NestedModel(LassoCV): def __init__(self, eps=1e-3, n_alphas=100, alphas=None, fit_intercept=True, - precompute='auto', max_iter=1000, tol=1e-4, normalize=False, + precompute='auto', max_iter=1000, tol=1e-4, copy_X=True, cv=None, verbose=False, n_jobs=None, positive=False, random_state=None, selection='cyclic'): super().__init__( eps=eps, n_alphas=n_alphas, alphas=alphas, - fit_intercept=fit_intercept, normalize=normalize, + fit_intercept=fit_intercept, precompute=precompute, max_iter=max_iter, tol=tol, copy_X=copy_X, cv=cv, verbose=verbose, n_jobs=n_jobs, positive=positive, random_state=random_state, selection=selection) diff --git a/econml/tests/test_drlearner.py b/econml/tests/test_drlearner.py index 1f81bd0ab..e2c2bc5ee 100644 --- a/econml/tests/test_drlearner.py +++ b/econml/tests/test_drlearner.py @@ -801,13 +801,13 @@ def test_groups(self): # test nested grouping class NestedModel(LassoCV): def __init__(self, eps=1e-3, n_alphas=100, alphas=None, fit_intercept=True, - precompute='auto', max_iter=1000, tol=1e-4, normalize=False, + precompute='auto', max_iter=1000, tol=1e-4, copy_X=True, cv=None, verbose=False, n_jobs=None, positive=False, random_state=None, selection='cyclic'): super().__init__( eps=eps, n_alphas=n_alphas, alphas=alphas, - fit_intercept=fit_intercept, normalize=normalize, + fit_intercept=fit_intercept, precompute=precompute, max_iter=max_iter, tol=tol, copy_X=copy_X, cv=cv, verbose=verbose, n_jobs=n_jobs, positive=positive, random_state=random_state, selection=selection) From 836adef436d57d4b8ce949161ad1af0da640c5d3 Mon Sep 17 00:00:00 2001 From: Keith Battocchi Date: Wed, 8 Jun 2022 11:39:03 -0400 Subject: [PATCH 3/7] Make `pip freeze` part of pipelines --- azure-pipelines-steps.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure-pipelines-steps.yml b/azure-pipelines-steps.yml index 701db9bb4..a2d411917 100644 --- a/azure-pipelines-steps.yml +++ b/azure-pipelines-steps.yml @@ -59,7 +59,7 @@ jobs: condition: and(succeeded(), eq(variables['Agent.OS'], 'Darwin')) # Install the package - - script: 'python -m pip install --upgrade pip && pip install --upgrade setuptools wheel Cython && pip install ${{ parameters.package }}' + - script: 'python -m pip install --upgrade pip && pip install --upgrade setuptools wheel Cython && pip install ${{ parameters.package }} && pip freeze --exclude-editable' displayName: 'Install dependencies' - ${{ parameters.job.steps }} From 846a7906085ac89b9598a86201524d871e2e42fe Mon Sep 17 00:00:00 2001 From: Keith Battocchi Date: Wed, 8 Jun 2022 11:55:33 -0400 Subject: [PATCH 4/7] Cap protobuf version to work around tensorflow issue --- setup.cfg | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/setup.cfg b/setup.cfg index c74bfb7b8..f85e915ea 100644 --- a/setup.cfg +++ b/setup.cfg @@ -57,6 +57,8 @@ tf = ; This extra is not currently compatible with python 3.9 or above because of tensorflow breaking changes keras < 2.4;python_version < '3.9' tensorflow > 1.10, < 2.3;python_version < '3.9' + ; Version capped due to tensorflow incompatibility + protobuf < 4 plt = graphviz matplotlib @@ -64,6 +66,8 @@ all = azure-cli keras < 2.4 tensorflow > 1.10, < 2.3 + ; Version capped due to tensorflow incompatibility + protobuf < 4 matplotlib [options.packages.find] From 2b2889f8f063dcea04bed4649b74758e885aef97 Mon Sep 17 00:00:00 2001 From: Keith Battocchi Date: Wed, 8 Jun 2022 13:21:21 -0400 Subject: [PATCH 5/7] Remove Python 3.6 support --- azure-pipelines-steps.yml | 2 +- azure-pipelines.yml | 2 +- doc/spec/estimation/dml.rst | 4 ++-- setup.cfg | 1 - 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/azure-pipelines-steps.yml b/azure-pipelines-steps.yml index a2d411917..3fbe962e4 100644 --- a/azure-pipelines-steps.yml +++ b/azure-pipelines-steps.yml @@ -6,7 +6,7 @@ parameters: package: '-e .' images: ['ubuntu-18.04', 'macOS-10.15', 'windows-2019'] - versions: ['3.6', '3.7', '3.8', '3.9'] + versions: ['3.7', '3.8', '3.9'] job: job: Job diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 9e645e971..905c113df 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -52,7 +52,7 @@ jobs: - template: azure-pipelines-steps.yml parameters: - versions: ['3.6'] + versions: ['3.8'] images: ['ubuntu-18.04'] package: '-e .[all]' job: diff --git a/doc/spec/estimation/dml.rst b/doc/spec/estimation/dml.rst index a31e44c39..ff3f34263 100644 --- a/doc/spec/estimation/dml.rst +++ b/doc/spec/estimation/dml.rst @@ -553,8 +553,8 @@ Usage FAQs from econml.dml import DML from sklearn.linear_model import ElasticNetCV from sklearn.ensemble import RandomForestRegressor - est = DML(model_y=RandomForestRegressor(oob_score=True), - model_t=RandomForestRegressor(oob_score=True), + est = DML(model_y=RandomForestRegressor(), + model_t=RandomForestRegressor(), model_final=ElasticNetCV(fit_intercept=False), featurizer=PolynomialFeatures(degree=1)) est.fit(y, T, X=X, W=W) est.score_ diff --git a/setup.cfg b/setup.cfg index f85e915ea..dc2ffa146 100644 --- a/setup.cfg +++ b/setup.cfg @@ -20,7 +20,6 @@ project_urls = Source Code=https://github.com/Microsoft/EconML Documentation=https://econml.azurewebsites.net/ classifiers = - Programming Language :: Python :: 3.6 Programming Language :: Python :: 3.7 Programming Language :: Python :: 3.8 Programming Language :: Python :: 3.9 From dbc32eb65515c65dec778fab3a489bed8c075e9f Mon Sep 17 00:00:00 2001 From: Keith Battocchi Date: Wed, 8 Jun 2022 13:37:39 -0400 Subject: [PATCH 6/7] Enable sphinx 5.0 --- doc/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/conf.py b/doc/conf.py index 12eab5cdc..57cfe9fb3 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -89,7 +89,7 @@ # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = None +language = "en" # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. From e28d0b412b7d95eb9a73516b9052e4d4b6a8319d Mon Sep 17 00:00:00 2001 From: Keith Battocchi Date: Wed, 8 Jun 2022 15:50:52 -0400 Subject: [PATCH 7/7] Update dowhy documentation link --- doc/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/conf.py b/doc/conf.py index 57cfe9fb3..8edb25a3b 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -219,7 +219,7 @@ 'sklearn': ('https://scikit-learn.org/stable/', None), 'matplotlib': ('https://matplotlib.org/', None), 'shap': ('https://shap.readthedocs.io/en/stable/', None), - 'dowhy': ('https://microsoft.github.io/dowhy/', None)} + 'dowhy': ('https://py-why.github.io/dowhy/', None)} # -- Options for todo extension ----------------------------------------------