Reduce test flakiness (#575)
xrowan authored Apr 6, 2022
1 parent 415cb80 commit 3228508
Showing 9 changed files with 261 additions and 201 deletions.
47 changes: 35 additions & 12 deletions azure-pipelines.yml
@@ -98,8 +98,8 @@ jobs:
      # Work around https://github.com/pypa/pip/issues/9542
      - script: 'pip install -U numpy~=1.21.0'
        displayName: 'Upgrade numpy'

-     - script: 'pip install pytest pytest-runner jupyter jupyter-client nbconvert nbformat seaborn xgboost tqdm && python setup.py pytest'
+     - script: 'pip install pytest pytest-runner jupyter jupyter-client nbconvert nbformat seaborn xgboost tqdm && pip list && python setup.py pytest'
        displayName: 'Unit tests'
        env:
          PYTEST_ADDOPTS: '-m "notebook"'
@@ -126,12 +126,6 @@ jobs:
      # Work around https://github.com/pypa/pip/issues/9542
      - script: 'pip install -U numpy~=1.21.0'
        displayName: 'Upgrade numpy'

-     # shap 0.39 and sklearn 1.0 interact badly in these notebooks
-     # shap 0.40 has a bug in waterfall (https://github.com/slundberg/shap/issues/2283) that breaks our main tests
-     # but fixes the interaction here...
-     - script: 'pip install -U shap~=0.40.0'
-       displayName: 'Upgrade shap'
-
      - script: 'pip install pytest pytest-runner jupyter jupyter-client nbconvert nbformat seaborn xgboost tqdm && python setup.py pytest'
        displayName: 'Unit tests'
@@ -207,7 +201,7 @@ jobs:
      - script: 'pip install pytest pytest-runner && python setup.py pytest'
        displayName: 'Unit tests'
        env:
-         PYTEST_ADDOPTS: '-m "not (notebook or automl or dml or causal)" -n 2'
+         PYTEST_ADDOPTS: '-m "not (notebook or automl or dml or serial or cate_api)" -n 2'
          COVERAGE_PROCESS_START: 'setup.cfg'
      - task: PublishTestResults@2
        displayName: 'Publish Test Results **/test-results.xml'
@@ -253,15 +247,44 @@ jobs:
  parameters:
    package: '-e .[tf,plt]'
    job:
-     job: Tests_causal
+     job: Tests_serial
      dependsOn: 'EvalChanges'
      condition: eq(dependencies.EvalChanges.outputs['output.testCode'], 'True')
-     displayName: 'Run tests (Causal)'
+     displayName: 'Run tests (Serial)'
      steps:
      - script: 'pip install pytest pytest-runner && python setup.py pytest'
        displayName: 'Unit tests'
        env:
-         PYTEST_ADDOPTS: '-m "causal" -n 1'
+         PYTEST_ADDOPTS: '-m "serial" -n 1'
          COVERAGE_PROCESS_START: 'setup.cfg'
      - task: PublishTestResults@2
        displayName: 'Publish Test Results **/test-results.xml'
        inputs:
          testResultsFiles: '**/test-results.xml'
          testRunTitle: 'Python $(python.version), image $(imageName)'
        condition: succeededOrFailed()

      - task: PublishCodeCoverageResults@1
        displayName: 'Publish Code Coverage Results'
        inputs:
          codeCoverageTool: Cobertura
          summaryFileLocation: '$(System.DefaultWorkingDirectory)/**/coverage.xml'

+- template: azure-pipelines-steps.yml
+  parameters:
+    package: '-e .[tf,plt]'
+    job:
+      job: Tests_CATE_API
+      dependsOn: 'EvalChanges'
+      condition: eq(dependencies.EvalChanges.outputs['output.testCode'], 'True')
+      displayName: 'Run tests (Other)'
+      steps:
+      - script: 'pip install pytest pytest-runner'
+        displayName: 'Install pytest'
+      - script: 'python setup.py pytest'
+        displayName: 'CATE Unit tests'
+        env:
+          PYTEST_ADDOPTS: '-m "cate_api" -n auto'
+          COVERAGE_PROCESS_START: 'setup.cfg'
+      - task: PublishTestResults@2
+        displayName: 'Publish Test Results **/test-results.xml'
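The "serial" and "cate_api" marks used in the PYTEST_ADDOPTS filters above must be registered with pytest for -m selection to work without unknown-mark warnings. A minimal sketch of such a registration, assuming a hypothetical conftest.py (the repository's actual registration is not part of this diff and may live in setup.cfg instead):

# conftest.py (hypothetical sketch, not from this commit)
# Registers the custom marks so `pytest -m "serial"` and
# `pytest -m "cate_api"` can select tests cleanly.
def pytest_configure(config):
    config.addinivalue_line(
        "markers", "serial: tests that must run on a single xdist worker (-n 1)")
    config.addinivalue_line(
        "markers", "cate_api: CATE API tests, parallelizable with -n auto")

With the marks registered, the jobs partition the suite: '-m "serial" -n 1' runs the serialized tests alone, '-m "cate_api" -n auto' fans the CATE API tests across all cores, and '-m "not (notebook or automl or dml or serial or cate_api)" -n 2' runs everything else on two workers.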
37 changes: 22 additions & 15 deletions econml/tests/test_causal_analysis.py
@@ -2,11 +2,14 @@
# Licensed under the MIT License.

import unittest
+
+from contextlib import ExitStack
+import itertools
import numpy as np
from numpy.core.fromnumeric import squeeze
import pandas as pd
-from contextlib import ExitStack
import pytest
+
from econml.solutions.causal_analysis import CausalAnalysis
from econml.solutions.causal_analysis._causal_analysis import _CausalInsightsConstants
@@ -15,7 +18,7 @@ def assert_less_close(arr1, arr2):
    assert np.all(np.logical_or(arr1 <= arr2, np.isclose(arr1, arr2)))


-@pytest.mark.causal
+@pytest.mark.serial
class TestCausalAnalysis(unittest.TestCase):

    def test_basic_array(self):
@@ -670,21 +673,24 @@ def test_random_state(self):
        inds = [0, 1, 2, 3]
        cats = [2, 3]
        hinds = [0, 3]
-        for n_model in ['linear', 'automl']:
-            for h_model in ['linear', 'forest']:
-                for classification in [True, False]:
-                    ca = CausalAnalysis(inds, cats, hinds, classification=classification,
-                                        nuisance_models=n_model, heterogeneity_model=h_model, random_state=123)
-                    ca.fit(X, y)
-                    glo = ca.global_causal_effect()
-
-                    ca2 = CausalAnalysis(inds, cats, hinds, classification=classification,
-                                         nuisance_models=n_model, heterogeneity_model=h_model, random_state=123)
-                    ca2.fit(X, y)
-                    glo2 = ca.global_causal_effect()
-
-                    np.testing.assert_equal(glo.point.values, glo2.point.values)
-                    np.testing.assert_equal(glo.stderr.values, glo2.stderr.values)
+        for n_model, h_model, classification in\
+                itertools.product(['linear', 'automl'],
+                                  ['linear', 'forest'],
+                                  [True, False]):
+
+            ca = CausalAnalysis(inds, cats, hinds, classification=classification,
+                                nuisance_models=n_model, heterogeneity_model=h_model, random_state=123)
+            ca.fit(X, y)
+            glo = ca.global_causal_effect()
+
+            ca2 = CausalAnalysis(inds, cats, hinds, classification=classification,
+                                 nuisance_models=n_model, heterogeneity_model=h_model, random_state=123)
+            ca2.fit(X, y)
+            glo2 = ca.global_causal_effect()
+
+            np.testing.assert_equal(glo.point.values, glo2.point.values)
+            np.testing.assert_equal(glo.stderr.values, glo2.stderr.values)

    def test_can_set_categories(self):
        y = pd.Series(np.random.choice([0, 1], size=(500,)))
@@ -784,6 +790,7 @@ def test_invalid_inds(self):
        # Pass an example where W is irrelevant and X is confounder
        # As long as DML doesnt change the order of the inputs, then things should be good. Otherwise X would be
        # zeroed out and the test will fail
+
    def test_scaling_transforms(self):
        # shouldn't matter if X is scaled much larger or much smaller than W, we should still get good estimates
        n = 2000
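The test_random_state rewrite above depends on itertools.product yielding exactly the combinations the three nested loops produced. A small self-contained illustration (example values only, not from the test suite):

import itertools

# product(A, B, C) yields tuples in the same order as the nested loops
# `for a in A: for b in B: for c in C:`, so the loop body keeps its
# behavior while dropping two indentation levels.
for n_model, h_model, classification in itertools.product(
        ['linear', 'automl'], ['linear', 'forest'], [True, False]):
    print(n_model, h_model, classification)  # 2 * 2 * 2 = 8 combinations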
13 changes: 8 additions & 5 deletions econml/tests/test_dmliv.py
@@ -1,20 +1,23 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

-import unittest
-import pytest
import pickle
+import unittest
+
import numpy as np
+import pytest
from scipy import special
-from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.ensemble import RandomForestRegressor
+from sklearn.linear_model import LinearRegression, LogisticRegression
+from sklearn.preprocessing import PolynomialFeatures
+
+from econml.iv.dml import OrthoIV, DMLIV, NonParamDMLIV
from econml.iv.dr._dr import _DummyCATE
from econml.sklearn_extensions.linear_model import StatsModelsLinearRegression
-from sklearn.preprocessing import PolynomialFeatures
from econml.utilities import shape
-from econml.iv.dml import OrthoIV, DMLIV, NonParamDMLIV


+@pytest.mark.cate_api
class TestDMLIV(unittest.TestCase):
    def test_cate_api(self):
        def const_marg_eff_shape(n, d_x, d_y, binary_T):
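Applying @pytest.mark.cate_api at class level marks every test method in the class, which is what moves all of TestDMLIV into the new Tests_CATE_API job. A toy sketch of the mechanics (illustrative class, not from the repository):

import unittest
import pytest

# A mark placed on a unittest.TestCase is inherited by each test method:
# `pytest -m cate_api` collects test_example, while the default job's
# `-m "not (... cate_api)"` filter skips the entire class.
@pytest.mark.cate_api
class TestExample(unittest.TestCase):
    def test_example(self):
        self.assertTrue(True)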