diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 4c80ed729..7f6282ae3 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -98,8 +98,8 @@ jobs:
       # Work around https://github.com/pypa/pip/issues/9542
       - script: 'pip install -U numpy~=1.21.0'
         displayName: 'Upgrade numpy'
-
-      - script: 'pip install pytest pytest-runner jupyter jupyter-client nbconvert nbformat seaborn xgboost tqdm && python setup.py pytest'
+
+      - script: 'pip install pytest pytest-runner jupyter jupyter-client nbconvert nbformat seaborn xgboost tqdm && pip list && python setup.py pytest'
         displayName: 'Unit tests'
         env:
           PYTEST_ADDOPTS: '-m "notebook"'
@@ -126,12 +126,6 @@ jobs:
       # Work around https://github.com/pypa/pip/issues/9542
       - script: 'pip install -U numpy~=1.21.0'
         displayName: 'Upgrade numpy'
-
-      # shap 0.39 and sklearn 1.0 interact badly in these notebooks
-      # shap 0.40 has a bug in waterfall (https://github.com/slundberg/shap/issues/2283) that breaks our main tests
-      # but fixes the interaction here...
-      - script: 'pip install -U shap~=0.40.0'
-        displayName: 'Upgrade shap'
 
       - script: 'pip install pytest pytest-runner jupyter jupyter-client nbconvert nbformat seaborn xgboost tqdm && python setup.py pytest'
         displayName: 'Unit tests'
@@ -207,7 +201,7 @@ jobs:
       - script: 'pip install pytest pytest-runner && python setup.py pytest'
         displayName: 'Unit tests'
         env:
-          PYTEST_ADDOPTS: '-m "not (notebook or automl or dml or causal)" -n 2'
+          PYTEST_ADDOPTS: '-m "not (notebook or automl or dml or serial or cate_api)" -n 2'
           COVERAGE_PROCESS_START: 'setup.cfg'
       - task: PublishTestResults@2
         displayName: 'Publish Test Results **/test-results.xml'
@@ -253,15 +247,44 @@ jobs:
   parameters:
     package: '-e .[tf,plt]'
     job:
-      job: Tests_causal
+      job: Tests_serial
       dependsOn: 'EvalChanges'
       condition: eq(dependencies.EvalChanges.outputs['output.testCode'], 'True')
-      displayName: 'Run tests (Causal)'
+      displayName: 'Run tests (Serial)'
       steps:
       - script: 'pip install pytest pytest-runner && python setup.py pytest'
         displayName: 'Unit tests'
         env:
-          PYTEST_ADDOPTS: '-m "causal" -n 1'
+          PYTEST_ADDOPTS: '-m "serial" -n 1'
+          COVERAGE_PROCESS_START: 'setup.cfg'
+      - task: PublishTestResults@2
+        displayName: 'Publish Test Results **/test-results.xml'
+        inputs:
+          testResultsFiles: '**/test-results.xml'
+          testRunTitle: 'Python $(python.version), image $(imageName)'
+        condition: succeededOrFailed()
+
+      - task: PublishCodeCoverageResults@1
+        displayName: 'Publish Code Coverage Results'
+        inputs:
+          codeCoverageTool: Cobertura
+          summaryFileLocation: '$(System.DefaultWorkingDirectory)/**/coverage.xml'
+
+- template: azure-pipelines-steps.yml
+  parameters:
+    package: '-e .[tf,plt]'
+    job:
+      job: Tests_CATE_API
+      dependsOn: 'EvalChanges'
+      condition: eq(dependencies.EvalChanges.outputs['output.testCode'], 'True')
+      displayName: 'Run tests (Other)'
+      steps:
+      - script: 'pip install pytest pytest-runner'
+        displayName: 'Install pytest'
+      - script: 'python setup.py pytest'
+        displayName: 'CATE Unit tests'
+        env:
+          PYTEST_ADDOPTS: '-m "cate_api" -n auto'
           COVERAGE_PROCESS_START: 'setup.cfg'
       - task: PublishTestResults@2
         displayName: 'Publish Test Results **/test-results.xml'
diff --git a/econml/tests/test_causal_analysis.py b/econml/tests/test_causal_analysis.py
index 316042e93..f99c648d0 100644
--- a/econml/tests/test_causal_analysis.py
+++ b/econml/tests/test_causal_analysis.py
@@ -2,11 +2,14 @@
 # Licensed under the MIT License.
 
 import unittest
+
+from contextlib import ExitStack
+import itertools
 import numpy as np
 from numpy.core.fromnumeric import squeeze
 import pandas as pd
-from contextlib import ExitStack
 import pytest
+
 from econml.solutions.causal_analysis import CausalAnalysis
 from econml.solutions.causal_analysis._causal_analysis import _CausalInsightsConstants
 
@@ -15,7 +18,7 @@ def assert_less_close(arr1, arr2):
     assert np.all(np.logical_or(arr1 <= arr2, np.isclose(arr1, arr2)))
 
 
-@pytest.mark.causal
+@pytest.mark.serial
 class TestCausalAnalysis(unittest.TestCase):
 
     def test_basic_array(self):
@@ -670,21 +673,24 @@ def test_random_state(self):
         inds = [0, 1, 2, 3]
         cats = [2, 3]
         hinds = [0, 3]
-        for n_model in ['linear', 'automl']:
-            for h_model in ['linear', 'forest']:
-                for classification in [True, False]:
-                    ca = CausalAnalysis(inds, cats, hinds, classification=classification,
-                                        nuisance_models=n_model, heterogeneity_model=h_model, random_state=123)
-                    ca.fit(X, y)
-                    glo = ca.global_causal_effect()
-                    ca2 = CausalAnalysis(inds, cats, hinds, classification=classification,
-                                         nuisance_models=n_model, heterogeneity_model=h_model, random_state=123)
-                    ca2.fit(X, y)
-                    glo2 = ca.global_causal_effect()
+        for n_model, h_model, classification in\
+                itertools.product(['linear', 'automl'],
+                                  ['linear', 'forest'],
+                                  [True, False]):
 
-                    np.testing.assert_equal(glo.point.values, glo2.point.values)
-                    np.testing.assert_equal(glo.stderr.values, glo2.stderr.values)
+            ca = CausalAnalysis(inds, cats, hinds, classification=classification,
+                                nuisance_models=n_model, heterogeneity_model=h_model, random_state=123)
+            ca.fit(X, y)
+            glo = ca.global_causal_effect()
+
+            ca2 = CausalAnalysis(inds, cats, hinds, classification=classification,
+                                 nuisance_models=n_model, heterogeneity_model=h_model, random_state=123)
+            ca2.fit(X, y)
+            glo2 = ca2.global_causal_effect()
+
+            np.testing.assert_equal(glo.point.values, glo2.point.values)
+            np.testing.assert_equal(glo.stderr.values, glo2.stderr.values)
 
     def test_can_set_categories(self):
         y = pd.Series(np.random.choice([0, 1], size=(500,)))
@@ -784,6 +790,7 @@ def test_invalid_inds(self):
     # Pass an example where W is irrelevant and X is confounder
     # As long as DML doesnt change the order of the inputs, then things should be good. Otherwise X would be
     # zeroed out and the test will fail
+
     def test_scaling_transforms(self):
         # shouldn't matter if X is scaled much larger or much smaller than W, we should still get good estimates
         n = 2000
diff --git a/econml/tests/test_dmliv.py b/econml/tests/test_dmliv.py
index db8b328d8..54175d0c9 100644
--- a/econml/tests/test_dmliv.py
+++ b/econml/tests/test_dmliv.py
@@ -1,20 +1,23 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 
-import unittest
-import pytest
 import pickle
+import unittest
+
 import numpy as np
+import pytest
 from scipy import special
-from sklearn.linear_model import LinearRegression, LogisticRegression
 from sklearn.ensemble import RandomForestRegressor
+from sklearn.linear_model import LinearRegression, LogisticRegression
+from sklearn.preprocessing import PolynomialFeatures
+
+from econml.iv.dml import OrthoIV, DMLIV, NonParamDMLIV
 from econml.iv.dr._dr import _DummyCATE
 from econml.sklearn_extensions.linear_model import StatsModelsLinearRegression
-from sklearn.preprocessing import PolynomialFeatures
 from econml.utilities import shape
-from econml.iv.dml import OrthoIV, DMLIV, NonParamDMLIV
 
 
+@pytest.mark.cate_api
 class TestDMLIV(unittest.TestCase):
     def test_cate_api(self):
         def const_marg_eff_shape(n, d_x, d_y, binary_T):
diff --git a/econml/tests/test_driv.py b/econml/tests/test_driv.py
index ed3b88eb0..bddff8904 100644
--- a/econml/tests/test_driv.py
+++ b/econml/tests/test_driv.py
@@ -1,161 +1,177 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 
-import unittest
+from econml.iv.dr import (DRIV, LinearDRIV, SparseLinearDRIV, ForestDRIV, IntentToTreatDRIV, LinearIntentToTreatDRIV,)
+from econml.iv.dr._dr import _DummyCATE
+from econml.sklearn_extensions.linear_model import StatsModelsLinearRegression
+from econml.utilities import shape
+
+import itertools
+import numpy as np
 import pytest
 import pickle
-import numpy as np
 from scipy import special
-from sklearn.linear_model import LinearRegression, LogisticRegression
-from econml.iv.dr._dr import _DummyCATE
-from econml.sklearn_extensions.linear_model import StatsModelsLinearRegression
 from sklearn.preprocessing import PolynomialFeatures
-from econml.utilities import shape
-from econml.iv.dr import (DRIV, LinearDRIV, SparseLinearDRIV, ForestDRIV, IntentToTreatDRIV, LinearIntentToTreatDRIV,)
+import unittest
 
 
+@pytest.mark.cate_api
 class TestDRIV(unittest.TestCase):
     def test_cate_api(self):
         def const_marg_eff_shape(n, d_x, binary_T):
+            """Constant marginal effect shape."""
             return (n if d_x else 1,) + ((1,) if binary_T else ())
 
         def marg_eff_shape(n, binary_T):
+            """Marginal effect shape."""
             return (n,) + ((1,) if binary_T else ())
 
         def eff_shape(n, d_x):
+            "Effect shape."
             return (n if d_x else 1,)
 
-        n = 1000
+        n = 500
         y = np.random.normal(size=(n,))
 
-        for d_w in [None, 10]:
+        # parameter combinations to test
+        for d_w, d_x, binary_T, binary_Z, projection, featurizer\
+                in itertools.product(
+                    [None, 10],  # d_w
+                    [None, 3],  # d_x
+                    [True, False],  # binary_T
+                    [True, False],  # binary_Z
+                    [True, False],  # projection
+                    [None, PolynomialFeatures(degree=2, include_bias=False), ]):  # featurizer
+
             if d_w is None:
                 W = None
             else:
                 W = np.random.normal(size=(n, d_w))
-            for d_x in [None, 3]:
-                if d_x is None:
-                    X = None
-                else:
-                    X = np.random.normal(size=(n, d_x))
-                for binary_T in [True, False]:
-                    if binary_T:
-                        T = np.random.choice(["a", "b"], size=(n,))
-                    else:
-                        T = np.random.normal(size=(n,))
-                    for binary_Z in [True, False]:
-                        if binary_Z:
-                            Z = np.random.choice(["c", "d"], size=(n,))
-                        else:
-                            Z = np.random.normal(size=(n,))
-                        for projection in [True, False]:
-                            for featurizer in [
-                                None,
-                                PolynomialFeatures(degree=2, include_bias=False),
-                            ]:
-                                est_list = [
-                                    DRIV(
-                                        flexible_model_effect=StatsModelsLinearRegression(fit_intercept=False),
-                                        model_final=StatsModelsLinearRegression(
-                                            fit_intercept=False
-                                        ),
-                                        fit_cate_intercept=True,
-                                        projection=projection,
-                                        discrete_instrument=binary_Z,
-                                        discrete_treatment=binary_T,
-                                        featurizer=featurizer,
-                                    ),
-                                    LinearDRIV(
-                                        flexible_model_effect=StatsModelsLinearRegression(fit_intercept=False),
-                                        fit_cate_intercept=True,
-                                        projection=projection,
-                                        discrete_instrument=binary_Z,
-                                        discrete_treatment=binary_T,
-                                        featurizer=featurizer,
-                                    ),
-                                    SparseLinearDRIV(
-                                        flexible_model_effect=StatsModelsLinearRegression(fit_intercept=False),
-                                        fit_cate_intercept=True,
-                                        projection=projection,
-                                        discrete_instrument=binary_Z,
-                                        discrete_treatment=binary_T,
-                                        featurizer=featurizer,
-                                    ),
-                                    ForestDRIV(
-                                        flexible_model_effect=StatsModelsLinearRegression(fit_intercept=False),
-                                        projection=projection,
-                                        discrete_instrument=binary_Z,
-                                        discrete_treatment=binary_T,
-                                        featurizer=featurizer,
-                                    ),
-                                ]
-
-                                if X is None:
-                                    est_list = est_list[:-1]
-
-                                if binary_T and binary_Z:
-                                    est_list += [
-                                        IntentToTreatDRIV(
-                                            flexible_model_effect=StatsModelsLinearRegression(
-                                                fit_intercept=False
-                                            ),
-                                            fit_cate_intercept=True,
-                                            featurizer=featurizer,
-                                        ),
-                                        LinearIntentToTreatDRIV(
-                                            flexible_model_effect=StatsModelsLinearRegression(
-                                                fit_intercept=False
-                                            ),
-                                            featurizer=featurizer,
-                                        ),
-                                    ]
-
-                                for est in est_list:
-                                    with self.subTest(d_w=d_w, d_x=d_x, binary_T=binary_T, binary_Z=binary_Z,
-                                                      projection=projection, featurizer=featurizer,
-                                                      est=est):
-
-                                        # ensure we can serialize unfit estimator
-                                        pickle.dumps(est)
-
-                                        est.fit(y, T, Z=Z, X=X, W=W)
-
-                                        # ensure we can serialize fit estimator
-                                        pickle.dumps(est)
-
-                                        # expected effect size
-                                        const_marginal_effect_shape = const_marg_eff_shape(n, d_x, binary_T)
-                                        marginal_effect_shape = marg_eff_shape(n, binary_T)
-                                        effect_shape = eff_shape(n, d_x)
-                                        # test effect
-                                        const_marg_eff = est.const_marginal_effect(X)
-                                        self.assertEqual(shape(const_marg_eff), const_marginal_effect_shape)
-                                        marg_eff = est.marginal_effect(T, X)
-                                        self.assertEqual(shape(marg_eff), marginal_effect_shape)
-                                        T0 = "a" if binary_T else 0
-                                        T1 = "b" if binary_T else 1
-                                        eff = est.effect(X, T0=T0, T1=T1)
-                                        self.assertEqual(shape(eff), effect_shape)
-
-                                        # test inference
-                                        const_marg_eff_int = est.const_marginal_effect_interval(X)
-                                        marg_eff_int = est.marginal_effect_interval(T, X)
-                                        eff_int = est.effect_interval(X, T0=T0, T1=T1)
-                                        self.assertEqual(shape(const_marg_eff_int), (2,) + const_marginal_effect_shape)
-                                        self.assertEqual(shape(marg_eff_int), (2,) + marginal_effect_shape)
-                                        self.assertEqual(shape(eff_int), (2,) + effect_shape)
-
-                                        # test can run score
-                                        est.score(y, T, Z=Z, X=X, W=W)
-
-                                        if X is not None:
-                                            # test cate_feature_names
-                                            expect_feat_len = featurizer.fit(
-                                                X).n_output_features_ if featurizer else d_x
-                                            self.assertEqual(len(est.cate_feature_names()), expect_feat_len)
-
-                                        # test can run shap values
-                                        shap_values = est.shap_values(X[:10])
+
+            if d_x is None:
+                X = None
+            else:
+                X = np.random.normal(size=(n, d_x))
+
+            if binary_T:
+                T = np.random.choice(["a", "b"], size=(n,))
+            else:
+                T = np.random.normal(size=(n,))
+
+            if binary_Z:
+                Z = np.random.choice(["c", "d"], size=(n,))
+            else:
+                Z = np.random.normal(size=(n,))
+
+            est_list = [
+                DRIV(
+                    flexible_model_effect=StatsModelsLinearRegression(fit_intercept=False),
+                    model_final=StatsModelsLinearRegression(
+                        fit_intercept=False
+                    ),
+                    fit_cate_intercept=True,
+                    projection=projection,
+                    discrete_instrument=binary_Z,
+                    discrete_treatment=binary_T,
+                    featurizer=featurizer,
+                ),
+                LinearDRIV(
+                    flexible_model_effect=StatsModelsLinearRegression(fit_intercept=False),
+                    fit_cate_intercept=True,
+                    projection=projection,
+                    discrete_instrument=binary_Z,
+                    discrete_treatment=binary_T,
+                    featurizer=featurizer,
+                ),
+                SparseLinearDRIV(
+                    flexible_model_effect=StatsModelsLinearRegression(fit_intercept=False),
+                    fit_cate_intercept=True,
+                    projection=projection,
+                    discrete_instrument=binary_Z,
+                    discrete_treatment=binary_T,
+                    featurizer=featurizer,
+                ),
+                ForestDRIV(
+                    flexible_model_effect=StatsModelsLinearRegression(fit_intercept=False),
+                    projection=projection,
+                    discrete_instrument=binary_Z,
+                    discrete_treatment=binary_T,
+                    featurizer=featurizer,
+                ),
+            ]
+
+            if X is None:
+                est_list = est_list[:-1]
+
+            if binary_T and binary_Z:
+                est_list += [
+                    IntentToTreatDRIV(
+                        flexible_model_effect=StatsModelsLinearRegression(
+                            fit_intercept=False
+                        ),
+                        fit_cate_intercept=True,
+                        featurizer=featurizer,
+                    ),
+                    LinearIntentToTreatDRIV(
+                        flexible_model_effect=StatsModelsLinearRegression(
+                            fit_intercept=False
+                        ),
+                        featurizer=featurizer,
+                    ),
+                ]
+
+            for est in est_list:
+                with self.subTest(d_w=d_w, d_x=d_x, binary_T=binary_T,
+                                  binary_Z=binary_Z, projection=projection, featurizer=featurizer,
+                                  est=est):
+
+                    # TODO: serializing/deserializing for every combination -- is this necessary?
+                    # ensure we can serialize unfit estimator
+                    pickle.dumps(est)
+
+                    est.fit(y, T, Z=Z, X=X, W=W)
+
+                    # ensure we can serialize fit estimator
+                    pickle.dumps(est)
+
+                    # expected effect size
+                    exp_const_marginal_effect_shape = const_marg_eff_shape(n, d_x, binary_T)
+                    marginal_effect_shape = marg_eff_shape(n, binary_T)
+                    effect_shape = eff_shape(n, d_x)
+
+                    # assert calculated constant marginal effect shape is expected
+                    # const_marginal effect is defined in LinearCateEstimator class
+                    const_marg_eff = est.const_marginal_effect(X)
+                    self.assertEqual(shape(const_marg_eff), exp_const_marginal_effect_shape)
+
+                    # assert calculated marginal effect shape is expected
+                    marg_eff = est.marginal_effect(T, X)
+                    self.assertEqual(shape(marg_eff), marginal_effect_shape)
+
+                    T0 = "a" if binary_T else 0
+                    T1 = "b" if binary_T else 1
+                    eff = est.effect(X, T0=T0, T1=T1)
+                    self.assertEqual(shape(eff), effect_shape)
+
+                    # test inference
+                    const_marg_eff_int = est.const_marginal_effect_interval(X)
+                    marg_eff_int = est.marginal_effect_interval(T, X)
+                    eff_int = est.effect_interval(X, T0=T0, T1=T1)
+                    self.assertEqual(shape(const_marg_eff_int), (2,) + exp_const_marginal_effect_shape)
+                    self.assertEqual(shape(marg_eff_int), (2,) + marginal_effect_shape)
+                    self.assertEqual(shape(eff_int), (2,) + effect_shape)
+
+                    # test can run score
+                    est.score(y, T, Z=Z, X=X, W=W)
+
+                    if X is not None:
+                        # test cate_feature_names
+                        expect_feat_len = featurizer.fit(
+                            X).n_output_features_ if featurizer else d_x
+                        self.assertEqual(len(est.cate_feature_names()), expect_feat_len)
+
+                    # test can run shap values
+                    _ = est.shap_values(X[:10])
 
     def test_accuracy(self):
         np.random.seed(123)
diff --git a/econml/tests/test_drlearner.py b/econml/tests/test_drlearner.py
index 3674aa7af..1f81bd0ab 100644
--- a/econml/tests/test_drlearner.py
+++ b/econml/tests/test_drlearner.py
@@ -1,29 +1,31 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 
-import numpy as np
+from contextlib import ExitStack
+import pickle
 import unittest
+
+import numpy as np
+from numpy.random import normal, multivariate_normal, binomial
 import pytest
-import pickle
+
+import scipy.special
 from sklearn.base import TransformerMixin
-from numpy.random import normal, multivariate_normal, binomial
+from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor, RandomForestRegressor
 from sklearn.exceptions import DataConversionWarning
 from sklearn.linear_model import LinearRegression, Lasso, LassoCV, LogisticRegression
-from sklearn.pipeline import Pipeline
-from sklearn.preprocessing import OneHotEncoder, FunctionTransformer
 from sklearn.model_selection import KFold, GroupKFold
-from sklearn.preprocessing import PolynomialFeatures
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import OneHotEncoder, FunctionTransformer, PolynomialFeatures
+
 from econml.dr import DRLearner, LinearDRLearner, SparseLinearDRLearner, ForestDRLearner
-from econml.utilities import shape, hstack, vstack, reshape, cross_product
 from econml.inference import BootstrapInference, StatsModelsInferenceDiscrete
-from contextlib import ExitStack
-from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor, RandomForestRegressor
-from sklearn.linear_model import LinearRegression, LogisticRegression
+from econml.utilities import shape, hstack, vstack, reshape, cross_product
 from econml.sklearn_extensions.linear_model import StatsModelsLinearRegression
-import scipy.special
 import econml.tests.utilities  # bugfix for assertWarns
 
 
+@pytest.mark.serial
 class TestDRLearner(unittest.TestCase):
 
     @classmethod
diff --git a/econml/tests/test_dynamic_dml.py b/econml/tests/test_dynamic_dml.py
index 7539c18f9..d007a2706 100644
--- a/econml/tests/test_dynamic_dml.py
+++ b/econml/tests/test_dynamic_dml.py
@@ -16,7 +16,7 @@
 from econml.tests.dgp import DynamicPanelDGP
 
 
-@pytest.mark.dml
+@pytest.mark.cate_api
 class TestDynamicDML(unittest.TestCase):
 
     def test_cate_api(self):
diff --git a/econml/tests/test_statsmodels.py b/econml/tests/test_statsmodels.py
index 75a544cdf..9d47ba69b 100644
--- a/econml/tests/test_statsmodels.py
+++ b/econml/tests/test_statsmodels.py
@@ -1,31 +1,29 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 
+import unittest
+
 import numpy as np
 import pytest
-from econml.dml import DML, LinearDML, NonParamDML
+
+import scipy.special
+from sklearn.base import clone
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.linear_model import LinearRegression, LogisticRegression
+from sklearn.preprocessing import PolynomialFeatures
+from statsmodels.regression.linear_model import WLS
+from statsmodels.sandbox.regression.gmm import IV2SLS
+from statsmodels.tools.tools import add_constant
+
+from econml.inference import StatsModelsInference, StatsModelsInferenceDiscrete
+from econml.dml import LinearDML, NonParamDML
 from econml.dr import LinearDRLearner
-from econml.iv.dr import LinearDRIV
 from econml.iv.dml import DMLIV
-from econml.inference import StatsModelsInference, StatsModelsInferenceDiscrete
-from econml.utilities import (ndim, transpose, shape, reshape, hstack, WeightedModelWrapper)
+from econml.iv.dr import LinearDRIV
 from econml.sklearn_extensions.linear_model import WeightedLasso, StatsModelsLinearRegression
-from econml.iv.dr._dr import _DummyCATE
-from statsmodels.regression.linear_model import WLS
-from statsmodels.tools.tools import add_constant
-from statsmodels.sandbox.regression.gmm import IV2SLS
-from sklearn.dummy import DummyClassifier
-from sklearn.linear_model import LinearRegression, LogisticRegression, LassoCV, Lasso, MultiTaskLassoCV
-from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
-from sklearn.model_selection import KFold, StratifiedKFold
-import scipy.special
-import time
 from econml.sklearn_extensions.linear_model import StatsModelsLinearRegression as OLS
 from econml.sklearn_extensions.linear_model import StatsModels2SLS
-import unittest
-import joblib
-from sklearn.preprocessing import PolynomialFeatures
-from sklearn.base import clone
+from econml.utilities import (ndim, transpose, shape, reshape, hstack, WeightedModelWrapper)
 
 
 class StatsModelsOLS:
@@ -267,6 +265,7 @@ def _compare_dr_classes(est, lr, X_test, alpha=.05, tol=1e-10):
         "{}, {}".format(est.effect_interval(X_test, alpha=alpha), lr.effect_interval(X_test, alpha=alpha))
 
 
+@pytest.mark.serial
 class TestStatsModels(unittest.TestCase):
 
     def test_comp_with_lr(self):
@@ -1099,8 +1098,6 @@ def split(self, X, T):
 
     def test_dml_multi_dim_treatment_outcome(self):
         """ Testing that the summarized and unsummarized version of DML gives the correct (known results). """
-        from econml.dml import LinearDML
-        from econml.inference import StatsModelsInference
         np.random.seed(123)
         n = 100000
         precision = .01
diff --git a/econml/tests/test_tree.py b/econml/tests/test_tree.py
index 7de214f6c..b5d898f56 100644
--- a/econml/tests/test_tree.py
+++ b/econml/tests/test_tree.py
@@ -2,15 +2,14 @@
 # Licensed under the MIT License.
 
 import unittest
-import logging
-import time
-import random
+
 import numpy as np
-import sparse as sp
 import pytest
+
 from econml.tree import DepthFirstTreeBuilder, BestSplitter, Tree, MSE
 
 
+@pytest.mark.serial
 class TestTree(unittest.TestCase):
 
     def _get_base_config(self):
@@ -259,10 +258,14 @@ def test_honest_values(self):
         np.testing.assert_array_almost_equal(tree.value.flatten(), .4 * np.ones(len(tree.value)))
 
     def test_noisy_instance(self):
+
+        # initialize parameters
         n_samples = 5000
         X = np.random.normal(0, 1, size=(n_samples, 1))
         y_base = 1.0 * X[:, [0]] * (X[:, [0]] > 0)
         y = y_base + np.random.normal(0, .1, size=(n_samples, 1))
+
+        # initialize config with base config and overwrite some values
         config = self._get_base_config()
         config['n_features'] = 1
         config['max_features'] = 1
@@ -274,11 +277,16 @@ def test_noisy_instance(self):
         config['max_node_samples'] = X.shape[0]
         config['samples_train'] = np.arange(X.shape[0], dtype=np.intp)
         config['samples_val'] = np.arange(X.shape[0], dtype=np.intp)
+
+        # train the tree using the config parameters and check that its
+        # predictions on X_test are close to y_test
         tree = self._train_tree(config, X, y)
         X_test = np.zeros((100, 1))
         X_test[:, 0] = np.linspace(np.percentile(X, 10), np.percentile(X, 90), 100)
         y_test = 1.0 * X_test[:, [0]] * (X_test[:, [0]] > 0)
         np.testing.assert_array_almost_equal(tree.predict(X_test), y_test, decimal=1)
+
+        # initialize config with base honest config and overwrite some values
         config = self._get_base_honest_config()
         config['n_features'] = 1
         config['max_features'] = 1
@@ -290,6 +298,9 @@ def test_noisy_instance(self):
         config['max_node_samples'] = X.shape[0] // 2
         config['samples_train'] = np.arange(X.shape[0] // 2, dtype=np.intp)
         config['samples_val'] = np.arange(X.shape[0] // 2, X.shape[0], dtype=np.intp)
+
+        # train the tree using the config parameters and check that its
+        # predictions on X_test are close to y_test
         tree = self._train_tree(config, X, y)
         X_test = np.zeros((100, 1))
         X_test[:, 0] = np.linspace(np.percentile(X, 10), np.percentile(X, 90), 100)
diff --git a/pyproject.toml b/pyproject.toml
index 9f0652dbb..bf2e2991e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -15,5 +15,6 @@ markers = [
     "notebook",
     "automl",
     "dml",
-    "causal"
+    "serial",
+    "cate_api"
 ]
\ No newline at end of file