From e0d3093c14f04d8ec7d7ec10f793b42923873e70 Mon Sep 17 00:00:00 2001
From: Jarek Potiuk <jarek.potiuk@polidea.com>
Date: Tue, 8 Sep 2020 07:36:12 +0200
Subject: [PATCH] Removes stable tests from quarantine (#10768)

We've observed the tests for the last couple of weeks and it seems
most of the tests marked with the "quarantined" marker are succeeding
in a stable way (https://github.com/apache/airflow/issues/10118)
The removed tests have success ratio of > 95% (20 runs without
problems) and this has been verified a week ago as well,
so it seems they are rather stable.

There are literally a few that are either failing or causing
the Quarantined builds to hang. I manually reviewed the
master tests that failed for the last few weeks and added the
tests that are causing the build to hang.

Seems that stability has improved - which might be caused
by some temporary problems when we marked the quarantined builds
or too "generous" way of marking test as quarantined, or
maybe improvement comes from the #10368 as the docker engine
and machines used to run the builds in GitHub experience far
less load (image builds are executed in separate builds) so
it might be that resource usage is decreased. Another reason
might be Github Actions stability improvements.

Or simply those tests are more stable when run in isolation.

We might still add failing tests back as soon as we see them behave
in a flaky way.

The remaining quarantined tests that need to be fixed:
 * test_local_run (often hangs the build)
 * test_retry_handling_job
 * test_clear_multiple_external_task_marker
 * test_should_force_kill_process
 * test_change_state_for_tis_without_dagrun
 * test_cli_webserver_background

We also move some of those tests to the "heisentests" category.
Those tests run fine in isolation but fail
the builds when run with all other tests:
 * TestImpersonation tests

We might find that those heisentests can be fixed but for
now we are going to run them in isolation.

Also - since those quarantined tests are failing more often
the "num runs" to track for those has been decreased to 10
to keep track of 10 last runs only.

GitOrigin-RevId: b746f33fc66ce2ecdc6d72a9943fc2db00da0f45
---
 .github/workflows/ci.yml                     |  8 ++++----
 TESTING.rst                                  | 12 ++++++++++++
 scripts/ci/docker-compose/base.yml           |  1 +
 scripts/ci/testing/ci_run_airflow_testing.sh |  6 ++----
 scripts/in_container/entrypoint_ci.sh        |  9 +++++++++
 tests/cli/commands/test_dag_command.py       |  2 --
 tests/cli/commands/test_task_command.py      |  1 -
 tests/conftest.py                            | 18 ++++++++++++++++++
 tests/executors/test_dask_executor.py        |  3 ---
 tests/jobs/test_backfill_job.py              |  1 -
 tests/jobs/test_local_task_job.py            |  2 --
 tests/jobs/test_scheduler_job.py             |  5 +----
 tests/sensors/test_timeout_sensor.py         |  3 ---
 tests/test_impersonation.py                  |  2 +-
 tests/utils/test_process_utils.py            |  2 +-
 tests/www/test_views.py                      |  1 -
 16 files changed, 49 insertions(+), 27 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 61ae572d722..bf89cdd3a56 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -298,7 +298,7 @@ jobs:
       matrix:
         python-version: [3.6, 3.7]
         postgres-version: [9.6, 10]
-        test-type: [Core, Integration]
+        test-type: [Core, Integration, Heisentests]
       fail-fast: false
     env:
       BACKEND: postgres
@@ -344,7 +344,7 @@ jobs:
       matrix:
         python-version: [3.7, 3.8]
         mysql-version: [5.7]
-        test-type: [Core, Integration]
+        test-type: [Core, Integration, Heisentests]
       fail-fast: false
     env:
       BACKEND: mysql
@@ -388,7 +388,7 @@ jobs:
     strategy:
       matrix:
         python-version: [3.6, 3.8]
-        test-type: [Core, Integration]
+        test-type: [Core, Integration, Heisentests]
       fail-fast: false
     env:
       BACKEND: sqlite
@@ -439,7 +439,7 @@ jobs:
       POSTGRES_VERSION: ${{ matrix.postgres-version }}
       RUN_TESTS: true
       TEST_TYPE: Quarantined
-      NUM_RUNS: 20
+      NUM_RUNS: 10
       GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
     if: >
       (needs.trigger-tests.outputs.run-tests == 'true' || github.event_name != 'pull_request') &&
diff --git a/TESTING.rst b/TESTING.rst
index 93649895f6e..7b3386af8a1 100644
--- a/TESTING.rst
+++ b/TESTING.rst
@@ -324,6 +324,18 @@ Those tests are marked with ``@pytest.mark.quarantined`` annotation.
 Those tests are skipped by default. You can enable them with ``--include-quarantined`` flag. You
 can also decide to only run tests with ``-m quarantined`` flag to run only those tests.
 
+Heisen tests
+------------
+
+Some of our tests are Heisentests. This means that they run fine in isolation but when they run together with
+others they might fail (this is likely due to resource consumption). Therefore we run those tests
+in isolation.
+
+Those tests are marked with ``@pytest.mark.heisentests`` annotation.
+Those tests are skipped by default. You can enable them with ``--include-heisentests`` flag. You
+can also decide to only run tests with ``-m heisentests`` flag to run only those tests.
+
+
 Running Tests with Kubernetes
 =============================
 
diff --git a/scripts/ci/docker-compose/base.yml b/scripts/ci/docker-compose/base.yml
index f20be68c9c7..77792de458f 100644
--- a/scripts/ci/docker-compose/base.yml
+++ b/scripts/ci/docker-compose/base.yml
@@ -41,6 +41,7 @@ services:
       - RUN_INTEGRATION_TESTS
       - ONLY_RUN_LONG_RUNNING_TESTS
       - ONLY_RUN_QUARANTINED_TESTS
+      - ONLY_RUN_HEISEN_TESTS
       - GITHUB_TOKEN
       - GITHUB_REPOSITORY
       - ISSUE_ID
diff --git a/scripts/ci/testing/ci_run_airflow_testing.sh b/scripts/ci/testing/ci_run_airflow_testing.sh
index 38df1425ba6..62f09439846 100755
--- a/scripts/ci/testing/ci_run_airflow_testing.sh
+++ b/scripts/ci/testing/ci_run_airflow_testing.sh
@@ -26,6 +26,8 @@ if [[ ${TEST_TYPE:=} == "Integration" ]]; then
     export RUN_INTEGRATION_TESTS="${AVAILABLE_INTEGRATIONS}"
 elif [[ ${TEST_TYPE:=} == "Long" ]]; then
     export ONLY_RUN_LONG_RUNNING_TESTS="true"
+elif [[ ${TEST_TYPE:=} == "Heisentests" ]]; then
+    export ONLY_RUN_HEISEN_TESTS="true"
 elif [[ ${TEST_TYPE:=} == "Quarantined" ]]; then
     export ONLY_RUN_QUARANTINED_TESTS="true"
     # Do not fail in quarantined tests
@@ -128,7 +130,3 @@ echo
 RUN_INTEGRATION_TESTS=${RUN_INTEGRATION_TESTS:=""}
 
 run_airflow_testing_in_docker "${@}"
-
-if [[ ${TEST_TYPE:=} == "Quarantined" ]]; then
-    export ONLY_RUN_QUARANTINED_TESTS="true"
-fi
diff --git a/scripts/in_container/entrypoint_ci.sh b/scripts/in_container/entrypoint_ci.sh
index 25fb4806a2a..55cfc827d38 100755
--- a/scripts/in_container/entrypoint_ci.sh
+++ b/scripts/in_container/entrypoint_ci.sh
@@ -216,6 +216,15 @@ elif [[ ${ONLY_RUN_LONG_RUNNING_TESTS:=""} == "true" ]]; then
         "--execution-timeout=120"
         "--teardown-timeout=30"
     )
+elif [[ ${ONLY_RUN_HEISEN_TESTS:=""} == "true" ]]; then
+    EXTRA_PYTEST_ARGS+=(
+        "-m" "heisentests"
+        "--include-heisentests"
+        "--verbosity=1"
+        "--setup-timeout=20"
+        "--execution-timeout=50"
+        "--teardown-timeout=20"
+    )
 elif [[ ${ONLY_RUN_QUARANTINED_TESTS:=""} == "true" ]]; then
     EXTRA_PYTEST_ARGS+=(
         "-m" "quarantined"
diff --git a/tests/cli/commands/test_dag_command.py b/tests/cli/commands/test_dag_command.py
index 03579ad76da..7e776c5c11d 100644
--- a/tests/cli/commands/test_dag_command.py
+++ b/tests/cli/commands/test_dag_command.py
@@ -23,7 +23,6 @@
 from datetime import datetime, timedelta
 
 import mock
-import pytest
 
 from airflow import settings
 from airflow.cli import cli_parser
@@ -381,7 +380,6 @@ def test_pause(self):
         dag_command.dag_unpause(args)
         self.assertIn(self.dagbag.dags['example_bash_operator'].get_is_paused(), [False, 0])
 
-    @pytest.mark.quarantined
     def test_trigger_dag(self):
         dag_command.dag_trigger(self.parser.parse_args([
             'dags', 'trigger', 'example_bash_operator',
diff --git a/tests/cli/commands/test_task_command.py b/tests/cli/commands/test_task_command.py
index 5d4918a2bb4..0a4b9916677 100644
--- a/tests/cli/commands/test_task_command.py
+++ b/tests/cli/commands/test_task_command.py
@@ -355,7 +355,6 @@ def setUp(self):
 
         self.parser = cli_parser.get_parser()
 
-    @pytest.mark.quarantined
     def test_run_ignores_all_dependencies(self):
         """
         Test that run respects ignore_all_dependencies
diff --git a/tests/conftest.py b/tests/conftest.py
index eac88d62719..20c072bd409 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -154,6 +154,11 @@ def pytest_addoption(parser):
         action="store_true",
         help="Includes quarantined tests (marked with quarantined marker). They are skipped by default.",
     )
+    group.addoption(
+        "--include-heisentests",
+        action="store_true",
+        help="Includes heisentests (marked with heisentests marker). They are skipped by default.",
+    )
     allowed_trace_sql_columns_list = ",".join(ALLOWED_TRACE_SQL_COLUMNS)
     group.addoption(
         "--trace-sql",
@@ -245,6 +250,9 @@ def pytest_configure(config):
     config.addinivalue_line(
         "markers", "quarantined: mark test that are in quarantine (i.e. flaky, need to be isolated and fixed)"
     )
+    config.addinivalue_line(
+        "markers", "heisentests: mark test that should be run in isolation due to resource consumption"
+    )
     config.addinivalue_line(
         "markers", "credential_file(name): mark tests that require credential file in CREDENTIALS_DIR"
     )
@@ -308,6 +316,13 @@ def skip_quarantined_test(item):
                     format(item=item))
 
 
+def skip_heisen_test(item):  # invoked only when --include-heisentests is absent
+    for _ in item.iter_markers(name="heisentests"):
+        pytest.skip("The test is skipped because it has heisentests marker. "
+                    "And --include-heisentests flag is not passed to pytest. {item}".
+                    format(item=item))
+
+
 def skip_if_integration_disabled(marker, item):
     integration_name = marker.args[0]
     environment_variable_name = "INTEGRATION_" + integration_name.upper()
@@ -355,6 +370,7 @@ def pytest_runtest_setup(item):
 
     include_long_running = item.config.getoption("--include-long-running")
     include_quarantined = item.config.getoption("--include-quarantined")
+    include_heisentests = item.config.getoption("--include-heisentests")
 
     for marker in item.iter_markers(name="integration"):
         skip_if_integration_disabled(marker, item)
@@ -373,6 +389,8 @@ def pytest_runtest_setup(item):
         skip_long_running_test(item)
     if not include_quarantined:
         skip_quarantined_test(item)
+    if not include_heisentests:
+        skip_heisen_test(item)
     skip_if_credential_file_missing(item)
     skip_if_airflow_2_test(item)
 
diff --git a/tests/executors/test_dask_executor.py b/tests/executors/test_dask_executor.py
index ea6e4f6f827..2f43271acb4 100644
--- a/tests/executors/test_dask_executor.py
+++ b/tests/executors/test_dask_executor.py
@@ -19,8 +19,6 @@
 from datetime import timedelta
 from unittest import mock
 
-import pytest
-
 from airflow.jobs.backfill_job import BackfillJob
 from airflow.models import DagBag
 from airflow.utils import timezone
@@ -83,7 +81,6 @@ def test_dask_executor_functions(self):
         executor = DaskExecutor(cluster_address=self.cluster.scheduler_address)
         self.assert_tasks_on_executor(executor)
 
-    @pytest.mark.quarantined
     def test_backfill_integration(self):
         """
         Test that DaskExecutor can be used to backfill example dags
diff --git a/tests/jobs/test_backfill_job.py b/tests/jobs/test_backfill_job.py
index 7962172cd92..04abe9d7575 100644
--- a/tests/jobs/test_backfill_job.py
+++ b/tests/jobs/test_backfill_job.py
@@ -52,7 +52,6 @@
 DEFAULT_DATE = timezone.datetime(2016, 1, 1)
 
 
-@pytest.mark.quarantined
 class TestBackfillJob(unittest.TestCase):
 
     def _get_dummy_dag(self, dag_id, pool=Pool.DEFAULT_POOL_NAME, task_concurrency=None):
diff --git a/tests/jobs/test_local_task_job.py b/tests/jobs/test_local_task_job.py
index 1a8b4f9ccdb..09c18eaac30 100644
--- a/tests/jobs/test_local_task_job.py
+++ b/tests/jobs/test_local_task_job.py
@@ -255,7 +255,6 @@ def test_localtaskjob_double_trigger(self):
 
         session.close()
 
-    @pytest.mark.quarantined
     def test_localtaskjob_maintain_heart_rate(self):
         dagbag = DagBag(
             dag_folder=TEST_DAG_FOLDER,
@@ -360,7 +359,6 @@ def task_function(ti):
         self.assertNotIn('reached_end_of_sleep', data,
                          'Task should not have been allowed to run to completion')
 
-    @pytest.mark.quarantined
     def test_mark_success_on_success_callback(self):
         """
         Test that ensures that where a task is marked suceess in the UI
diff --git a/tests/jobs/test_scheduler_job.py b/tests/jobs/test_scheduler_job.py
index 2f6fa6db5c5..00325fe35ba 100644
--- a/tests/jobs/test_scheduler_job.py
+++ b/tests/jobs/test_scheduler_job.py
@@ -1241,7 +1241,7 @@ def test_should_mark_dummy_task_as_success(self):
                 self.assertIsNone(duration)
 
 
-@pytest.mark.quarantined
+@pytest.mark.heisentests
 class TestDagFileProcessorQueriesCount(unittest.TestCase):
     """
     These tests are designed to detect changes in the number of queries for different DAG files.
@@ -2154,7 +2154,6 @@ def test_execute_task_instances_limit(self):
             self.assertEqual(State.QUEUED, ti.state)
 
     @pytest.mark.quarantined
-    @pytest.mark.xfail(condition=True, reason="The test is flaky with nondeterministic result")
     def test_change_state_for_tis_without_dagrun(self):
         dag1 = DAG(dag_id='test_change_state_for_tis_without_dagrun', start_date=DEFAULT_DATE)
 
@@ -2937,7 +2936,6 @@ def do_schedule(mock_dagbag):
         ti.refresh_from_db()
         self.assertEqual(State.SUCCESS, ti.state)
 
-    @pytest.mark.quarantined
     def test_retry_still_in_executor(self):
         """
         Checks if the scheduler does not put a task in limbo, when a task is retried
@@ -3025,7 +3023,6 @@ def run_with_error(ti, ignore_ti_state=False):
         self.assertEqual(ti.state, State.SUCCESS)
 
     @pytest.mark.quarantined
-    @pytest.mark.xfail(condition=True, reason="This test is failing!")
     def test_retry_handling_job(self):
         """
         Integration test of the scheduler not accidentally resetting
diff --git a/tests/sensors/test_timeout_sensor.py b/tests/sensors/test_timeout_sensor.py
index 70228ddc485..09b35b3ad22 100644
--- a/tests/sensors/test_timeout_sensor.py
+++ b/tests/sensors/test_timeout_sensor.py
@@ -19,8 +19,6 @@
 import unittest
 from datetime import timedelta
 
-import pytest
-
 from airflow.exceptions import AirflowSensorTimeout, AirflowSkipException
 from airflow.models.dag import DAG
 from airflow.sensors.base_sensor_operator import BaseSensorOperator
@@ -73,7 +71,6 @@ def setUp(self):
         }
         self.dag = DAG(TEST_DAG_ID, default_args=args)
 
-    @pytest.mark.quarantined
     def test_timeout(self):
         op = TimeoutTestSensor(
             task_id='test_timeout',
diff --git a/tests/test_impersonation.py b/tests/test_impersonation.py
index 56c5cdf43b0..30df63c06bd 100644
--- a/tests/test_impersonation.py
+++ b/tests/test_impersonation.py
@@ -109,7 +109,7 @@ def create_user():
             )
 
 
-@pytest.mark.quarantined
+@pytest.mark.heisentests
 class TestImpersonation(unittest.TestCase):
 
     def setUp(self):
diff --git a/tests/utils/test_process_utils.py b/tests/utils/test_process_utils.py
index e67afc97ffd..1620bfe747b 100644
--- a/tests/utils/test_process_utils.py
+++ b/tests/utils/test_process_utils.py
@@ -125,7 +125,6 @@ def my_sleep_subprocess_with_signals():
     sleep(100)
 
 
-@pytest.mark.quarantined
 class TestKillChildProcessesByPids(unittest.TestCase):
     def test_should_kill_process(self):
         before_num_process = subprocess.check_output(["ps", "-ax", "-o", "pid="]).decode().count("\n")
@@ -142,6 +141,7 @@ def test_should_kill_process(self):
         num_process = subprocess.check_output(["ps", "-ax", "-o", "pid="]).decode().count("\n")
         self.assertEqual(before_num_process, num_process)
 
+    @pytest.mark.quarantined
     def test_should_force_kill_process(self):
         before_num_process = subprocess.check_output(["ps", "-ax", "-o", "pid="]).decode().count("\n")
 
diff --git a/tests/www/test_views.py b/tests/www/test_views.py
index 348d3a9accd..f276dd8738b 100644
--- a/tests/www/test_views.py
+++ b/tests/www/test_views.py
@@ -2411,7 +2411,6 @@ def test_user_defined_filter_and_macros_raise_error(self):
         )
 
 
-@pytest.mark.quarantined
 class TestTriggerDag(TestBase):
 
     def setUp(self):