From e0d3093c14f04d8ec7d7ec10f793b42923873e70 Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Tue, 8 Sep 2020 07:36:12 +0200 Subject: [PATCH] Removes stable tests from quarantine (#10768) We've observed the tests for last couple of weeks and it seems most of the tests marked with "quarantine" marker are succeeding in a stable way (https://github.com/apache/airflow/issues/10118) The removed tests have success ratio of > 95% (20 runs without problems) and this has been verified a week ago as well, so it seems they are rather stable. There are literally few that are either failing or causing the Quarantined builds to hang. I manually reviewed the master tests that failed for last few weeks and added the tests that are causing the build to hang. Seems that stability has improved - which might be casued by some temporary problems when we marked the quarantined builds or too "generous" way of marking test as quarantined, or maybe improvement comes from the #10368 as the docker engine and machines used to run the builds in GitHub experience far less load (image builds are executed in separate builds) so it might be that resource usage is decreased. Another reason might be Github Actions stability improvements. Or simply those tests are more stable when run isolation. We might still add failing tests back as soon we see them behave in a flaky way. The remaining quarantined tests that need to be fixed: * test_local_run (often hangs the build) * test_retry_handling_job * test_clear_multiple_external_task_marker * test_should_force_kill_process * test_change_state_for_tis_without_dagrun * test_cli_webserver_background We also move some of those tests to "heisentests" category Those testst run fine in isolation but fail the builds when run with all other tests: * TestImpersonation tests We might find that those heisentest can be fixed but for now we are going to run them in isolation. Also - since those quarantined tests are failing more often the "num runs" to track for those has been decreased to 10 to keep track of 10 last runs only. GitOrigin-RevId: b746f33fc66ce2ecdc6d72a9943fc2db00da0f45 --- .github/workflows/ci.yml | 8 ++++---- TESTING.rst | 12 ++++++++++++ scripts/ci/docker-compose/base.yml | 1 + scripts/ci/testing/ci_run_airflow_testing.sh | 6 ++---- scripts/in_container/entrypoint_ci.sh | 9 +++++++++ tests/cli/commands/test_dag_command.py | 2 -- tests/cli/commands/test_task_command.py | 1 - tests/conftest.py | 18 ++++++++++++++++++ tests/executors/test_dask_executor.py | 3 --- tests/jobs/test_backfill_job.py | 1 - tests/jobs/test_local_task_job.py | 2 -- tests/jobs/test_scheduler_job.py | 5 +---- tests/sensors/test_timeout_sensor.py | 3 --- tests/test_impersonation.py | 2 +- tests/utils/test_process_utils.py | 2 +- tests/www/test_views.py | 1 - 16 files changed, 49 insertions(+), 27 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 61ae572d722..bf89cdd3a56 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -298,7 +298,7 @@ jobs: matrix: python-version: [3.6, 3.7] postgres-version: [9.6, 10] - test-type: [Core, Integration] + test-type: [Core, Integration, Heisentests] fail-fast: false env: BACKEND: postgres @@ -344,7 +344,7 @@ jobs: matrix: python-version: [3.7, 3.8] mysql-version: [5.7] - test-type: [Core, Integration] + test-type: [Core, Integration, Heisentests] fail-fast: false env: BACKEND: mysql @@ -388,7 +388,7 @@ jobs: strategy: matrix: python-version: [3.6, 3.8] - test-type: [Core, Integration] + test-type: [Core, Integration, Heisentests] fail-fast: false env: BACKEND: sqlite @@ -439,7 +439,7 @@ jobs: POSTGRES_VERSION: ${{ matrix.postgres-version }} RUN_TESTS: true TEST_TYPE: Quarantined - NUM_RUNS: 20 + NUM_RUNS: 10 GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} if: > (needs.trigger-tests.outputs.run-tests == 'true' || github.event_name != 'pull_request') && diff --git a/TESTING.rst b/TESTING.rst index 93649895f6e..7b3386af8a1 100644 --- a/TESTING.rst +++ b/TESTING.rst @@ -324,6 +324,18 @@ Those tests are marked with ``@pytest.mark.quarantined`` annotation. Those tests are skipped by default. You can enable them with ``--include-quarantined`` flag. You can also decide to only run tests with ``-m quarantined`` flag to run only those tests. +Heisen tests +------------ + +Some of our tests are Heisentests. This means that they run fine in isolation but when they run together with +others they might fail the tests (this is likely due to resource consumptions). Therefore we run those tests +in isolation. + +Those tests are marked with ``@pytest.mark.heisentests`` annotation. +Those tests are skipped by default. You can enable them with ``--include-heisentests`` flag. You +can also decide to only run tests with ``-m heisentests`` flag to run only those tests. + + Running Tests with Kubernetes ============================= diff --git a/scripts/ci/docker-compose/base.yml b/scripts/ci/docker-compose/base.yml index f20be68c9c7..77792de458f 100644 --- a/scripts/ci/docker-compose/base.yml +++ b/scripts/ci/docker-compose/base.yml @@ -41,6 +41,7 @@ services: - RUN_INTEGRATION_TESTS - ONLY_RUN_LONG_RUNNING_TESTS - ONLY_RUN_QUARANTINED_TESTS + - ONLY_RUN_HEISEN_TESTS - GITHUB_TOKEN - GITHUB_REPOSITORY - ISSUE_ID diff --git a/scripts/ci/testing/ci_run_airflow_testing.sh b/scripts/ci/testing/ci_run_airflow_testing.sh index 38df1425ba6..62f09439846 100755 --- a/scripts/ci/testing/ci_run_airflow_testing.sh +++ b/scripts/ci/testing/ci_run_airflow_testing.sh @@ -26,6 +26,8 @@ if [[ ${TEST_TYPE:=} == "Integration" ]]; then export RUN_INTEGRATION_TESTS="${AVAILABLE_INTEGRATIONS}" elif [[ ${TEST_TYPE:=} == "Long" ]]; then export ONLY_RUN_LONG_RUNNING_TESTS="true" +elif [[ ${TEST_TYPE:=} == "Heisentests" ]]; then + export ONLY_RUN_HEISEN_TESTS="true" elif [[ ${TEST_TYPE:=} == "Quarantined" ]]; then export ONLY_RUN_QUARANTINED_TESTS="true" # Do not fail in quarantined tests @@ -128,7 +130,3 @@ echo RUN_INTEGRATION_TESTS=${RUN_INTEGRATION_TESTS:=""} run_airflow_testing_in_docker "${@}" - -if [[ ${TEST_TYPE:=} == "Quarantined" ]]; then - export ONLY_RUN_QUARANTINED_TESTS="true" -fi diff --git a/scripts/in_container/entrypoint_ci.sh b/scripts/in_container/entrypoint_ci.sh index 25fb4806a2a..55cfc827d38 100755 --- a/scripts/in_container/entrypoint_ci.sh +++ b/scripts/in_container/entrypoint_ci.sh @@ -216,6 +216,15 @@ elif [[ ${ONLY_RUN_LONG_RUNNING_TESTS:=""} == "true" ]]; then "--execution-timeout=120" "--teardown-timeout=30" ) +elif [[ ${ONLY_RUN_HEISEN_TESTS:=""} == "true" ]]; then + EXTRA_PYTEST_ARGS+=( + "-m" "heisentests" + "--include-heisentests" + "--verbosity=1" + "--setup-timeout=20" + "--execution-timeout=50" + "--teardown-timeout=20" + ) elif [[ ${ONLY_RUN_QUARANTINED_TESTS:=""} == "true" ]]; then EXTRA_PYTEST_ARGS+=( "-m" "quarantined" diff --git a/tests/cli/commands/test_dag_command.py b/tests/cli/commands/test_dag_command.py index 03579ad76da..7e776c5c11d 100644 --- a/tests/cli/commands/test_dag_command.py +++ b/tests/cli/commands/test_dag_command.py @@ -23,7 +23,6 @@ from datetime import datetime, timedelta import mock -import pytest from airflow import settings from airflow.cli import cli_parser @@ -381,7 +380,6 @@ def test_pause(self): dag_command.dag_unpause(args) self.assertIn(self.dagbag.dags['example_bash_operator'].get_is_paused(), [False, 0]) - @pytest.mark.quarantined def test_trigger_dag(self): dag_command.dag_trigger(self.parser.parse_args([ 'dags', 'trigger', 'example_bash_operator', diff --git a/tests/cli/commands/test_task_command.py b/tests/cli/commands/test_task_command.py index 5d4918a2bb4..0a4b9916677 100644 --- a/tests/cli/commands/test_task_command.py +++ b/tests/cli/commands/test_task_command.py @@ -355,7 +355,6 @@ def setUp(self): self.parser = cli_parser.get_parser() - @pytest.mark.quarantined def test_run_ignores_all_dependencies(self): """ Test that run respects ignore_all_dependencies diff --git a/tests/conftest.py b/tests/conftest.py index eac88d62719..20c072bd409 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -154,6 +154,11 @@ def pytest_addoption(parser): action="store_true", help="Includes quarantined tests (marked with quarantined marker). They are skipped by default.", ) + group.addoption( + "--include-heisentests", + action="store_true", + help="Includes heisentests (marked with heisentests marker). They are skipped by default.", + ) allowed_trace_sql_columns_list = ",".join(ALLOWED_TRACE_SQL_COLUMNS) group.addoption( "--trace-sql", @@ -245,6 +250,9 @@ def pytest_configure(config): config.addinivalue_line( "markers", "quarantined: mark test that are in quarantine (i.e. flaky, need to be isolated and fixed)" ) + config.addinivalue_line( + "markers", "heisentests: mark test that should be run in isolation due to resource consumption" + ) config.addinivalue_line( "markers", "credential_file(name): mark tests that require credential file in CREDENTIALS_DIR" ) @@ -308,6 +316,13 @@ def skip_quarantined_test(item): format(item=item)) +def skip_heisen_test(item): + for _ in item.iter_markers(name="heisentests"): + pytest.skip("The test is skipped because it has heisentests marker. " + "And --include-heisentests flag is passed to pytest. {item}". + format(item=item)) + + def skip_if_integration_disabled(marker, item): integration_name = marker.args[0] environment_variable_name = "INTEGRATION_" + integration_name.upper() @@ -355,6 +370,7 @@ def pytest_runtest_setup(item): include_long_running = item.config.getoption("--include-long-running") include_quarantined = item.config.getoption("--include-quarantined") + include_heisentests = item.config.getoption("--include-heisentests") for marker in item.iter_markers(name="integration"): skip_if_integration_disabled(marker, item) @@ -373,6 +389,8 @@ def pytest_runtest_setup(item): skip_long_running_test(item) if not include_quarantined: skip_quarantined_test(item) + if not include_heisentests: + skip_heisen_test(item) skip_if_credential_file_missing(item) skip_if_airflow_2_test(item) diff --git a/tests/executors/test_dask_executor.py b/tests/executors/test_dask_executor.py index ea6e4f6f827..2f43271acb4 100644 --- a/tests/executors/test_dask_executor.py +++ b/tests/executors/test_dask_executor.py @@ -19,8 +19,6 @@ from datetime import timedelta from unittest import mock -import pytest - from airflow.jobs.backfill_job import BackfillJob from airflow.models import DagBag from airflow.utils import timezone @@ -83,7 +81,6 @@ def test_dask_executor_functions(self): executor = DaskExecutor(cluster_address=self.cluster.scheduler_address) self.assert_tasks_on_executor(executor) - @pytest.mark.quarantined def test_backfill_integration(self): """ Test that DaskExecutor can be used to backfill example dags diff --git a/tests/jobs/test_backfill_job.py b/tests/jobs/test_backfill_job.py index 7962172cd92..04abe9d7575 100644 --- a/tests/jobs/test_backfill_job.py +++ b/tests/jobs/test_backfill_job.py @@ -52,7 +52,6 @@ DEFAULT_DATE = timezone.datetime(2016, 1, 1) -@pytest.mark.quarantined class TestBackfillJob(unittest.TestCase): def _get_dummy_dag(self, dag_id, pool=Pool.DEFAULT_POOL_NAME, task_concurrency=None): diff --git a/tests/jobs/test_local_task_job.py b/tests/jobs/test_local_task_job.py index 1a8b4f9ccdb..09c18eaac30 100644 --- a/tests/jobs/test_local_task_job.py +++ b/tests/jobs/test_local_task_job.py @@ -255,7 +255,6 @@ def test_localtaskjob_double_trigger(self): session.close() - @pytest.mark.quarantined def test_localtaskjob_maintain_heart_rate(self): dagbag = DagBag( dag_folder=TEST_DAG_FOLDER, @@ -360,7 +359,6 @@ def task_function(ti): self.assertNotIn('reached_end_of_sleep', data, 'Task should not have been allowed to run to completion') - @pytest.mark.quarantined def test_mark_success_on_success_callback(self): """ Test that ensures that where a task is marked suceess in the UI diff --git a/tests/jobs/test_scheduler_job.py b/tests/jobs/test_scheduler_job.py index 2f6fa6db5c5..00325fe35ba 100644 --- a/tests/jobs/test_scheduler_job.py +++ b/tests/jobs/test_scheduler_job.py @@ -1241,7 +1241,7 @@ def test_should_mark_dummy_task_as_success(self): self.assertIsNone(duration) -@pytest.mark.quarantined +@pytest.mark.heisentests class TestDagFileProcessorQueriesCount(unittest.TestCase): """ These tests are designed to detect changes in the number of queries for different DAG files. @@ -2154,7 +2154,6 @@ def test_execute_task_instances_limit(self): self.assertEqual(State.QUEUED, ti.state) @pytest.mark.quarantined - @pytest.mark.xfail(condition=True, reason="The test is flaky with nondeterministic result") def test_change_state_for_tis_without_dagrun(self): dag1 = DAG(dag_id='test_change_state_for_tis_without_dagrun', start_date=DEFAULT_DATE) @@ -2937,7 +2936,6 @@ def do_schedule(mock_dagbag): ti.refresh_from_db() self.assertEqual(State.SUCCESS, ti.state) - @pytest.mark.quarantined def test_retry_still_in_executor(self): """ Checks if the scheduler does not put a task in limbo, when a task is retried @@ -3025,7 +3023,6 @@ def run_with_error(ti, ignore_ti_state=False): self.assertEqual(ti.state, State.SUCCESS) @pytest.mark.quarantined - @pytest.mark.xfail(condition=True, reason="This test is failing!") def test_retry_handling_job(self): """ Integration test of the scheduler not accidentally resetting diff --git a/tests/sensors/test_timeout_sensor.py b/tests/sensors/test_timeout_sensor.py index 70228ddc485..09b35b3ad22 100644 --- a/tests/sensors/test_timeout_sensor.py +++ b/tests/sensors/test_timeout_sensor.py @@ -19,8 +19,6 @@ import unittest from datetime import timedelta -import pytest - from airflow.exceptions import AirflowSensorTimeout, AirflowSkipException from airflow.models.dag import DAG from airflow.sensors.base_sensor_operator import BaseSensorOperator @@ -73,7 +71,6 @@ def setUp(self): } self.dag = DAG(TEST_DAG_ID, default_args=args) - @pytest.mark.quarantined def test_timeout(self): op = TimeoutTestSensor( task_id='test_timeout', diff --git a/tests/test_impersonation.py b/tests/test_impersonation.py index 56c5cdf43b0..30df63c06bd 100644 --- a/tests/test_impersonation.py +++ b/tests/test_impersonation.py @@ -109,7 +109,7 @@ def create_user(): ) -@pytest.mark.quarantined +@pytest.mark.heisentests class TestImpersonation(unittest.TestCase): def setUp(self): diff --git a/tests/utils/test_process_utils.py b/tests/utils/test_process_utils.py index e67afc97ffd..1620bfe747b 100644 --- a/tests/utils/test_process_utils.py +++ b/tests/utils/test_process_utils.py @@ -125,7 +125,6 @@ def my_sleep_subprocess_with_signals(): sleep(100) -@pytest.mark.quarantined class TestKillChildProcessesByPids(unittest.TestCase): def test_should_kill_process(self): before_num_process = subprocess.check_output(["ps", "-ax", "-o", "pid="]).decode().count("\n") @@ -142,6 +141,7 @@ def test_should_kill_process(self): num_process = subprocess.check_output(["ps", "-ax", "-o", "pid="]).decode().count("\n") self.assertEqual(before_num_process, num_process) + @pytest.mark.quarantined def test_should_force_kill_process(self): before_num_process = subprocess.check_output(["ps", "-ax", "-o", "pid="]).decode().count("\n") diff --git a/tests/www/test_views.py b/tests/www/test_views.py index 348d3a9accd..f276dd8738b 100644 --- a/tests/www/test_views.py +++ b/tests/www/test_views.py @@ -2411,7 +2411,6 @@ def test_user_defined_filter_and_macros_raise_error(self): ) -@pytest.mark.quarantined class TestTriggerDag(TestBase): def setUp(self):