From 25a9a0331964479d1ae1a7ba51547dc82783bf9c Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 27 Jul 2022 14:24:52 -0700 Subject: [PATCH 1/2] test(ingest): limit GMS retries in test The tests were previously hanging for a bit of time because of the retries. With these changes, running `pytest -v tests/unit/test_pipeline.py` went from ~31s to ~4s on my machine. --- metadata-ingestion/tests/conftest.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/metadata-ingestion/tests/conftest.py b/metadata-ingestion/tests/conftest.py index 800232c921c640..9e3f6c078d0367 100644 --- a/metadata-ingestion/tests/conftest.py +++ b/metadata-ingestion/tests/conftest.py @@ -1,6 +1,7 @@ import logging import os import time +import unittest.mock import pytest @@ -31,6 +32,14 @@ def fake_time(): yield +@pytest.fixture(autouse=True, scope="session") +def reduce_gms_retries(): + with unittest.mock.patch( + "datahub.emitter.rest_emitter.DataHubRestEmitter._retry_max_times", 1 + ): + yield + + def pytest_addoption(parser): parser.addoption( "--update-golden-files", From d43af5040a876f3dc5af5dec704a896528d06dd5 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 28 Jul 2022 11:18:41 -0700 Subject: [PATCH 2/2] use env var --- .../src/datahub/emitter/rest_emitter.py | 5 ++- metadata-ingestion/tests/conftest.py | 33 +++++++++---------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/metadata-ingestion/src/datahub/emitter/rest_emitter.py b/metadata-ingestion/src/datahub/emitter/rest_emitter.py index a20489163c0de3..21a6c59678e726 100644 --- a/metadata-ingestion/src/datahub/emitter/rest_emitter.py +++ b/metadata-ingestion/src/datahub/emitter/rest_emitter.py @@ -1,6 +1,7 @@ import datetime import json import logging +import os from json.decoder import JSONDecodeError from typing import Any, Dict, List, Optional, Tuple, Union @@ -34,7 +35,9 @@ class DataHubRestEmitter: 504, ] DEFAULT_RETRY_METHODS = ["HEAD", "GET", "POST", "PUT", "DELETE", "OPTIONS", "TRACE"] - 
DEFAULT_RETRY_MAX_TIMES = 3 + DEFAULT_RETRY_MAX_TIMES = int( + os.getenv("DATAHUB_REST_EMITTER_DEFAULT_RETRY_MAX_TIMES", "3") + ) _gms_server: str _token: Optional[str] diff --git a/metadata-ingestion/tests/conftest.py b/metadata-ingestion/tests/conftest.py index 9e3f6c078d0367..0390547800b765 100644 --- a/metadata-ingestion/tests/conftest.py +++ b/metadata-ingestion/tests/conftest.py @@ -1,12 +1,24 @@ import logging import os import time -import unittest.mock import pytest -from tests.test_helpers.docker_helpers import docker_compose_runner # noqa: F401 -from tests.test_helpers.state_helpers import mock_datahub_graph # noqa: F401 +# Enable debug logging. +logging.getLogger().setLevel(logging.DEBUG) +os.environ["DATAHUB_DEBUG"] = "1" + +# Disable telemetry +os.environ["DATAHUB_TELEMETRY_ENABLED"] = "false" + +# Reduce retries on GMS, because this causes tests to hang while sleeping +# between retries. +os.environ["DATAHUB_REST_EMITTER_DEFAULT_RETRY_MAX_TIMES"] = "1" + +# We need our imports to go below the os.environ updates, since the mere act +# of importing some datahub modules will load env variables. +from tests.test_helpers.docker_helpers import docker_compose_runner # noqa: F401,E402 +from tests.test_helpers.state_helpers import mock_datahub_graph # noqa: F401,E402 try: # See https://github.com/spulec/freezegun/issues/98#issuecomment-590553475. @@ -14,13 +26,6 @@ except ImportError: pass -# Enable debug logging. -logging.getLogger().setLevel(logging.DEBUG) -os.putenv("DATAHUB_DEBUG", "1") - -# Disable telemetry -os.putenv("DATAHUB_TELEMETRY_ENABLED", "false") - @pytest.fixture def mock_time(monkeypatch): @@ -32,14 +37,6 @@ def fake_time(): yield -@pytest.fixture(autouse=True, scope="session") -def reduce_gms_retries(): - with unittest.mock.patch( - "datahub.emitter.rest_emitter.DataHubRestEmitter._retry_max_times", 1 - ): - yield - - def pytest_addoption(parser): parser.addoption( "--update-golden-files",