From 55415fed0e72bd70173595d5420414db7467e43f Mon Sep 17 00:00:00 2001
From: Paul Natsuo Kishimoto
Date: Mon, 30 Dec 2024 17:32:18 -0500
Subject: [PATCH] Add --sdmx-fetch-data; use in GHA workflow

---
 .github/workflows/pytest.yaml |  1 +
 .pre-commit-config.yaml       |  1 +
 pyproject.toml                |  1 +
 sdmx/testing/__init__.py      | 65 +++++++++++++++++++++++++++++++++--
 4 files changed, 65 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml
index ca8d2a158..9a9ea28cf 100644
--- a/.github/workflows/pytest.yaml
+++ b/.github/workflows/pytest.yaml
@@ -51,6 +51,7 @@ jobs:
         uv run --no-sync \
           pytest \
           -ra --color=yes --verbose \
+          --sdmx-fetch-data \
           --cov-report=xml \
           --numprocesses auto
       shell: bash
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 162344ef8..4a6335937 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -4,6 +4,7 @@ repos:
   hooks:
   - id: mypy
     additional_dependencies:
+    - GitPython
     - lxml-stubs
     - pandas-stubs
     - pytest
diff --git a/pyproject.toml b/pyproject.toml
index 048d81bc0..6a7bd87c6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -40,6 +40,7 @@ dependencies = [
 cache = ["requests-cache"]
 docs = ["furo", "IPython", "sphinx >= 8"]
 tests = [
+  "GitPython",
   "Jinja2",
   "pytest >= 5",
   "pytest-cov",
diff --git a/sdmx/testing/__init__.py b/sdmx/testing/__init__.py
index 2ac594d85..b866ee18f 100644
--- a/sdmx/testing/__init__.py
+++ b/sdmx/testing/__init__.py
@@ -7,6 +7,7 @@
 
 import numpy as np
 import pandas as pd
+import platformdirs
 import pytest
 import responses
 
@@ -17,6 +18,9 @@
 
 log = logging.getLogger(__name__)
 
+DATA_DEFAULT_DIR = platformdirs.user_cache_path("sdmx").joinpath("test-data")
+DATA_REMOTE_URL = "git@github.com:khaeru/sdmx-test-data.git"
+# DATA_REMOTE_URL = "https://github.com/khaeru/sdmx-test-data.git"
 
 # Expected to_pandas() results for data files; see expected_data()
 # - Keys are the file name (above) with '.' -> '-': 'foo.xml' -> 'foo-xml'
@@ -49,12 +53,55 @@ def assert_pd_equal(left, right, **kwargs):
     method(left, right, **kwargs)
 
 
+def fetch_data() -> Path:
+    """Fetch test data from GitHub.
+
+    A git repository is initialized in :data:`DATA_DEFAULT_DIR` with an "origin" remote
+    at :data:`DATA_REMOTE_URL`; the tip of its "main" branch is fetched (depth 1) and
+    checked out.
+
+    Returns the working directory of that repository.
+    """
+    import git
+
+    # Initialize a git Repo object
+    repo = git.Repo.init(DATA_DEFAULT_DIR)
+
+    # Ensure there is a remote for the origin
+    try:
+        origin = repo.remote("origin")
+    except ValueError:
+        origin = repo.create_remote("origin", DATA_REMOTE_URL)
+    else:
+        origin.set_url(DATA_REMOTE_URL)
+
+    log.info(f"Fetch test data from {origin} → {repo.working_dir}")
+
+    # Fetch only 1 commit from the remote
+    origin.fetch("refs/heads/main", depth=1)
+    b = origin.refs["main"]
+
+    # Check out the branch
+    try:
+        head = repo.heads["main"]
+    except IndexError:
+        head = repo.create_head("main", b)
+    head.set_tracking_branch(b).checkout()
+
+    return Path(repo.working_dir)
+
+
 def pytest_addoption(parser):
-    """Add the ``--sdmx-test-data`` command-line option to pytest."""
+    """Add pytest command-line options."""
+    parser.addoption(
+        "--sdmx-fetch-data",
+        action="store_true",
+        help="fetch test specimens from GitHub",
+    )
     parser.addoption(
         "--sdmx-test-data",
         # Use the environment variable value by default
-        default=os.environ.get("SDMX_TEST_DATA", None),
+        default=os.environ.get("SDMX_TEST_DATA", DATA_DEFAULT_DIR),
         help="path to SDMX test specimens",
     )
 
@@ -70,8 +117,20 @@ def pytest_configure(config):
     config._sdmx_reporter = ServiceReporter(config)
     config.pluginmanager.register(config._sdmx_reporter)
 
+    # Optionally clone the test data, unless an explicit location was given. The option
+    # default is DATA_DEFAULT_DIR (never falsy), so also compare against that value
+    location = config.option.sdmx_test_data
+    if config.option.sdmx_fetch_data and (
+        not location or Path(location) == DATA_DEFAULT_DIR
+    ):
+        config.option.sdmx_test_data = fetch_data()
+
     # Check the value can be converted to a path, and exists
-    message = "Give --sdmx-test-data=… or set the SDMX_TEST_DATA environment variable"
+    message = (
+        "Unable to locate test specimens. Give --sdmx-fetch-data, or use "
+        "--sdmx-test-data=… or the SDMX_TEST_DATA environment variable to indicate an "
+        "existing directory"
+    )
     try:
         sdmx_test_data = Path(config.option.sdmx_test_data)
     except TypeError:  # pragma: no cover