Skip to content

Commit

Permalink
Add --sdmx-fetch-data; use in GHA workflow
Browse files Browse the repository at this point in the history
  • Loading branch information
khaeru committed Dec 31, 2024
1 parent eb1d805 commit d13b719
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 3 deletions.
1 change: 1 addition & 0 deletions .github/workflows/pytest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ jobs:
uv run --no-sync \
pytest \
-ra --color=yes --verbose \
--sdmx-fetch-data \
--cov-report=xml \
--numprocesses auto
shell: bash
Expand Down
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ repos:
hooks:
- id: mypy
additional_dependencies:
- GitPython
- lxml-stubs
- pandas-stubs
- pytest
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ dependencies = [
cache = ["requests-cache"]
docs = ["furo", "IPython", "sphinx >= 8"]
tests = [
"GitPython",
"Jinja2",
"pytest >= 5",
"pytest-cov",
Expand Down
70 changes: 67 additions & 3 deletions sdmx/testing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@
from collections import ChainMap
from contextlib import contextmanager
from pathlib import Path, PurePosixPath
from time import sleep
from typing import Union

import numpy as np
import pandas as pd
import platformdirs
import pytest
import responses

Expand All @@ -17,6 +19,9 @@

log = logging.getLogger(__name__)

# Directory in which fetch_data() initializes the git repository holding the test
# data; also the fallback default for the --sdmx-test-data option.
DATA_DEFAULT_DIR = platformdirs.user_cache_path("sdmx").joinpath("test-data")
# URL used for the 'origin' remote by fetch_data(). Alternative (SSH) form:
# DATA_REMOTE_URL = "git@github.com:khaeru/sdmx-test-data.git"
DATA_REMOTE_URL = "https://github.com/khaeru/sdmx-test-data.git"

# Expected to_pandas() results for data files; see expected_data()
# - Keys are the file name (above) with '.' -> '-': 'foo.xml' -> 'foo-xml'
Expand Down Expand Up @@ -49,12 +54,63 @@ def assert_pd_equal(left, right, **kwargs):
method(left, right, **kwargs)


def fetch_data() -> Path:
    """Fetch test data from GitHub.

    A git repository is initialized in :data:`DATA_DEFAULT_DIR`, its 'origin' remote
    is set to :data:`DATA_REMOTE_URL`, and a single commit of the remote 'main'
    branch is fetched. The files are checked out if the local 'main' differs from
    'origin/main' or the working directory is not clean.

    Returns
    -------
    pathlib.Path
        Working directory of the repository that holds the test data.

    Raises
    ------
    git.GitCommandError
        If fetching fails for a reason other than the "shallow" lock condition that
        is retried.
    """
    import git

    # Initialize a git Repo object
    repo = git.Repo.init(DATA_DEFAULT_DIR)

    # Keep the `try` body limited to the lookup that signals a missing remote
    try:
        # Reference to existing 'origin' remote
        origin = repo.remotes["origin"]
    except IndexError:
        # Create a new remote
        origin = repo.create_remote("origin", DATA_REMOTE_URL)
    else:
        # Ensure the DATA_REMOTE_URL is among the URLs for this remote
        if DATA_REMOTE_URL not in origin.urls:
            origin.set_url(DATA_REMOTE_URL)

    log.info("Fetch test data from %s to %s", origin, repo.working_dir)

    # Retry to avoid concurrency issues when running with pytest-xdist.
    # NOTE(review): the number of retries is unbounded — confirm this is intended.
    while True:
        try:
            # Fetch only 1 commit from the remote
            origin.fetch("refs/heads/main", depth=1)
            break
        except git.GitCommandError as e:
            # Positional args are (command, status, stderr, ...); pad so that a
            # short tuple cannot raise IndexError and mask the original error
            status, stderr = (tuple(e.args) + (None, None, None))[1:3]
            # Normalize so the membership test below cannot raise TypeError
            if isinstance(stderr, str):
                stderr = stderr.encode()

            if status == 128 and stderr and b"shallow" in stderr:
                sleep(0.1)  # Wait for a lockfile to clear, try again
            else:
                raise

    origin_main = origin.refs["main"]  # Reference to 'origin/main'
    try:
        head = repo.heads["main"]  # Reference to existing local 'main'
    except IndexError:
        head = repo.create_head("main", origin_main)  # Create a local 'main'

    if (
        head.commit != origin_main.commit  # Commit differs
        or (repo.is_dirty() or len(repo.index.diff("HEAD")))  # Working dir is not clean
    ):
        # Check out files into the working directory
        head.set_tracking_branch(origin_main).checkout()

    return Path(repo.working_dir)


def pytest_addoption(parser):
    """Add pytest command-line options.

    ``--sdmx-fetch-data``
        Flag; when given, test specimens are fetched from GitHub.
    ``--sdmx-test-data``
        Path to SDMX test specimens. Defaults to the value of the ``SDMX_TEST_DATA``
        environment variable if set, otherwise :data:`DATA_DEFAULT_DIR`.
    """
    parser.addoption(
        "--sdmx-fetch-data",
        action="store_true",
        help="fetch test specimens from GitHub",
    )
    parser.addoption(
        "--sdmx-test-data",
        # Use the environment variable value by default
        default=os.environ.get("SDMX_TEST_DATA", DATA_DEFAULT_DIR),
        help="path to SDMX test specimens",
    )

Expand All @@ -70,8 +126,16 @@ def pytest_configure(config):
config._sdmx_reporter = ServiceReporter(config)
config.pluginmanager.register(config._sdmx_reporter)

# Optionally clone the test data
if config.option.sdmx_fetch_data:
config.option.sdmx_test_data = fetch_data()

# Check the value can be converted to a path, and exists
message = "Give --sdmx-test-data=… or set the SDMX_TEST_DATA environment variable"
message = (
"Unable to locate test specimens. Give --sdmx-fetch-data, or use "
"--sdmx-test-data=… or the SDMX_TEST_DATA environment variable to indicate an "
"existing directory"
)
try:
sdmx_test_data = Path(config.option.sdmx_test_data)
except TypeError: # pragma: no cover
Expand Down

0 comments on commit d13b719

Please sign in to comment.