fix: Populate the schema cache with local defs.json again #1917

Merged
merged 19 commits into from Aug 26, 2022
Changes from 9 commits
1 change: 1 addition & 0 deletions setup.py
@@ -40,6 +40,7 @@
'scrapbook~=0.5.0',
'jupyter',
'graphviz',
'pytest-socket>=0.2',
]
)
)
3 changes: 3 additions & 0 deletions src/pyhf/schema/__init__.py
@@ -72,6 +72,8 @@ def __call__(self, new_path: pathlib.Path):
self (pyhf.schema.Schema): Returns itself (for contextlib management)
"""
self.orig_path, variables.schemas = variables.schemas, new_path
self.orig_cache = dict(variables.SCHEMA_CACHE)
variables.SCHEMA_CACHE.clear()
return self

def __enter__(self):
@@ -85,6 +87,7 @@ def __exit__(self, *args, **kwargs):
None
"""
variables.schemas = self.orig_path
variables.SCHEMA_CACHE = self.orig_cache

@property
def path(self):
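For context on the two hunks above, a minimal usage sketch (the custom schema directory is hypothetical) of how switching the schema path clears the cache and leaving the context restores both the path and the previously populated cache:

import pathlib
import pyhf

# hypothetical directory holding an alternative schema set
custom_schemas = pathlib.Path('/path/to/customschema')

old_cache_size = len(pyhf.schema.variables.SCHEMA_CACHE)
with pyhf.schema(custom_schemas):
    # inside the context: the new path is active and the cache starts out empty
    assert pyhf.schema.path == custom_schemas
    assert len(pyhf.schema.variables.SCHEMA_CACHE) == 0
# on exit: the original path and the previously populated cache are restored
assert len(pyhf.schema.variables.SCHEMA_CACHE) == old_cache_size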
6 changes: 6 additions & 0 deletions src/pyhf/schema/loader.py
@@ -39,3 +39,9 @@ def load_schema(schema_id: str):
schema = json.load(json_schema)
variables.SCHEMA_CACHE[schema['$id']] = schema
return variables.SCHEMA_CACHE[schema['$id']]


# pre-populate the cache to avoid network access
# on first validation in standard usage
# (not in pyhf.schema.variables to avoid circular imports)
load_schema(f'{variables.SCHEMA_VERSION}/defs.json')
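As an illustration of what the new module-level call buys (the printed $id is illustrative, not guaranteed verbatim): the shared definitions are already cached right after import, so the first validation never has to fetch them over the network.

import pyhf

# after import the defs.json schema is already in the cache, loaded from the
# local package files rather than fetched by $id over the network
print(list(pyhf.schema.variables.SCHEMA_CACHE))
# e.g. ['https://scikit-hep.org/pyhf/schemas/1.0.0/defs.json']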
88 changes: 81 additions & 7 deletions tests/test_schema.py
@@ -1,6 +1,9 @@
import pyhf
import pytest
import json
import importlib
import sys
from pytest_socket import socket_disabled # noqa: F401


@pytest.mark.parametrize('version', ['1.0.0'])
@@ -27,11 +30,20 @@ def test_schema_callable():
assert callable(pyhf.schema)


def test_schema_changeable(datadir, monkeypatch):
@pytest.fixture
def self_restoring_schema_globals():
old_path = pyhf.schema.path
old_cache = dict(pyhf.schema.variables.SCHEMA_CACHE)
yield old_path, old_cache
pyhf.schema(old_path)
pyhf.schema.variables.SCHEMA_CACHE = old_cache


def test_schema_changeable(datadir, monkeypatch, self_restoring_schema_globals):
monkeypatch.setattr(
pyhf.schema.variables, 'schemas', pyhf.schema.variables.schemas, raising=True
)
old_path = pyhf.schema.path
old_path, old_cache = self_restoring_schema_globals
new_path = datadir / 'customschema'

with pytest.raises(pyhf.exceptions.SchemaNotFound):
@@ -40,36 +52,47 @@ def test_schema_changeable(datadir, monkeypatch):
pyhf.schema(new_path)
assert old_path != pyhf.schema.path
assert new_path == pyhf.schema.path
assert pyhf.schema.variables.SCHEMA_CACHE is not old_cache
assert len(pyhf.schema.variables.SCHEMA_CACHE) == 0
assert pyhf.Workspace(json.load(open(new_path / 'custom.json')))
pyhf.schema(old_path)
assert len(pyhf.schema.variables.SCHEMA_CACHE) == 1


def test_schema_changeable_context(datadir, monkeypatch):
def test_schema_changeable_context(datadir, monkeypatch, self_restoring_schema_globals):
monkeypatch.setattr(
pyhf.schema.variables, 'schemas', pyhf.schema.variables.schemas, raising=True
)
old_path = pyhf.schema.path
old_path, old_cache = self_restoring_schema_globals
new_path = datadir / 'customschema'

assert old_path == pyhf.schema.path
with pyhf.schema(new_path):
assert old_path != pyhf.schema.path
assert new_path == pyhf.schema.path
assert pyhf.schema.variables.SCHEMA_CACHE is not old_cache
assert len(pyhf.schema.variables.SCHEMA_CACHE) == 0
assert pyhf.Workspace(json.load(open(new_path / 'custom.json')))
assert len(pyhf.schema.variables.SCHEMA_CACHE) == 1
assert old_path == pyhf.schema.path
assert old_cache == pyhf.schema.variables.SCHEMA_CACHE


def test_schema_changeable_context_error(datadir, monkeypatch):
def test_schema_changeable_context_error(
datadir, monkeypatch, self_restoring_schema_globals
):
monkeypatch.setattr(
pyhf.schema.variables, 'schemas', pyhf.schema.variables.schemas, raising=True
)
old_path = pyhf.schema.path
old_path, old_cache = self_restoring_schema_globals
new_path = datadir / 'customschema'

with pytest.raises(ZeroDivisionError):
with pyhf.schema(new_path):
# this populates the current cache
pyhf.Workspace(json.load(open(new_path / 'custom.json')))
raise ZeroDivisionError()
assert old_path == pyhf.schema.path
assert old_cache == pyhf.schema.variables.SCHEMA_CACHE


def test_no_channels():
@@ -567,3 +590,54 @@ def test_patchset_fail(datadir, patchset_file):
patchset = json.load(open(datadir.joinpath(patchset_file)))
with pytest.raises(pyhf.exceptions.InvalidSpecification):
pyhf.schema.validate(patchset, 'patchset.json')


@pytest.fixture
def refresh_pyhf(monkeypatch):
global pyhf
modules_to_clear = [name for name in sys.modules if name.split('.')[0] == 'pyhf']
for module_name in modules_to_clear:
monkeypatch.delitem(sys.modules, module_name)
pyhf = importlib.import_module(pyhf.__name__)
return pyhf


def test_defs_always_cached(
socket_disabled, # noqa: F811
refresh_pyhf, # ensure there is not a pre-existing cache hiding the issue
):
"""
Schema definitions should always be loaded from the local files and cached at first import.

Otherwise pyhf will crash in contexts where the jsonschema.RefResolver cannot look up the definition
by its schema id (e.g. on a cluster node without network access).
"""
pyhf = refresh_pyhf
spec = {
'channels': [
{
'name': 'singlechannel',
'samples': [
{
'name': 'signal',
'data': [10],
'modifiers': [
{'name': 'mu', 'type': 'normfactor', 'data': None}
],
},
{
'name': 'background',
'data': [20],
'modifiers': [
{
'name': 'uncorr_bkguncrt',
'type': 'shapesys',
'data': [30],
}
],
},
],
}
]
}
pyhf.schema.validate(spec, 'model.json') # may try to access network and fail
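Outside of pytest, a minimal sketch of the same offline scenario, assuming pytest-socket's disable_socket/enable_socket helpers; the spec mirrors the one in the test above:

import pytest_socket
import pyhf

# block all network access, as on an isolated cluster node
pytest_socket.disable_socket()
try:
    spec = {
        'channels': [
            {
                'name': 'singlechannel',
                'samples': [
                    {
                        'name': 'signal',
                        'data': [10],
                        'modifiers': [
                            {'name': 'mu', 'type': 'normfactor', 'data': None}
                        ],
                    }
                ],
            }
        ]
    }
    # succeeds offline because defs.json was cached from local files at import
    pyhf.schema.validate(spec, 'model.json')
finally:
    pytest_socket.enable_socket()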