Skip to content

Commit

Permalink
Use referencing library (#241)
Browse files Browse the repository at this point in the history
* use referencing

* update tests

* types

* mypy

* mypy

* mypy

* pre-commit

* add mypy.ini

* revert

* update workflow

* update changelog
  • Loading branch information
jonhealy1 authored Jan 10, 2025
1 parent 7983a81 commit a656fa3
Show file tree
Hide file tree
Showing 11 changed files with 291 additions and 209 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test-runner.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:
with:
python-version: ${{ matrix.python-version }}

- name: Run unit tests
- name: Run mypy
run: |
pip install .
pip install -r requirements-dev.txt
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ repos:
rev: 24.1.1
hooks:
- id: black
language_version: python3.10
# language_version: python3.11
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.8.0
hooks:
Expand Down
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ The format is (loosely) based on [Keep a Changelog](http://keepachangelog.com/)
- Configure whether to open URLs when validating assets [#238](https://github.com/stac-utils/stac-validator/pull/238)
- Allow providing HTTP headers [#239](https://github.com/stac-utils/stac-validator/pull/239)

### Changed

- Switched to the referencing library for dynamic JSON schema validation and reference resolution [#241](https://github.com/stac-utils/stac-validator/pull/241)

## [v3.4.0] - 2024-10-08

### Added
Expand Down
7 changes: 4 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,10 @@
long_description_content_type="text/markdown",
url="https://github.com/stac-utils/stac-validator",
install_requires=[
"requests>=2.19.1",
"jsonschema>=3.2.0",
"click>=8.0.0",
"requests>=2.32.3",
"jsonschema>=4.23.0",
"click>=8.1.8",
"referencing>=0.35.1",
],
extras_require={
"dev": [
Expand Down
97 changes: 95 additions & 2 deletions stac_validator/utilities.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
import functools
import json
import ssl
from typing import Dict
from typing import Dict, Optional
from urllib.parse import urlparse
from urllib.request import Request, urlopen

import jsonschema
import requests # type: ignore
from jsonschema import Draft202012Validator
from referencing import Registry, Resource
from referencing.jsonschema import DRAFT202012
from referencing.retrieval import to_cached_resource
from referencing.typing import URI

NEW_VERSIONS = [
"1.0.0-beta.2",
Expand Down Expand Up @@ -77,7 +83,7 @@ def get_stac_type(stac_content: Dict) -> str:
return str(e)


def fetch_and_parse_file(input_path: str, headers: Dict = {}) -> Dict:
def fetch_and_parse_file(input_path: str, headers: Optional[Dict] = None) -> Dict:
"""Fetches and parses a JSON file from a URL or local file.
Given a URL or local file path to a JSON file, this function fetches the file,
Expand Down Expand Up @@ -184,3 +190,90 @@ def link_request(
else:
initial_message["request_invalid"].append(link["href"])
initial_message["format_invalid"].append(link["href"])


def fetch_remote_schema(uri: str) -> dict:
    """
    Fetch a remote schema from a URI.

    Args:
        uri (str): The URI of the schema to fetch.

    Returns:
        dict: The fetched schema content as a dictionary.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server responds with an HTTP error status.
    """
    # requests waits indefinitely unless a timeout is given; bound the wait so
    # an unresponsive host cannot hang validation. The 10-second limit is the
    # same one cached_retrieve uses for referenced schemas.
    response = requests.get(uri, timeout=10)
    response.raise_for_status()
    return response.json()


@to_cached_resource()  # type: ignore
def cached_retrieve(uri: URI) -> str:
    """
    Download the schema at ``uri`` and hand back its raw JSON text.

    Caching and parsing of the returned text are left to the
    ``to_cached_resource`` decorator; this function only performs the HTTP
    request.

    Args:
        uri (str): The URI of the schema.

    Returns:
        str: The raw JSON string of the schema.

    Raises:
        requests.RequestException: If the request to fetch the schema fails.
        Exception: For any other unexpected errors.
    """
    try:
        # Bounded wait so a stalled server cannot block reference resolution.
        reply = requests.get(uri, timeout=10)
        # Turn HTTP error statuses (>= 400) into exceptions.
        reply.raise_for_status()
        body = reply.text
    except requests.exceptions.RequestException as err:
        detail = f"Failed to fetch schema from {uri}: {err!s}"
        raise requests.RequestException(detail) from err
    except Exception as err:
        detail = f"Unexpected error while retrieving schema from {uri}: {err!s}"
        raise Exception(detail) from err
    return body


def validate_with_ref_resolver(schema_path: str, content: dict) -> None:
    """
    Validate a JSON document against a JSON Schema with dynamic reference resolution.

    The schema is loaded from ``schema_path`` (over HTTP(S) or from the local
    filesystem), registered under that same path/URI, and any further
    references are resolved through ``cached_retrieve``.

    Args:
        schema_path (str): Path or URI of the JSON Schema.
        content (dict): JSON content to validate.

    Raises:
        jsonschema.exceptions.ValidationError: If validation fails. The raised
            error carries only the original message; the original error is
            available as ``__cause__``.
        requests.RequestException: If fetching a remote schema fails.
        FileNotFoundError: If a local schema file is not found.
        Exception: If any other error occurs during validation.
    """
    # Load the schema. Match the full scheme prefix so that a local path which
    # merely begins with "http" (e.g. "http_schemas/item.json") is still read
    # from disk instead of being sent to the network.
    if schema_path.startswith(("http://", "https://")):
        schema = fetch_remote_schema(schema_path)
    else:
        try:
            # JSON is UTF-8 by specification; do not depend on the platform's
            # default locale encoding.
            with open(schema_path, "r", encoding="utf-8") as f:
                schema = json.load(f)
        except FileNotFoundError as e:
            raise FileNotFoundError(f"Schema file not found: {schema_path}") from e

    # Set up the resource and registry for schema resolution
    resource: Resource = Resource(contents=schema, specification=DRAFT202012)  # type: ignore
    registry: Registry = Registry(retrieve=cached_retrieve).with_resource(  # type: ignore
        uri=schema_path, resource=resource
    )  # type: ignore

    # Validate the content against the schema
    try:
        validator = Draft202012Validator(schema, registry=registry)
        validator.validate(content)
    except jsonschema.exceptions.ValidationError as e:
        raise jsonschema.exceptions.ValidationError(f"{e.message}") from e
    except Exception as e:
        raise Exception(f"Unexpected error during validation: {str(e)}") from e
Loading

0 comments on commit a656fa3

Please sign in to comment.