diff --git a/doc/sources.rst b/doc/sources.rst index 91eb6a81..f07d6316 100644 --- a/doc/sources.rst +++ b/doc/sources.rst @@ -115,6 +115,36 @@ SDMX-JSON — .. autoclass:: sdmx.source.abs_json.Source() :members: +.. _AR1: + +``AR1``: National Institute of Statistics and Censuses (Argentina) +------------------------------------------------------------------ + +SDMX-ML — `Website `__ + +- Spanish name: Instituto Nacional de Estadística y Censos + +This source does not provide an actual SDMX-REST web service. +Instead, a set of SDMX-ML 2.1 files with data messages only (no structure or metadata) are available at URLs with the form: ``https://sdds.indec.gob.ar/files/data/IND.XML``. +These can be used with :class:`Client` by: + +- Using ``https://sdds.indec.gob.ar/files/`` as the base URL. +- Accessing only the :attr:`.Resource.data` endpoint, which gives the ``…/data/…`` URL component. +- Treating ``IND.XML`` (in reality, a file name with suffix) as the resource ID. +- Using no query key or parameters. + +.. code-block:: python + + c = sdmx.Client("AR1") + # The URL https://sdds.indec.gob.ar/files/data/IND.XML + dm = c.data("IND.XML") + +This is the same as using a non-source-specific Client to query the URL directly: + +.. code-block:: python + + c = sdmx.Client() + dm = c.get(url="https://sdds.indec.gob.ar/files/data/IND.XML") .. _BBK: @@ -240,15 +270,93 @@ SDMX-ML — .. _IMF: -``IMF``: International Monetary Fund's “SDMX Central” source ------------------------------------------------------------- +International Monetary Fund +--------------------------- + +As of 2025-01-10, there appear to be at least *three* systems operated by the IMF from which SDMX responses are available. +These are listed here from oldest to newest, and identified by the domain used in the base URL for requests. 
+ +(no ID): dataservices.imf.org +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +SDMX-ML and SDMX-JSON — +API documentation `1 `__, +`2 `__ + +- This appears to be an SDMX 2.0 REST web service, that can be induced to return SDMX-ML 2.1 or SDMX-JSON 1.0.0 messages through a ``?format=sdmx-2.1`` query parameter. +- :mod:`sdmx` does not provide a :file:`sources.json` entry/ID or tests for this service. +- However, the package code can still be used to access the responses. + For example: + +.. code-block:: python + + import sdmx + + client = sdmx.Client() + url = ( + # Base URL + "http://dataservices.imf.org/REST/SDMX_XML.svc/CompactData/" + # Data flow ID and key + "PCPS/M.W00.PZINC." + # Query parameters, including format + "?startPeriod=2021&endPeriod=2022&format=sdmx-2.1" + ) + + # Retrieve an SDMX-ML 2.1 data message + message = client.get(url=url) + + # Convert the single data set to pandas.Series with multi-index + df = sdmx.to_pandas(message.data[0]) + +``IMF``: sdmxcentral.imf.org +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SDMX-ML — `Website `__ -- Subset of the data available on http://data.imf.org. +- This appears to be an instance of the “Fusion Metadata Registry” software. + Such instances also expose SDMX 2.1 and 3.0 APIs. +- No API documentation appears to be available. +- The :mod:`sdmx` source with ID ``IMF`` corresponds to the SDMX 2.1 (SDMX-REST 1.x) API with base URL https://sdmxcentral.imf.org/ws/public/sdmxapi/rest. + The web interface suggests URLs for the SDMX 3.0.0 (SDMX-REST 2.x) API with base URL https://sdmxcentral.imf.org/sdmx/v2. + This API can be accessed by modifying the :attr:`.Source.url` and :attr:`~.Source.versions` attributes, or by constructing a new Source. + For example: + + .. 
code-block:: python + + import sdmx + from sdmx.format import Version + + client = sdmx.Client("IMF") + client.source.url = "https://sdmxcentral.imf.org/sdmx/v2" + client.source.versions = {Version["3.0.0"]} + + # Retrieve an SDMX-ML 3.0.0 structure message + message = client.dataflow("01R") + +- The source appears to provide a subset of the data available on https://data.imf.org. - Supports series-key-only and hence dataset-based key validation and construction. +``IMF_beta``, ``IMF_beta3``: api.imf.org +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +SDMX-ML — +`Website `__ — +`API documentation `__ + +.. warning:: As of 2025-01-10, this source carries a banner: + + We're in Beta! + Help us improve by `testing `__ and sharing `feedback `__. + This is a beta version; the data is not final and should not be used for actual work. + + Users should heed this message. + The source IDs used in :mod:`sdmx` may change if and when this source exits beta and enters production, or is designated as the recommended, primary, or sole IMF source. + +- The API documentation indicates "Our data are available through SDMX 2.1 and SDMX 3.0 APIs," but the documentation pages mention only the SDMX 2.1 (SDMX-REST 1.x) base URL, https://api.imf.org/external/sdmx/2.1. + The base URL used by :mod:`sdmx` for the SDMX 3.0 (SDMX-REST 2.x) API is inferred. +- :mod:`sdmx` provides access to both versions of the API with IDs ``IMF_beta`` and ``IMF_beta3``. + As of 2025-01-10, both return HTTP **403 Forbidden** to every request except the SDMX 2.1 data query illustrated in the API documentation. .. _INEGI: @@ -425,6 +533,26 @@ API documentation `(en) `__, +`(fr) `__. + +- The source only provides a SDMX-REST API for the ``/data/`` endpoint. +- Some structural artefacts are available, but not through an SDMX-REST API. 
+ Instead, a set of SDMX-ML 2.1 files with structure messages are available at URLs with the form: ``https://www150.statcan.gc.ca/t1/wds/sdmx/statcan/rest/structure/Data_Structure_17100005``. + (Note that this lacks the URL path components for the agency ID and version, which would resemble ``…/structure/StatCan/Data_Structure_17100005/latest``.) + + These can be queried directly using any Client: + + .. code-block:: python + + c = sdmx.Client("StatCan") # or sdmx.Client() + dm = c.get(url="https://www150.statcan.gc.ca/t1/wds/sdmx/statcan/rest/structure/Data_Structure_17100005") .. _UNESCO: @@ -503,6 +631,17 @@ SDMX-ML — - Supports preview_data and series-key based key validation. +.. _UY110: + +``UY110``: Labour Market Information System (Uruguay) +----------------------------------------------------- + +SDMX-ML — +Website `(en) `__, +`(es) `__. + +- Spanish name: Sistema de Información de Mercado Laboral +- Operated by the Ministry of Labour and Social Security (Ministerio de Trabajo y Seguridad Social, MTSS), the National Institute of Statistics (Instituto Nacional de Estadística, INE) and the Social Security Bank (Banco de Previsión Social, BPS) of Uruguay. .. _WB: diff --git a/doc/whatsnew.rst b/doc/whatsnew.rst index 2b2d49a5..ef0e7c6c 100644 --- a/doc/whatsnew.rst +++ b/doc/whatsnew.rst @@ -3,11 +3,14 @@ What's new? *********** -.. _2.20.1: +.. _2.21.0: Next release ============ +- Add :ref:`AR1 `, :ref:`StatCan `, and :ref:`UY110 ` data sources (:pull:`218`, :issue:`186`, :issue:`187`, :issue:`188`). +- New function :func:`.get_source` for case-insensitive lookup of sources (:pull:`218`). + :class:`.Client` will handle, for instance, :py:`Client("wb")` the same as :py:`Client("WB")` and log a message about the difference. - Simplify :class:`.Session` via direct inheritance from :class:`.requests_cache.session.CacheMixin`, where installed (:pull:`217`). - Add an optional :py:`session=...` keyword argument to :class:`.Client` (:pull:`217`). 
def validate_xml(
    msg: Union[Path, IO],
    schema_dir: Optional[Path] = None,
    version: Union[str, Version] = Version["2.1"],
    max_errors: int = -1,
) -> bool:
    """Validate SDMX-ML in `msg` against the XML Schema (XSD) documents.

    If validation fails, entries from the schema's error log are emitted as log
    messages, at most `max_errors` of them. Each message is logged at the level named
    by its entry, or :data:`logging.ERROR` if :mod:`logging` has no level of that
    name. The messages identify content in `msg` that is not valid per the schemas.

    Parameters
    ----------
    msg
        Path or io-like containing an SDMX-ML message.
    schema_dir
        Directory with SDMX-ML XSD schemas used to validate the message.
    version
        The SDMX-ML schema version to validate against. One of ``2.1`` or ``3.0``.
    max_errors
        Maximum number of messages to log on validation failure. With the default, -1,
        every entry in the error log is logged.

    Returns
    -------
    bool
        :any:`True` if validation passed, otherwise :any:`False`.

    Raises
    ------
    FileNotFoundError
        if a schema cannot be loaded from :file:`SDMXMessage.xsd` in `schema_dir` or
        in its subdirectory named for `version`.
        Use :func:`sdmx.install_schemas` to download the schema files.
    NotImplementedError
        if `msg` contains valid XML, but with a root element that is not part of the
        SDMX-ML standard.
    """
    schema_dir, version = _handle_validate_args(schema_dir, version)

    # Find SDMXMessage.xsd in `schema_dir` or a subdirectory
    for candidate in schema_dir, schema_dir.joinpath(version.name):
        try:
            # Turn the XSD into a schema object
            xml_schema = etree.XMLSchema(file=candidate.joinpath("SDMXMessage.xsd"))
            break
        except Exception:
            # Any failure to load from this candidate (not only a missing file) moves
            # on to the next one
            xml_schema = None

    if xml_schema is None:
        raise FileNotFoundError(f"Could not find XSD files in {schema_dir}")

    # Parse the given document
    msg_doc = etree.parse(msg)

    if xml_schema.validate(msg_doc):
        return True

    for i, entry in enumerate(
        cast(Iterable["etree._LogEntry"], xml_schema.error_log)
    ):
        if (
            i == 0
            and "No matching global declaration available for the validation root"
            in entry.message
        ):
            # The first entry reports that the root element itself is unknown to the
            # schemas: `msg` is XML, but not an SDMX-ML message
            raise NotImplementedError(
                f"Validate non-SDMX root element <{msg_doc.getroot().tag}>"
            ) from None
        elif i == max_errors:
            break

        # The entry's level name may not be an attribute of `logging`; fall back to
        # ERROR instead of raising AttributeError part-way through reporting
        level = getattr(logging, entry.level_name, logging.ERROR)
        log.log(level, entry.message)

    return False
str:Expression str:NullValue str:OperatorDefinition + str:PersonalisedName str:Result str:RulesetDefinition str:Telephone str:URI + str:VtlDefaultName str:VtlScalarType """ ) def _text(reader, elem): @@ -403,7 +404,23 @@ def _ref(reader: Reader, elem): # In a StructureMessage cls_hint = reader.model.DataStructureDefinition - reader.push(QName(elem).localname, reader.reference(elem, cls_hint)) + try: + ref = reader.reference(elem, cls_hint) + except ValueError as e: + # Handle references to known non-standard classes; see + # https://github.com/khaeru/sdmx/issues/180 + info = e.args[1] + if info["package"] == "publicationtable": + log.warning( + "Cannot resolve reference to non-SDMX class " + f"'{info['package']}.{info['class']}'" + ) + # Push the dict of reference info, in case the user wants to make use of it + ref = info + else: + raise + + reader.push(QName(elem).localname, ref) @end("com:Annotation") @@ -745,10 +762,12 @@ def _contact_start(reader, elem): @end("mes:Contact str:Contact", only=False) def _contact_end(reader, elem): - contact = model.Contact( + contact = common.Contact( + email=reader.pop_all("Email"), + fax=reader.pop_all("Fax"), telephone=reader.pop_single("Telephone"), uri=reader.pop_all("URI"), - email=reader.pop_all("Email"), + x400=reader.pop_all("X400"), ) add_localizations(contact.name, reader.pop_all("Name")) diff --git a/sdmx/reader/xml/v30.py b/sdmx/reader/xml/v30.py index b7fb69d0..63633073 100644 --- a/sdmx/reader/xml/v30.py +++ b/sdmx/reader/xml/v30.py @@ -8,7 +8,7 @@ from sdmx.model import v30 as model from . 
def get_source(id: str) -> Source:
    """Return the Source with the given `id`.

    An exact match for `id` among the registered :data:`sources` is returned directly.
    Otherwise, `id` is compared case-insensitively against each registered ID, and the
    first source whose *entire* ID matches is returned, with a log message at level
    :data:`logging.INFO`.

    Parameters
    ----------
    id
        ID of a registered source, in any letter case.

    Raises
    ------
    KeyError
        if no registered source ID matches `id`, even ignoring case.
    """
    try:
        return sources[id]
    except KeyError:
        # Try to find a case-insensitive match.
        #
        # Compare whole, case-folded strings. Using the registered ID as a regular
        # expression with re.match() would accept any `id` that merely *starts* with
        # a registered ID (so "imf_beta" could resolve to "IMF"), and would give
        # special meaning to regex metacharacters in an ID.
        folded = id.casefold()
        for key, source in sources.items():
            if key.casefold() == folded:
                log.info(
                    f"Return {source} as a case-insensitive match for source id={id!r}"
                )
                return source
        # No match: re-raise the original KeyError
        raise
def load_package_sources():
    """Register each source described in the packaged :file:`sources.json`."""
    # Locate the file among the resources of the "sdmx" package
    resource = importlib.resources.files("sdmx").joinpath("sources.json")

    with resource.open("rb") as stream:
        # Hand every item of the decoded JSON document to add_source()
        for entry in json.load(stream):
            add_source(entry)
"https://www150.statcan.gc.ca/t1/wds/sdmx/statcan/rest/", + "supports": { + "actualconstraint": false, + "allowedconstraint": false, + "agencyscheme": false, + "categorisation": false, + "categoryscheme": false, + "codelist": false, + "conceptscheme": false, + "contentconstraint": false, + "dataconsumerscheme": false, + "dataproviderscheme": false, + "dataflow": false, + "datastructure": false, + "hierarchicalcodelist": false, + "metadataflow": false, + "metadatastructure": false, + "organisationscheme": false, + "provisionagreement": false, + "structure": false, + "structureset": false + } + }, { "id": "UNESCO", "name": "UN Educational, Scientific and Cultural Organization", @@ -412,6 +477,21 @@ "preview": true } }, + { + "id": "UY110", + "name": "Uruguay", + "url": "https://sdmx-mtss.simel.mtss.gub.uy/rest", + "supports": { + "agencyscheme": false, + "dataconsumerscheme": false, + "dataproviderscheme": false, + "hierarchicalcodelist": false, + "metadataflow": false, + "metadatastructure": false, + "provisionagreement": false, + "structureset": false + } + }, { "id": "WB", "name": "World Bank World Integrated Trade Solution", diff --git a/sdmx/testing/__init__.py b/sdmx/testing/__init__.py index 58daf342..48842656 100644 --- a/sdmx/testing/__init__.py +++ b/sdmx/testing/__init__.py @@ -1,5 +1,6 @@ import logging import os +import re from collections import ChainMap from pathlib import Path from typing import TYPE_CHECKING, Union @@ -7,12 +8,14 @@ import numpy as np import pandas as pd import pytest +import responses from xdist import is_xdist_worker from sdmx.exceptions import HTTPError +from sdmx.format import Version from sdmx.rest import Resource from sdmx.session import Session -from sdmx.source import DataContentType, Source, sources +from sdmx.source import DataContentType, Source, get_source from sdmx.testing.report import ServiceReporter from sdmx.util.requests import offline @@ -151,7 +154,7 @@ class (e.g. :class:`.DataSourceTest` subclass). 
@pytest.fixture(scope="session")
def mock_gh_api():
    """Mock GitHub API responses to avoid hitting rate limits.

    For each API endpoint URL queried by :func:`._gh_zipball`, the mock returns a
    pared-down JSON response that contains the required "zipball_url" key. Requests
    for the zipballs themselves are passed through to the network.

    The :class:`responses.RequestsMock` is yielded without being started; code using
    the fixture activates it, for instance as a context manager.
    """
    repo_url = "https://api.github.com/repos/sdmx-twg/sdmx-ml"

    # URLs that are not mocked, but passed through
    passthru_patterns = (
        re.compile(rf"{repo_url}/zipball/\w+"),
        re.compile(r"https://codeload.github.com/\w+"),
    )

    # TODO Improve .util.requests to provide (roughly) the same functionality, then drop
    # use of responses here
    gh = responses.RequestsMock(assert_all_requests_are_fired=False)
    for pattern in passthru_patterns:
        gh.add_passthru(pattern)

    # One mocked "release by tag" response per version tag
    for tag in ("2.1", "3.0", "3.0.0"):
        gh.get(
            url=f"{repo_url}/releases/tags/v{tag}",
            json={"zipball_url": f"{repo_url}/zipball/v{tag}"},
        )

    yield gh
@@ -272,6 +313,8 @@ def test_data_path(pytestconfig): @pytest.fixture(scope="class") def testsource(pytestconfig): """Fixture: the :attr:`.Source.id` of a temporary data source.""" + from sdmx.source import sources + s = pytestconfig.stash[KEY_SOURCE] sources[s.id] = s diff --git a/sdmx/testing/data.py b/sdmx/testing/data.py index 2a4ed5c9..e1311d4d 100644 --- a/sdmx/testing/data.py +++ b/sdmx/testing/data.py @@ -296,8 +296,10 @@ def add_specimens(target: list[tuple[Path, str, Optional[str]]], base: Path) -> target.extend( (base.joinpath(*parts), "xml", "data") for parts in [ + ("constructed", "gh-218.xml"), ("INSEE", "CNA-2010-CONSO-SI-A17.xml"), ("INSEE", "IPI-2010-A21.xml"), + ("IMF", "PCPS.xml"), ("ESTAT", "esms.xml"), ("ESTAT", "footer.xml"), ("ESTAT", "NAMA_10_GDP-ss.xml"), @@ -310,6 +312,7 @@ def add_specimens(target: list[tuple[Path, str, Optional[str]]], base: Path) -> for parts in [ ("BIS", "actualconstraint-0.xml"), ("BIS", "hierarchicalcodelist-0.xml"), + ("BIS", "gh-180.xml"), ("ECB", "orgscheme.xml"), ("ECB", "structureset-0.xml"), ("ESTAT", "apro_mk_cola-structure.xml"), @@ -318,6 +321,7 @@ def add_specimens(target: list[tuple[Path, str, Optional[str]]], base: Path) -> ("ESTAT", "HCL_WSTATUS_SCL_BNSPART.xml"), ("ESTAT", "HCL_WSTATUS_SCL_WSTATUSPR.xml"), ("IAEG-SDGs", "metadatastructure-0.xml"), + ("IMF", "01R.xml"), ("IMF", "1PI-structure.xml"), ("IMF", "CL_AREA-structure.xml"), # Manually reduced subset of the response for this DSD. 
Test for diff --git a/sdmx/tests/format/test_format_xml.py b/sdmx/tests/format/test_format_xml.py index 8186eac4..38ff6a6b 100644 --- a/sdmx/tests/format/test_format_xml.py +++ b/sdmx/tests/format/test_format_xml.py @@ -3,7 +3,6 @@ from pathlib import Path import pytest -import responses import sdmx from sdmx.format import Version, xml @@ -31,51 +30,6 @@ def test_class_for_tag(): assert xml.v30.class_for_tag("str:DataStructure") is not None -@pytest.fixture(scope="module") -def mock_gh_api(): - """Mock GitHub API responses to avoid hitting rate limits. - - For each API endpoint URL queried by :func:.`_gh_zipball`, return a pared-down JSON - response that contains the required "zipball_url" key. - """ - base = "https://api.github.com/repos/sdmx-twg/sdmx-ml" - - # TODO Improve .util.requests to provide (roughly) the same functionality, then drop - # use of responses here - mock = responses.RequestsMock(assert_all_requests_are_fired=False) - mock.add_passthru(re.compile(rf"{base}/zipball/\w+")) - mock.add_passthru(re.compile(r"https://codeload.github.com/\w+")) - - for v in "2.1", "3.0", "3.0.0": - mock.get( - url=f"{base}/releases/tags/v{v}", - json=dict(zipball_url=f"{base}/zipball/v{v}"), - ) - - mock.start() - - try: - yield - finally: - mock.stop() - - -@pytest.fixture(scope="module") -def installed_schemas(mock_gh_api, tmp_path_factory): - """Fixture that ensures schemas are installed locally in a temporary directory.""" - dir = tmp_path_factory.mktemp("schemas") - sdmx.install_schemas(dir.joinpath("2.1"), Version["2.1"]) - sdmx.install_schemas(dir.joinpath("3.0"), Version["3.0.0"]) - yield dir - - -@pytest.mark.parametrize("version", ["1", 1, None]) -def test_install_schemas_invalid_version(version): - """Ensure invalid versions throw ``NotImplementedError``.""" - with pytest.raises(NotImplementedError): - sdmx.install_schemas(version=version) - - @pytest.mark.network @pytest.mark.parametrize("version", ["2.1", "3.0"]) def 
test_install_schemas(installed_schemas, version): @@ -101,28 +55,15 @@ def test_install_schemas_in_user_cache(): @pytest.mark.parametrize("version", ["1", 1, None]) -def test_validate_xml_invalid_version(version): - """Ensure validation of invalid versions throw ``NotImplementedError``.""" +def test_install_schemas_invalid_version(version): + """Ensure invalid versions throw ``NotImplementedError``.""" with pytest.raises(NotImplementedError): - # This message doesn't exist, but the version should throw before it is used. - sdmx.validate_xml("samples/common/common.xml", version=version) - - -def test_validate_xml_no_schemas(tmp_path, specimen, installed_schemas): - """Check that supplying an invalid schema path will raise ``ValueError``.""" - with specimen("IPI-2010-A21-structure.xml", opened=False) as msg_path: - with pytest.raises(ValueError): - sdmx.validate_xml(msg_path, schema_dir=tmp_path) + sdmx.install_schemas(version=version) @pytest.mark.network def test_validate_xml_from_v2_1_samples(tmp_path, specimen, installed_schemas): """Use official samples to ensure validation of v2.1 messages works correctly.""" - extracted_content = _extracted_zipball(Version["2.1"]) - - # Schemas as just in a flat directory - schema_dir = extracted_content.joinpath("schemas") - # Samples are somewhat spread out, and some are known broken so we pick a bunch for parts in [ ("v21", "xml", "common", "common.xml"), @@ -137,7 +78,31 @@ def test_validate_xml_from_v2_1_samples(tmp_path, specimen, installed_schemas): ("v21", "xml", "query", "response_esms_children.xml"), ]: with specimen(str(Path(*parts))) as sample: - assert sdmx.validate_xml(sample, schema_dir, version="2.1") + assert sdmx.validate_xml( + sample, installed_schemas.joinpath("2.1"), version="2.1" + ) + + +@pytest.mark.network +def test_validate_xml_from_v3_0_samples(tmp_path, installed_schemas): + """Use official samples to ensure validation of v3.0 messages works correctly.""" + extracted_content = 
_extracted_zipball(Version["3.0.0"]) + + # Schemas as just in a flat directory + schema_dir = extracted_content.joinpath("schemas") + + # Samples are somewhat spread out, and some are known broken so we pick a bunch + samples_dir = extracted_content.joinpath("samples") + samples = [ + samples_dir / "Codelist" / "codelist.xml", + samples_dir / "Codelist" / "codelist - extended.xml", + samples_dir / "Concept Scheme" / "conceptscheme.xml", + samples_dir / "Data Structure Definition" / "ECB_EXR.xml", + samples_dir / "Dataflow" / "dataflow.xml", + samples_dir / "Geospatial" / "geospatial_geographiccodelist.xml", + ] + for sample in samples: + assert sdmx.validate_xml(sample, schema_dir, version="3.0") @pytest.mark.network @@ -174,33 +139,26 @@ def test_validate_xml_invalid_doc(tmp_path, installed_schemas): assert not sdmx.validate_xml(msg_path, schema_dir=installed_schemas.joinpath("2.1")) -def test_validate_xml_invalid_message_type(): +def test_validate_xml_invalid_message_type(installed_schemas): """Ensure that an invalid document fails validation.""" # Create a mangled structure message with its outmost tag changed to be invalid msg = StructureMessage() invalid_msg = re.sub(b"mes:Structure([ >])", rb"mes:FooBar\1", sdmx.to_xml(msg)) - with pytest.raises(NotImplementedError): - sdmx.validate_xml(io.BytesIO(invalid_msg)) + with pytest.raises(NotImplementedError, match="Validate non-SDMX root.*FooBar>"): + sdmx.validate_xml(io.BytesIO(invalid_msg), installed_schemas) -@pytest.mark.network -def test_validate_xml_from_v3_0_samples(tmp_path, installed_schemas): - """Use official samples to ensure validation of v3.0 messages works correctly.""" - extracted_content = _extracted_zipball(Version["3.0.0"]) +@pytest.mark.parametrize("version", ["1", 1, None]) +def test_validate_xml_invalid_version(version): + """Ensure validation of invalid versions throw ``NotImplementedError``.""" + with pytest.raises(NotImplementedError): + # This message doesn't exist, but the version should 
def test_validate_xml_no_schemas(tmp_path, specimen):
    """Check that a schema path without schemas raises ``FileNotFoundError``."""
    with specimen("IPI-2010-A21-structure.xml", opened=False) as msg_path:
        # `tmp_path` is passed as `schema_dir`, but no schemas are installed into it
        with pytest.raises(FileNotFoundError):
            sdmx.validate_xml(msg_path, schema_dir=tmp_path)
".*attribute 'package'.*'publicationtable' is not an element of the set", + caplog.messages[-2], + ) + + # Message can still be read + f.seek(0) + msg = sdmx.read_sdmx(f) + assert isinstance(msg, sdmx.message.StructureMessage) + + # Reader logs a warning regarding the missing reference + assert re.match( + "Cannot resolve reference to non-SDMX class", caplog.messages[-1] + ) + + def test_gh_199(): """Test of https://github.com/khaeru/sdmx/issues/199.""" import sdmx.format.xml.v21 @@ -293,6 +317,36 @@ def test_gh_205(caplog, specimen) -> None: assert text == str(a.text) +def test_gh_218(caplog, installed_schemas, specimen) -> None: + """Test of https://github.com/khaeru/sdmx/pull/218.""" + with specimen("constructed/gh-218.xml") as f: + # Specimen is XSD-valid + validate_xml(f, installed_schemas) + + f.seek(0) + + # Specimen can be read + msg = sdmx.read_sdmx(f) + + # The message sender has 1 contact, with all attributes populated + assert isinstance(msg, sdmx.message.DataMessage) and msg.header.sender + assert 1 == len(msg.header.sender.contact) + contact = msg.header.sender.contact[0] + assert contact.telephone is not None + assert ( + 1 + # Number of localizations of localizable attributes + == len(contact.name.localizations) + == len(contact.org_unit.localizations) + == len(contact.responsibility.localizations) + # Number of values of multi-value attributes + == len(contact.email) + == len(contact.fax) + == len(contact.uri) + == len(contact.x400) + ) + + # Each entry is a tuple with 2 elements: # 1. an instance of lxml.etree.Element to be parsed. # 2. 
Either: diff --git a/sdmx/tests/test_source.py b/sdmx/tests/test_source.py index e3c0143c..2a9afb15 100644 --- a/sdmx/tests/test_source.py +++ b/sdmx/tests/test_source.py @@ -7,7 +7,7 @@ def test_list_sources(): source_ids = list_sources() # Correct number of sources, excluding those created for testing - assert 29 == len(set(source_ids) - {"MOCK", "TEST"}) + assert 32 == len(set(source_ids) - {"MOCK", "TEST"}) # Listed alphabetically assert "ABS" == source_ids[0] diff --git a/sdmx/tests/test_sources.py b/sdmx/tests/test_sources.py index 29e39696..7a725e2c 100644 --- a/sdmx/tests/test_sources.py +++ b/sdmx/tests/test_sources.py @@ -120,6 +120,19 @@ class TestABS_JSON(DataSourceTest): } +class TestAR1(DataSourceTest): + source_id = "AR1" + + endpoint_args = dict( + data=dict(resource_id="WOE.XML"), + ) + + xfail = { + "metadata": NotImplementedError, # Internal to sdmx1 + "registration": ValueError, # Internal to sdmx1 + } + + class TestBBK(DataSourceTest): source_id = "BBK" @@ -355,6 +368,75 @@ class TestIMF(DataSourceTest): source_id = "IMF" +# As of 2025-01-10, all endpoints aside from SDMX 2.1 /data/ return 403 +IMF_BETA_XFAIL: dict[str, Union[type[Exception], tuple[type[Exception], str]]] = { + k: HTTPError + for k in """ + actualconstraint + agencyscheme + allowedconstraint + categorisation + categoryscheme + codelist + conceptscheme + contentconstraint + dataconsumerscheme + dataflow + dataproviderscheme + datastructure + hierarchicalcodelist + metadataflow + metadatastructure + organisationscheme + provisionagreement + registration + structure + structureset + """.split() +} + + +class TestIMF_beta(DataSourceTest): + source_id = "IMF_beta" + + endpoint_args = dict( + # As indicated in the API documentation + data=dict( + resource_id="CPI", + key="111.CPI.CP01.IX.M", + params=dict(startPeriod=2018), + # Does not appear to affect 403 + # headers={"User-Agent": "idata-script-client"}, + ) + ) + + xfail = IMF_BETA_XFAIL | dict( + metadata=NotImplementedError, + 
registration=ValueError, + ) + + +class TestIMF_beta3(DataSourceTest): + source_id = "IMF_beta3" + + endpoint_args = dict( + data=dict( + context="dataflow", + agency_id="IMF", + resource_id="CPI", + key="111.CPI.CP01.IX.M", + # Not yet supported + # params={"c[TIME_PERIOD]": "ge:2018"}, + ), + metadata=dict(provider_id="IMF"), + ) + + xfail = IMF_BETA_XFAIL | dict( + data=HTTPError, # 403 + metadata=HTTPError, # 403 + ) + + class TestINEGI(DataSourceTest): source_id = "INEGI" @@ -581,6 +663,24 @@ class TestSTAT_EE(DataSourceTest): } +class TestStatCan(DataSourceTest): + source_id = "StatCan" + + endpoint_args = dict( + data=dict( + resource_id="DF_17100005", + key=".1.138", + params=dict(startPeriod=2015, endPeriod=2016), + ), + structure=dict(resource_id="Data_Structure_17100005"), + ) + + xfail = { + "metadata": NotImplementedError, # Internal to sdmx1 + "registration": ValueError, # Internal to sdmx1 + } + + class TestUNESCO(DataSourceTest): """UNESCO. @@ -635,6 +735,19 @@ class TestUNSD(DataSourceTest): } +class TestUY110(DataSourceTest): + source_id = "UY110" + + xfail = { + "metadata": NotImplementedError, # Internal to sdmx1 + # 400: "Can not create reference, target structure is not maintainable, and no + # identifiable reference parameters present" + "organisationscheme": HTTPError, + "registration": ValueError, # Internal to sdmx1 + "structure": NotImplementedError, # 501 + } + + class TestWB(DataSourceTest): source_id = "WB" xfail = { diff --git a/sdmx/urn.py b/sdmx/urn.py index b1395fa5..a6f1e660 100644 --- a/sdmx/urn.py +++ b/sdmx/urn.py @@ -48,6 +48,7 @@ def __init__(self, value: Optional[str], **kwargs) -> None: self.__dict__.update(kwargs) if value is None: + self.groupdict = {} # Needed by match() return try: diff --git a/sdmx/writer/pandas.py b/sdmx/writer/pandas.py index dc59b05d..081ffeeb 100644 --- a/sdmx/writer/pandas.py +++ b/sdmx/writer/pandas.py @@ -324,7 +324,12 @@ def write_dataset( # noqa: C901 TODO reduce complexity 12 → ≤10 if 
len(result): result.index.names = observation.key.order().values.keys() if dtype: - result["value"] = result["value"].astype(dtype) + try: + result["value"] = result["value"].astype(dtype) + except ValueError: + # Attempt to handle locales in which LC_NUMERIC.decimal_point is "," + # TODO Make this more robust by inferring and changing locale settings + result["value"] = result["value"].str.replace(",", ".").astype(dtype) if not attributes: result = result["value"]