Skip to content

Commit

Permalink
[SchemaRegistry] handle unknown content types (#37166)
Browse files Browse the repository at this point in the history
* [SchemaRegistry] handle unknown content types

* remove extra space in Accept headers

* fix license in tests

* fix spacing in accept header async

* add tests with mocked unknown content type

* mypy/lint

* cspell
  • Loading branch information
swathipil authored Sep 6, 2024
1 parent f17312e commit 6db6bd9
Show file tree
Hide file tree
Showing 9 changed files with 522 additions and 41 deletions.
5 changes: 3 additions & 2 deletions sdk/schemaregistry/azure-schemaregistry/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,16 @@ This version and all future versions will require Python 3.8+. Python 3.7 is no

### Features Added

- `V2022_10` has been added to `ApiVersion` and set as the default API version.
- `Json` and `Custom` have been added to supported formats in `SchemaFormat`.
- Sync and async `JsonSchemaEncoder` have been added under `azure.schemaregistry.encoder.jsonencoder`.
- `InvalidContentError` has been added under `azure.schemaregistry.encoder.jsonencoder` for use with the `JsonSchemaEncoder`.
- `MessageContent`, `OutboundMessageContent`, `InboundMessageContent`, and `SchemaContentValidate` have been added under `azure.schemaregistry` as protocols for use with the `JsonSchemaEncoder` and/or future encoder implementations.
- `Json` and `Custom` have been added to supported formats in `SchemaFormat`.
- `V2022_10` has been added to `ApiVersion` and set as the default API version.

### Bugs Fixed

- Fixed a bug in sync/async `register_schema` and `get_schema_properties` that did not accept case-insensitive strings as an argument to the `format` parameter.
- Fixed a bug where unknown content type strings from the service raised a client error, rather than being returned as a string in the SchemaProperties `format` property.

### Other Changes

Expand Down
2 changes: 1 addition & 1 deletion sdk/schemaregistry/azure-schemaregistry/assets.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
"AssetsRepo": "Azure/azure-sdk-assets",
"AssetsRepoPrefixPath": "python",
"TagPrefix": "python/schemaregistry/azure-schemaregistry",
"Tag": "python/schemaregistry/azure-schemaregistry_a1c9d18bfd"
"Tag": "python/schemaregistry/azure-schemaregistry_0a27c7561c"
}
109 changes: 89 additions & 20 deletions sdk/schemaregistry/azure-schemaregistry/azure/schemaregistry/_patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,13 @@
overload,
IO,
)
from enum import Enum
from typing_extensions import Protocol, TypedDict, Self

from azure.core import CaseInsensitiveEnumMeta
from azure.core.tracing.decorator import distributed_trace

from ._client import SchemaRegistryClient as GeneratedServiceClient
from .models._patch import SchemaFormat
from .models._patch import SchemaFormat, NormalizedSchemaContentTypes

if TYPE_CHECKING:
from azure.core.credentials import TokenCredential
Expand Down Expand Up @@ -57,22 +58,24 @@ def _parse_schema_properties_dict(response_headers: Mapping[str, Union[str, int]
"version": int(response_headers["Schema-Version"]),
}

def _normalize_content_type(content_type: str) -> str:
return content_type.replace(" ", "").lower()

def _get_format(content_type: str) -> SchemaFormat:
def _get_format(content_type: str) -> Union[SchemaFormat, str]:
# pylint:disable=redefined-builtin
# Exception cases may be due to forward compatibility.
# i.e. Getting a schema with a content type from a future API version.
# In this case, we default to CUSTOM format.
try:
format = content_type.split("serialization=")[1]
try:
return SchemaFormat(format.capitalize())
except ValueError:
pass
except IndexError:
pass
return SchemaFormat.CUSTOM

# In this case, we default to returning the content type string.

# remove whitespace and case from string
normalized_content_type = _normalize_content_type(content_type)
if normalized_content_type == NormalizedSchemaContentTypes.AVRO.value:
return SchemaFormat.AVRO
if normalized_content_type == NormalizedSchemaContentTypes.JSON.value:
return SchemaFormat.JSON
if normalized_content_type == NormalizedSchemaContentTypes.CUSTOM.value:
return SchemaFormat.CUSTOM
return content_type

def prepare_schema_properties_result( # pylint:disable=unused-argument,redefined-builtin
format: str,
Expand Down Expand Up @@ -220,10 +223,62 @@ def register_schema( # pylint:disable=arguments-differ
return SchemaProperties(**properties)

@overload
def get_schema(self, schema_id: str, **kwargs: Any) -> Schema: ...
def get_schema(self, schema_id: str, **kwargs: Any) -> Schema:
"""Gets a registered schema.
To get a registered schema by its unique ID, pass the `schema_id` parameter and any optional
keyword arguments. Azure Schema Registry guarantees that ID is unique within a namespace.
WARNING: If retrieving a schema format that is unsupported by this client version, upgrade to a client
version that supports the schema format. Otherwise, the content MIME type string will be returned as
the `format` value in the `properties` of the returned Schema.
:param str schema_id: References specific schema in registry namespace. Required if `group_name`,
`name`, and `version` are not provided.
:return: The schema stored in the registry associated with the provided arguments.
:rtype: ~azure.schemaregistry.Schema
:raises: :class:`~azure.core.exceptions.HttpResponseError`
.. admonition:: Example:
.. literalinclude:: ../samples/sync_samples/sample_code_schemaregistry.py
:start-after: [START get_schema_sync]
:end-before: [END get_schema_sync]
:language: python
:dedent: 4
:caption: Get schema by id.
"""
...

@overload
def get_schema(self, *, group_name: str, name: str, version: int, **kwargs: Any) -> Schema: ...
def get_schema(self, *, group_name: str, name: str, version: int, **kwargs: Any) -> Schema:
"""Gets a registered schema.
To get a specific version of a schema within the specified schema group, pass in the required
keyword arguments `group_name`, `name`, and `version` and any optional keyword arguments.
WARNING: If retrieving a schema format that is unsupported by this client version, upgrade to a client
version that supports the schema format. Otherwise, the content MIME type string will be returned as
the `format` value in the `properties` of the returned Schema.
:keyword str group_name: Name of schema group that contains the registered schema.
:keyword str name: Name of schema which should be retrieved.
:keyword int version: Version of schema which should be retrieved.
:return: The schema stored in the registry associated with the provided arguments.
:rtype: ~azure.schemaregistry.Schema
:raises: :class:`~azure.core.exceptions.HttpResponseError`
.. admonition:: Example:
.. literalinclude:: ../samples/sync_samples/sample_code_schemaregistry.py
:start-after: [START get_schema_by_version_sync]
:end-before: [END get_schema_by_version_sync]
:language: python
:dedent: 4
:caption: Get schema by version.
"""
...

@distributed_trace
def get_schema( # pylint: disable=docstring-missing-param,docstring-should-be-keyword
Expand All @@ -237,6 +292,10 @@ def get_schema( # pylint: disable=docstring-missing-param,docstring-should-be-k
2) To get a specific version of a schema within the specified schema group, pass in the required
keyword arguments `group_name`, `name`, and `version` and any optional keyword arguments.
WARNING: If retrieving a schema format that is unsupported by this client version, upgrade to a client
version that supports the schema format. Otherwise, the content MIME type string will be returned as
the `format` value in the `properties` of the returned Schema.
:param str schema_id: References specific schema in registry namespace. Required if `group_name`,
`name`, and `version` are not provided.
:keyword str group_name: Name of schema group that contains the registered schema.
Expand Down Expand Up @@ -281,8 +340,8 @@ def get_schema( # pylint: disable=docstring-missing-param,docstring-should-be-k
id=schema_id,
cls=prepare_schema_result,
headers={ # TODO: remove when multiple content types in response are supported
"Accept": """application/json; serialization=Avro, application/json; \
serialization=json, text/plain; charset=utf-8"""
"Accept": """application/json; serialization=Avro, application/json; """
"""serialization=json, text/plain; charset=utf-8"""
},
stream=True,
**http_request_kwargs,
Expand All @@ -305,8 +364,8 @@ def get_schema( # pylint: disable=docstring-missing-param,docstring-should-be-k
schema_version=version,
cls=prepare_schema_result,
headers={ # TODO: remove when multiple content types in response are supported
"Accept": """application/json; serialization=Avro, application/json; \
serialization=json, text/plain; charset=utf-8"""
"Accept": """application/json; serialization=Avro, application/json; """
"""serialization=json, text/plain; charset=utf-8"""
},
stream=True,
**http_request_kwargs,
Expand Down Expand Up @@ -414,6 +473,16 @@ def __init__(self, **kwargs: Any) -> None:
def __repr__(self) -> str:
return f"Schema(definition={self.definition}, properties={self.properties})"[:1024]

# ApiVersion was added to a previously GA'd version. However, newer libraries should not
# accept ApiVersion enums and only take strings. Leaving this here for backwards compatibility.
class ApiVersion(str, Enum, metaclass=CaseInsensitiveEnumMeta):
    """
    Represents the Schema Registry API version to use for requests.

    Members are ``str`` subclasses, so each member compares equal to its
    version string (for example ``"2022-10"``).
    """

    # NOTE(review): CaseInsensitiveEnumMeta presumably makes member-name lookup
    # case-insensitive -- confirm against the azure-core documentation.
    V2021_10 = "2021-10"
    V2022_10 = "2022-10"
    """This is the default version."""

###### Encoder Protocols ######

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,10 +143,62 @@ async def register_schema(
return SchemaProperties(**properties)

@overload
async def get_schema(self, schema_id: str, **kwargs: Any) -> Schema: ...
async def get_schema(self, schema_id: str, **kwargs: Any) -> Schema:
"""Gets a registered schema.
To get a registered schema by its unique ID, pass the `schema_id` parameter and any optional
keyword arguments. Azure Schema Registry guarantees that ID is unique within a namespace.
WARNING: If retrieving a schema format that is unsupported by this client version, upgrade to a client
version that supports the schema format. Otherwise, the content MIME type string will be returned as
the `format` value in the `properties` of the returned Schema.
:param str schema_id: References specific schema in registry namespace. Required if `group_name`,
`name`, and `version` are not provided.
:return: The schema stored in the registry associated with the provided arguments.
:rtype: ~azure.schemaregistry.Schema
:raises: :class:`~azure.core.exceptions.HttpResponseError`
.. admonition:: Example:
.. literalinclude:: ../samples/async_samples/sample_code_schemaregistry_async.py
:start-after: [START get_schema_async]
:end-before: [END get_schema_async]
:language: python
:dedent: 4
:caption: Get schema by id.
"""
...

@overload
async def get_schema(self, *, group_name: str, name: str, version: int, **kwargs: Any) -> Schema: ...
async def get_schema(self, *, group_name: str, name: str, version: int, **kwargs: Any) -> Schema:
"""Gets a registered schema.
To get a specific version of a schema within the specified schema group, pass in the required
keyword arguments `group_name`, `name`, and `version` and any optional keyword arguments.
WARNING: If retrieving a schema format that is unsupported by this client version, upgrade to a client
version that supports the schema format. Otherwise, the content MIME type string will be returned as
the `format` value in the `properties` of the returned Schema.
:keyword str group_name: Name of schema group that contains the registered schema.
:keyword str name: Name of schema which should be retrieved.
:keyword int version: Version of schema which should be retrieved.
:return: The schema stored in the registry associated with the provided arguments.
:rtype: ~azure.schemaregistry.Schema
:raises: :class:`~azure.core.exceptions.HttpResponseError`
.. admonition:: Example:
.. literalinclude:: ../samples/async_samples/sample_code_schemaregistry_async.py
:start-after: [START get_schema_by_version_async]
:end-before: [END get_schema_by_version_async]
:language: python
:dedent: 4
:caption: Get schema by version.
"""
...

@distributed_trace_async
async def get_schema( # pylint: disable=docstring-missing-param,docstring-should-be-keyword
Expand All @@ -160,6 +212,10 @@ async def get_schema( # pylint: disable=docstring-missing-param,docstring-shoul
2) To get a specific version of a schema within the specified schema group, pass in the required
keyword arguments `group_name`, `name`, and `version` and any optional keyword arguments.
WARNING: If retrieving a schema format that is unsupported by this client version, upgrade to a client
version that supports the schema format. Otherwise, the content MIME type string will be returned as
the `format` value in the `properties` of the returned Schema.
:param str schema_id: References specific schema in registry namespace. Required if `group_name`,
`name`, and `version` are not provided.
:keyword str group_name: Name of schema group that contains the registered schema.
Expand Down Expand Up @@ -204,8 +260,8 @@ async def get_schema( # pylint: disable=docstring-missing-param,docstring-shoul
id=schema_id,
cls=prepare_schema_result,
headers={ # TODO: remove when multiple content types are supported
"Accept": """application/json; serialization=Avro, application/json; \
serialization=json, text/plain; charset=utf-8"""
"Accept": """application/json; serialization=Avro, application/json; """
"""serialization=json, text/plain; charset=utf-8"""
},
stream=True,
**http_request_kwargs,
Expand All @@ -229,8 +285,8 @@ async def get_schema( # pylint: disable=docstring-missing-param,docstring-shoul
schema_version=version,
cls=prepare_schema_result,
headers={ # TODO: remove when multiple content types are supported
"Accept": """application/json; serialization=Avro, application/json; \
serialization=json, text/plain; charset=utf-8"""
"Accept": """application/json; serialization=Avro, application/json; """
"""serialization=json, text/plain; charset=utf-8"""
},
stream=True,
**http_request_kwargs,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
from typing import List

from enum import Enum

from azure.core import CaseInsensitiveEnumMeta
from ._enums import SchemaContentTypeValues


class SchemaFormat(str, Enum, metaclass=CaseInsensitiveEnumMeta):
Expand All @@ -22,18 +24,16 @@ class SchemaFormat(str, Enum, metaclass=CaseInsensitiveEnumMeta):
CUSTOM = "Custom"
"""Represents a custom schema format."""

# Normalizing the schema content type strings for whitespace and case insensitive comparison.
class NormalizedSchemaContentTypes(str, Enum, metaclass=CaseInsensitiveEnumMeta):
"""Describes closed list of normalized schema content type values."""

class ApiVersion(str, Enum, metaclass=CaseInsensitiveEnumMeta):
"""
Represents the Schema Registry API version to use for requests.
"""

V2021_10 = "2021-10"
V2022_10 = "2022-10"
"""This is the default version."""


DEFAULT_VERSION = ApiVersion.V2022_10
AVRO = SchemaContentTypeValues.AVRO.value.replace(" ", "").lower()
"""Avro encoding."""
JSON = SchemaContentTypeValues.JSON.value.replace(" ", "").lower()
"""JSON encoding"""
CUSTOM = SchemaContentTypeValues.CUSTOM.value.replace(" ", "").lower()
"""Plain text custom encoding."""


__all__: List[str] = [] # Add all objects you want publicly available to users at this package level
Expand Down
Loading

0 comments on commit 6db6bd9

Please sign in to comment.