-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add KafkaSchemaRegistrySource in the external providers in the Python binding (#10)
- Loading branch information
Showing
8 changed files
with
227 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
104 changes: 104 additions & 0 deletions
104
python/metadata_guardian/source/external/kafka_schema_registry_source.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
import json | ||
from dataclasses import dataclass | ||
from enum import Enum | ||
from typing import Any, List, Optional | ||
|
||
from loguru import logger | ||
|
||
from .external_metadata_source import ( | ||
ExternalMetadataSource, | ||
ExternalMetadataSourceException, | ||
) | ||
|
||
try: | ||
from confluent_kafka.schema_registry import SchemaRegistryClient | ||
|
||
KAFKA_SCHEMA_REGISTRY_INSTALLED = True | ||
except ImportError: | ||
logger.debug("Kafka Schema Registry optional dependency is not installed.") | ||
KAFKA_SCHEMA_REGISTRY_INSTALLED = False | ||
|
||
if KAFKA_SCHEMA_REGISTRY_INSTALLED:

    class KafkaSchemaRegistryAuthentication(Enum):
        """Authentication method for Kafka Schema Registry source."""

        USER_PWD = 1

    @dataclass
    class KafkaSchemaRegistrySource(ExternalMetadataSource):
        """
        Instance of a Kafka Schema Registry source.

        Subjects of the registry are exposed as "tables" and the fields of the
        latest schema version of a subject are exposed as "columns".
        """

        # Base URL of the Schema Registry.
        url: str
        # Optional path to the client certificate, forwarded to the client.
        ssl_certificate_location: Optional[str] = None
        # Optional path to the client private key, forwarded to the client.
        ssl_key_location: Optional[str] = None
        # Lazily created client; can be injected (e.g. in tests).
        connection: Optional[Any] = None
        authenticator: Optional[
            KafkaSchemaRegistryAuthentication
        ] = KafkaSchemaRegistryAuthentication.USER_PWD
        # Key of a schema field that holds its comment.
        comment_field_name: str = "doc"

        def get_connection(self) -> None:
            """
            Get the connection of the Kafka Schema Registry.

            The client is stored in ``self.connection``; nothing is returned.
            :raises NotImplementedError: if the authenticator is not supported
            :return:
            """
            if self.authenticator != KafkaSchemaRegistryAuthentication.USER_PWD:
                raise NotImplementedError()

            configuration = {"url": self.url}
            # Only forward the SSL settings that were actually provided so that
            # a plain URL-only source keeps the exact same configuration.
            if self.ssl_certificate_location:
                configuration[
                    "ssl.certificate.location"
                ] = self.ssl_certificate_location
            if self.ssl_key_location:
                configuration["ssl.key.location"] = self.ssl_key_location
            self.connection = SchemaRegistryClient(configuration)

        def get_column_names(
            self, database_name: str, table_name: str, include_comment: bool = False
        ) -> List[str]:
            """
            Get the column names from the subject.

            The latest registered schema of the subject is parsed as JSON and
            the lower-cased name of each entry of its ``fields`` is returned.
            When ``include_comment`` is set, the lower-cased comment of a field
            (if present) is appended right after its name.
            :param database_name: not relevant
            :param table_name: the subject name
            :param include_comment: include the comment
            :return: the list of the column names
            """
            try:
                if self.connection is None:
                    self.get_connection()
                registered_schema = self.connection.get_latest_version(table_name)
                fields = json.loads(registered_schema.schema.schema_str)["fields"]
                columns: List[str] = []
                for field in fields:
                    columns.append(field["name"].lower())
                    if include_comment and self.comment_field_name in field:
                        columns.append(field[self.comment_field_name].lower())
                return columns
            except Exception:
                logger.exception(
                    f"Error in getting columns name from the Kafka Schema Registry {table_name}"
                )
                # Re-raise the original exception unchanged after logging it.
                raise

        def get_table_names_list(self, database_name: str) -> List[str]:
            """
            Get all the subjects from the Schema Registry.
            :param database_name: not relevant in that case
            :raises ExternalMetadataSourceException: if the subjects cannot be fetched
            :return: the list of the table names of the database
            """
            try:
                if self.connection is None:
                    self.get_connection()
                return self.connection.get_subjects()
            except Exception as exception:
                logger.exception(
                    "Error in getting all the subjects from the Kafka Schema Registry"
                )
                # Chain the cause so the original traceback stays attached.
                raise ExternalMetadataSourceException(exception) from exception

        @property
        def type(self) -> str:
            """
            The type of the source.
            :return: the name of the source.
            """
            return "Kafka Schema Registry"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
from unittest.mock import patch | ||
|
||
from confluent_kafka.schema_registry import RegisteredSchema, Schema | ||
|
||
from metadata_guardian.source import ( | ||
KafkaSchemaRegistryAuthentication, | ||
KafkaSchemaRegistrySource, | ||
) | ||
|
||
|
||
@patch("confluent_kafka.schema_registry.SchemaRegistryClient")
def test_kafka_schema_registry_source_get_column_names(mock_connection):
    """Column names and comments are read from the latest schema of a subject."""
    subject = "subject_name"
    avro_schema = """{
"fields": [
{
"name": "key",
"type": "string"
},
{
"name": "value",
"type": "string",
"doc": "doc"
}
],
"name": "test_one",
"namespace": "test.one",
"type": "record"
}"""
    # The mocked client hands back this registered schema for any subject.
    mock_connection.get_latest_version.return_value = RegisteredSchema(
        schema_id="schema_id",
        schema=Schema(avro_schema, "AVRO", []),
        subject=subject,
        version=1,
    )

    registry = KafkaSchemaRegistrySource(url="url")
    # Inject the mock so that no real connection is created.
    registry.connection = mock_connection

    result = registry.get_column_names(
        database_name=None, table_name=subject, include_comment=True
    )

    assert result == ["key", "value", "doc"]
    assert registry.authenticator == KafkaSchemaRegistryAuthentication.USER_PWD
|
||
|
||
@patch("confluent_kafka.schema_registry.SchemaRegistryClient")
def test_kafka_schema_registry_source_get_table_names_list(mock_connection):
    """The subjects returned by the client are exposed as the table names."""
    # The mocked client reports two subjects.
    mock_connection.get_subjects.return_value = ["subject1", "subject2"]

    registry = KafkaSchemaRegistrySource(url="url")
    # Inject the mock so that no real connection is created.
    registry.connection = mock_connection

    result = registry.get_table_names_list(database_name=None)

    assert result == ["subject1", "subject2"]
    assert registry.authenticator == KafkaSchemaRegistryAuthentication.USER_PWD
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters