From be73aa69ffe01dfb43b13a22cfd8bd57bcfe90eb Mon Sep 17 00:00:00 2001 From: Ali Ugur Date: Wed, 29 Jan 2025 14:25:33 +0300 Subject: [PATCH 1/5] Libs(Update): Update all libs to the latest version --- .../data_platform_libs/v0/data_interfaces.py | 530 +++- .../lib/charms/data_platform_libs/v0/s3.py | 791 +++++ .../lib/charms/loki_k8s/v0/loki_push_api.py | 2510 +++++++++++++++ .../lib/charms/loki_k8s/v1/loki_push_api.py | 81 +- .../prometheus_k8s/v0/prometheus_scrape.py | 9 +- .../charm/lib/charms/redis_k8s/v0/redis.py | 24 +- .../lib/charms/saml_integrator/v0/saml.py | 347 ++ .../tempo_coordinator_k8s/v0/tracing.py | 987 ++++++ .../lib/charms/traefik_k8s/v2/ingress.py | 29 +- .../data_platform_libs/v0/data_interfaces.py | 21 +- .../lib/charms/loki_k8s/v1/loki_push_api.py | 2823 +++++++++++++++++ .../prometheus_k8s/v0/prometheus_scrape.py | 9 +- .../charm/lib/charms/redis_k8s/v0/redis.py | 24 +- .../tempo_coordinator_k8s/v0/tracing.py | 987 ++++++ .../lib/charms/traefik_k8s/v2/ingress.py | 29 +- .../data_platform_libs/v0/data_interfaces.py | 530 +++- .../lib/charms/loki_k8s/v0/loki_push_api.py | 23 +- .../lib/charms/loki_k8s/v1/loki_push_api.py | 81 +- .../prometheus_k8s/v0/prometheus_scrape.py | 9 +- .../flask/lib/charms/redis_k8s/v0/redis.py | 24 +- .../lib/charms/saml_integrator/v0/saml.py | 28 +- .../tempo_coordinator_k8s/v0/tracing.py | 987 ++++++ .../lib/charms/traefik_k8s/v2/ingress.py | 29 +- .../data_platform_libs/v0/data_interfaces.py | 530 +++- .../lib/charms/loki_k8s/v0/loki_push_api.py | 23 +- .../lib/charms/loki_k8s/v1/loki_push_api.py | 2823 +++++++++++++++++ .../prometheus_k8s/v0/prometheus_scrape.py | 9 +- .../go/charm/lib/charms/redis_k8s/v0/redis.py | 24 +- .../lib/charms/saml_integrator/v0/saml.py | 28 +- .../tempo_coordinator_k8s/v0/tracing.py | 987 ++++++ .../lib/charms/traefik_k8s/v2/ingress.py | 29 +- 31 files changed, 14884 insertions(+), 481 deletions(-) create mode 100644 examples/django/charm/lib/charms/data_platform_libs/v0/s3.py create mode 100644 examples/django/charm/lib/charms/loki_k8s/v0/loki_push_api.py create mode 100644 examples/django/charm/lib/charms/saml_integrator/v0/saml.py create mode 100644 examples/django/charm/lib/charms/tempo_coordinator_k8s/v0/tracing.py create mode 100644 examples/fastapi/charm/lib/charms/loki_k8s/v1/loki_push_api.py create mode 100644 examples/fastapi/charm/lib/charms/tempo_coordinator_k8s/v0/tracing.py create mode 100644 examples/flask/lib/charms/tempo_coordinator_k8s/v0/tracing.py create mode 100644 examples/go/charm/lib/charms/loki_k8s/v1/loki_push_api.py create mode 100644 examples/go/charm/lib/charms/tempo_coordinator_k8s/v0/tracing.py diff --git a/examples/django/charm/lib/charms/data_platform_libs/v0/data_interfaces.py b/examples/django/charm/lib/charms/data_platform_libs/v0/data_interfaces.py index b331bdc..3bc2dd8 100644 --- a/examples/django/charm/lib/charms/data_platform_libs/v0/data_interfaces.py +++ b/examples/django/charm/lib/charms/data_platform_libs/v0/data_interfaces.py @@ -331,10 +331,14 @@ def _on_topic_requested(self, event: TopicRequestedEvent): # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 36 +LIBPATCH = 40 PYDEPS = ["ops>=2.0.0"] +# Starting from what LIBPATCH number to apply legacy solutions +# v0.17 was the last version without secrets +LEGACY_SUPPORT_FROM = 17 + logger = logging.getLogger(__name__) Diff = namedtuple("Diff", "added changed deleted") @@ -351,36 +355,16 @@ def _on_topic_requested(self, event: TopicRequestedEvent): GROUP_MAPPING_FIELD = "secret_group_mapping" GROUP_SEPARATOR = "@" +MODEL_ERRORS = { + "not_leader": "this unit is not the leader", + "no_label_and_uri": "ERROR either URI or label should be used for getting an owned secret but not both", + "owner_no_refresh": "ERROR secret owner cannot use --refresh", +} -class SecretGroup(str): - """Secret groups specific type.""" - - -class SecretGroupsAggregate(str): - """Secret groups with option to extend with additional constants.""" - - def __init__(self): - self.USER = SecretGroup("user") - self.TLS = SecretGroup("tls") - self.EXTRA = SecretGroup("extra") - - def __setattr__(self, name, value): - """Setting internal constants.""" - if name in self.__dict__: - raise RuntimeError("Can't set constant!") - else: - super().__setattr__(name, SecretGroup(value)) - - def groups(self) -> list: - """Return the list of stored SecretGroups.""" - return list(self.__dict__.values()) - - def get_group(self, group: str) -> Optional[SecretGroup]: - """If the input str translates to a group name, return that.""" - return SecretGroup(group) if group in self.groups() else None - -SECRET_GROUPS = SecretGroupsAggregate() +############################################################################## +# Exceptions +############################################################################## class DataInterfacesError(Exception): @@ -407,6 +391,19 @@ class IllegalOperationError(DataInterfacesError): """To be used when an operation is not allowed to be performed.""" +class PrematureDataAccessError(DataInterfacesError): + """To be raised when the Relation Data may be accessed (written) before protocol init complete.""" + + +############################################################################## +# Global helpers / utilities +############################################################################## + +############################################################################## +# Databag handling and comparison methods +############################################################################## + + def get_encoded_dict( relation: Relation, member: Union[Unit, Application], field: str ) -> Optional[Dict[str, str]]: @@ -482,6 +479,11 @@ def diff(event: RelationChangedEvent, bucket: Optional[Union[Unit, Application]] return Diff(added, changed, deleted) +############################################################################## +# Module decorators +############################################################################## + + def leader_only(f): """Decorator to ensure that only leader can perform given operation.""" @@ -536,6 +538,36 @@ def wrapper(self, *args, **kwargs): return wrapper +def legacy_apply_from_version(version: int) -> Callable: + """Decorator to decide whether to apply a legacy function or not. + + Based on LEGACY_SUPPORT_FROM module variable value, the importer charm may only want + to apply legacy solutions starting from a specific LIBPATCH. + + NOTE: All 'legacy' functions have to be defined and called in a way that they return `None`. + This results in cleaner and more secure execution flows in case the function may be disabled. + This requirement implicitly means that legacy functions change the internal state strictly, + don't return information. + """ + + def decorator(f: Callable[..., None]): + """Signature is ensuring None return value.""" + f.legacy_version = version + + def wrapper(self, *args, **kwargs) -> None: + if version >= LEGACY_SUPPORT_FROM: + return f(self, *args, **kwargs) + + return wrapper + + return decorator + + +############################################################################## +# Helper classes +############################################################################## + + class Scope(Enum): """Peer relations scope.""" @@ -543,9 +575,35 @@ class Scope(Enum): UNIT = "unit" -################################################################################ -# Secrets internal caching -################################################################################ +class SecretGroup(str): + """Secret groups specific type.""" + + +class SecretGroupsAggregate(str): + """Secret groups with option to extend with additional constants.""" + + def __init__(self): + self.USER = SecretGroup("user") + self.TLS = SecretGroup("tls") + self.EXTRA = SecretGroup("extra") + + def __setattr__(self, name, value): + """Setting internal constants.""" + if name in self.__dict__: + raise RuntimeError("Can't set constant!") + else: + super().__setattr__(name, SecretGroup(value)) + + def groups(self) -> list: + """Return the list of stored SecretGroups.""" + return list(self.__dict__.values()) + + def get_group(self, group: str) -> Optional[SecretGroup]: + """If the input str translates to a group name, return that.""" + return SecretGroup(group) if group in self.groups() else None + + +SECRET_GROUPS = SecretGroupsAggregate() class CachedSecret: @@ -554,6 +612,8 @@ class CachedSecret: The data structure is precisely re-using/simulating as in the actual Secret Storage """ + KNOWN_MODEL_ERRORS = [MODEL_ERRORS["no_label_and_uri"], MODEL_ERRORS["owner_no_refresh"]] + def __init__( self, model: Model, @@ -571,6 +631,95 @@ def __init__( self.legacy_labels = legacy_labels self.current_label = None + @property + def meta(self) -> Optional[Secret]: + """Getting cached secret meta-information.""" + if not self._secret_meta: + if not (self._secret_uri or self.label): + return + + try: + self._secret_meta = self._model.get_secret(label=self.label) + except SecretNotFoundError: + # Falling back to seeking for potential legacy labels + self._legacy_compat_find_secret_by_old_label() + + # If still not found, to be checked by URI, to be labelled with the proposed label + if not self._secret_meta and self._secret_uri: + self._secret_meta = self._model.get_secret(id=self._secret_uri, label=self.label) + return self._secret_meta + + ########################################################################## + # Backwards compatibility / Upgrades + ########################################################################## + # These functions are used to keep backwards compatibility on rolling upgrades + # Policy: + # All data is kept intact until the first write operation. (This allows a minimal + # grace period during which rollbacks are fully safe. For more info see the spec.) + # All data involves: + # - databag contents + # - secrets content + # - secret labels (!!!) + # Legacy functions must return None, and leave an equally consistent state whether + # they are executed or skipped (as a high enough versioned execution environment may + # not require so) + + # Compatibility + + @legacy_apply_from_version(34) + def _legacy_compat_find_secret_by_old_label(self) -> None: + """Compatibility function, allowing to find a secret by a legacy label. + + This functionality is typically needed when secret labels changed over an upgrade. + Until the first write operation, we need to maintain data as it was, including keeping + the old secret label. In order to keep track of the old label currently used to access + the secret, and additional 'current_label' field is being defined. + """ + for label in self.legacy_labels: + try: + self._secret_meta = self._model.get_secret(label=label) + except SecretNotFoundError: + pass + else: + if label != self.label: + self.current_label = label + return + + # Migrations + + @legacy_apply_from_version(34) + def _legacy_migration_to_new_label_if_needed(self) -> None: + """Helper function to re-create the secret with a different label. + + Juju does not provide a way to change secret labels. + Thus whenever moving from secrets version that involves secret label changes, + we "re-create" the existing secret, and attach the new label to the new + secret, to be used from then on. + + Note: we replace the old secret with a new one "in place", as we can't + easily switch the containing SecretCache structure to point to a new secret. + Instead we are changing the 'self' (CachedSecret) object to point to the + new instance. + """ + if not self.current_label or not (self.meta and self._secret_meta): + return + + # Create a new secret with the new label + content = self._secret_meta.get_content() + self._secret_uri = None + + # It will be nice to have the possibility to check if we are the owners of the secret... + try: + self._secret_meta = self.add_secret(content, label=self.label) + except ModelError as err: + if MODEL_ERRORS["not_leader"] not in str(err): + raise + self.current_label = None + + ########################################################################## + # Public functions + ########################################################################## + def add_secret( self, content: Dict[str, str], @@ -593,28 +742,6 @@ def add_secret( self._secret_meta = secret return self._secret_meta - @property - def meta(self) -> Optional[Secret]: - """Getting cached secret meta-information.""" - if not self._secret_meta: - if not (self._secret_uri or self.label): - return - - for label in [self.label] + self.legacy_labels: - try: - self._secret_meta = self._model.get_secret(label=label) - except SecretNotFoundError: - pass - else: - if label != self.label: - self.current_label = label - break - - # If still not found, to be checked by URI, to be labelled with the proposed label - if not self._secret_meta and self._secret_uri: - self._secret_meta = self._model.get_secret(id=self._secret_uri, label=self.label) - return self._secret_meta - def get_content(self) -> Dict[str, str]: """Getting cached secret content.""" if not self._secret_content: @@ -624,42 +751,25 @@ def get_content(self) -> Dict[str, str]: except (ValueError, ModelError) as err: # https://bugs.launchpad.net/juju/+bug/2042596 # Only triggered when 'refresh' is set - known_model_errors = [ - "ERROR either URI or label should be used for getting an owned secret but not both", - "ERROR secret owner cannot use --refresh", - ] if isinstance(err, ModelError) and not any( - msg in str(err) for msg in known_model_errors + msg in str(err) for msg in self.KNOWN_MODEL_ERRORS ): raise # Due to: ValueError: Secret owner cannot use refresh=True self._secret_content = self.meta.get_content() return self._secret_content - def _move_to_new_label_if_needed(self): - """Helper function to re-create the secret with a different label.""" - if not self.current_label or not (self.meta and self._secret_meta): - return - - # Create a new secret with the new label - content = self._secret_meta.get_content() - self._secret_uri = None - - # I wish we could just check if we are the owners of the secret... - try: - self._secret_meta = self.add_secret(content, label=self.label) - except ModelError as err: - if "this unit is not the leader" not in str(err): - raise - self.current_label = None - def set_content(self, content: Dict[str, str]) -> None: """Setting cached secret content.""" if not self.meta: return + # DPE-4182: do not create new revision if the content stay the same + if content == self.get_content(): + return + if content: - self._move_to_new_label_if_needed() + self._legacy_migration_to_new_label_if_needed() self.meta.set_content(content) self._secret_content = content else: @@ -922,6 +1032,23 @@ def _delete_relation_data(self, relation: Relation, fields: List[str]) -> None: """Delete data available (directily or indirectly -- i.e. secrets) from the relation for owner/this_app.""" raise NotImplementedError + # Optional overrides + + def _legacy_apply_on_fetch(self) -> None: + """This function should provide a list of compatibility functions to be applied when fetching (legacy) data.""" + pass + + def _legacy_apply_on_update(self, fields: List[str]) -> None: + """This function should provide a list of compatibility functions to be applied when writing data. + + Since data may be at a legacy version, migration may be mandatory. + """ + pass + + def _legacy_apply_on_delete(self, fields: List[str]) -> None: + """This function should provide a list of compatibility functions to be applied when deleting (legacy) data.""" + pass + # Internal helper methods @staticmethod @@ -1174,6 +1301,16 @@ def get_relation(self, relation_name, relation_id) -> Relation: return relation + def get_secret_uri(self, relation: Relation, group: SecretGroup) -> Optional[str]: + """Get the secret URI for the corresponding group.""" + secret_field = self._generate_secret_field_name(group) + return relation.data[self.component].get(secret_field) + + def set_secret_uri(self, relation: Relation, group: SecretGroup, secret_uri: str) -> None: + """Set the secret URI for the corresponding group.""" + secret_field = self._generate_secret_field_name(group) + relation.data[self.component][secret_field] = secret_uri + def fetch_relation_data( self, relation_ids: Optional[List[int]] = None, @@ -1190,6 +1327,8 @@ def fetch_relation_data( a dict of the values stored in the relation data bag for all relation instances (indexed by the relation ID). """ + self._legacy_apply_on_fetch() + if not relation_name: relation_name = self.relation_name @@ -1228,6 +1367,8 @@ def fetch_my_relation_data( NOTE: Since only the leader can read the relation's 'this_app'-side Application databag, the functionality is limited to leaders """ + self._legacy_apply_on_fetch() + if not relation_name: relation_name = self.relation_name @@ -1259,6 +1400,8 @@ def fetch_my_relation_field( @leader_only def update_relation_data(self, relation_id: int, data: dict) -> None: """Update the data within the relation.""" + self._legacy_apply_on_update(list(data.keys())) + relation_name = self.relation_name relation = self.get_relation(relation_name, relation_id) return self._update_relation_data(relation, data) @@ -1266,6 +1409,8 @@ def update_relation_data(self, relation_id: int, data: dict) -> None: @leader_only def delete_relation_data(self, relation_id: int, fields: List[str]) -> None: """Remove field from the relation.""" + self._legacy_apply_on_delete(fields) + relation_name = self.relation_name relation = self.get_relation(relation_name, relation_id) return self._delete_relation_data(relation, fields) @@ -1312,6 +1457,8 @@ def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: class ProviderData(Data): """Base provides-side of the data products relation.""" + RESOURCE_FIELD = "database" + def __init__( self, model: Model, @@ -1332,8 +1479,7 @@ def _add_relation_secret( uri_to_databag=True, ) -> bool: """Add a new Juju Secret that will be registered in the relation databag.""" - secret_field = self._generate_secret_field_name(group_mapping) - if uri_to_databag and relation.data[self.component].get(secret_field): + if uri_to_databag and self.get_secret_uri(relation, group_mapping): logging.error("Secret for relation %s already exists, not adding again", relation.id) return False @@ -1344,7 +1490,7 @@ def _add_relation_secret( # According to lint we may not have a Secret ID if uri_to_databag and secret.meta and secret.meta.id: - relation.data[self.component][secret_field] = secret.meta.id + self.set_secret_uri(relation, group_mapping, secret.meta.id) # Return the content that was added return True @@ -1445,8 +1591,7 @@ def _get_relation_secret( if not relation: return - secret_field = self._generate_secret_field_name(group_mapping) - if secret_uri := relation.data[self.local_app].get(secret_field): + if secret_uri := self.get_secret_uri(relation, group_mapping): return self.secrets.get(label, secret_uri) def _fetch_specific_relation_data( @@ -1479,6 +1624,15 @@ def _fetch_my_specific_relation_data( def _update_relation_data(self, relation: Relation, data: Dict[str, str]) -> None: """Set values for fields not caring whether it's a secret or not.""" req_secret_fields = [] + + keys = set(data.keys()) + if self.fetch_relation_field(relation.id, self.RESOURCE_FIELD) is None and ( + keys - {"endpoints", "read-only-endpoints", "replset"} + ): + raise PrematureDataAccessError( + "Premature access to relation data, update is forbidden before the connection is initialized." + ) + if relation.app: req_secret_fields = get_encoded_list(relation, relation.app, REQ_SECRET_FIELDS) @@ -1599,11 +1753,10 @@ def _register_secrets_to_relation(self, relation: Relation, params_name_list: Li for group in SECRET_GROUPS.groups(): secret_field = self._generate_secret_field_name(group) - if secret_field in params_name_list: - if secret_uri := relation.data[relation.app].get(secret_field): - self._register_secret_to_relation( - relation.name, relation.id, secret_uri, group - ) + if secret_field in params_name_list and ( + secret_uri := self.get_secret_uri(relation, group) + ): + self._register_secret_to_relation(relation.name, relation.id, secret_uri, group) def _is_resource_created_for_relation(self, relation: Relation) -> bool: if not relation.app: @@ -1614,6 +1767,17 @@ def _is_resource_created_for_relation(self, relation: Relation) -> bool: ) return bool(data.get("username")) and bool(data.get("password")) + # Public functions + + def get_secret_uri(self, relation: Relation, group: SecretGroup) -> Optional[str]: + """Getting relation secret URI for the corresponding Secret Group.""" + secret_field = self._generate_secret_field_name(group) + return relation.data[relation.app].get(secret_field) + + def set_secret_uri(self, relation: Relation, group: SecretGroup, uri: str) -> None: + """Setting relation secret URI is not possible for a Requirer.""" + raise NotImplementedError("Requirer can not change the relation secret URI.") + def is_resource_created(self, relation_id: Optional[int] = None) -> bool: """Check if the resource has been created. @@ -1764,7 +1928,6 @@ def __init__( secret_field_name: Optional[str] = None, deleted_label: Optional[str] = None, ): - """Manager of base client relations.""" RequirerData.__init__( self, model, @@ -1775,6 +1938,11 @@ def __init__( self.secret_field_name = secret_field_name if secret_field_name else self.SECRET_FIELD_NAME self.deleted_label = deleted_label self._secret_label_map = {} + + # Legacy information holders + self._legacy_labels = [] + self._legacy_secret_uri = None + # Secrets that are being dynamically added within the scope of this event handler run self._new_secrets = [] self._additional_secret_group_mapping = additional_secret_group_mapping @@ -1849,10 +2017,12 @@ def set_secret( value: The string value of the secret group_mapping: The name of the "secret group", in case the field is to be added to an existing secret """ + self._legacy_apply_on_update([field]) + full_field = self._field_to_internal_name(field, group_mapping) if self.secrets_enabled and full_field not in self.current_secret_fields: self._new_secrets.append(full_field) - if self._no_group_with_databag(field, full_field): + if self.valid_field_pattern(field, full_field): self.update_relation_data(relation_id, {full_field: value}) # Unlike for set_secret(), there's no harm using this operation with static secrets @@ -1865,6 +2035,8 @@ def get_secret( group_mapping: Optional[SecretGroup] = None, ) -> Optional[str]: """Public interface method to fetch secrets only.""" + self._legacy_apply_on_fetch() + full_field = self._field_to_internal_name(field, group_mapping) if ( self.secrets_enabled @@ -1872,7 +2044,7 @@ def get_secret( and field not in self.current_secret_fields ): return - if self._no_group_with_databag(field, full_field): + if self.valid_field_pattern(field, full_field): return self.fetch_my_relation_field(relation_id, full_field) @dynamic_secrets_only @@ -1883,14 +2055,19 @@ def delete_secret( group_mapping: Optional[SecretGroup] = None, ) -> Optional[str]: """Public interface method to delete secrets only.""" + self._legacy_apply_on_delete([field]) + full_field = self._field_to_internal_name(field, group_mapping) if self.secrets_enabled and full_field not in self.current_secret_fields: logger.warning(f"Secret {field} from group {group_mapping} was not found") return - if self._no_group_with_databag(field, full_field): + + if self.valid_field_pattern(field, full_field): self.delete_relation_data(relation_id, [full_field]) + ########################################################################## # Helpers + ########################################################################## @staticmethod def _field_to_internal_name(field: str, group: Optional[SecretGroup]) -> str: @@ -1932,10 +2109,69 @@ def _content_for_secret_group( if k in self.secret_fields } - # Backwards compatibility + def valid_field_pattern(self, field: str, full_field: str) -> bool: + """Check that no secret group is attempted to be used together without secrets being enabled. + + Secrets groups are impossible to use with versions that are not yet supporting secrets. + """ + if not self.secrets_enabled and full_field != field: + logger.error( + f"Can't access {full_field}: no secrets available (i.e. no secret groups either)." + ) + return False + return True + + ########################################################################## + # Backwards compatibility / Upgrades + ########################################################################## + # These functions are used to keep backwards compatibility on upgrades + # Policy: + # All data is kept intact until the first write operation. (This allows a minimal + # grace period during which rollbacks are fully safe. For more info see spec.) + # All data involves: + # - databag + # - secrets content + # - secret labels (!!!) + # Legacy functions must return None, and leave an equally consistent state whether + # they are executed or skipped (as a high enough versioned execution environment may + # not require so) + + # Full legacy stack for each operation + + def _legacy_apply_on_fetch(self) -> None: + """All legacy functions to be applied on fetch.""" + relation = self._model.relations[self.relation_name][0] + self._legacy_compat_generate_prev_labels() + self._legacy_compat_secret_uri_from_databag(relation) + + def _legacy_apply_on_update(self, fields) -> None: + """All legacy functions to be applied on update.""" + relation = self._model.relations[self.relation_name][0] + self._legacy_compat_generate_prev_labels() + self._legacy_compat_secret_uri_from_databag(relation) + self._legacy_migration_remove_secret_from_databag(relation, fields) + self._legacy_migration_remove_secret_field_name_from_databag(relation) + + def _legacy_apply_on_delete(self, fields) -> None: + """All legacy functions to be applied on delete.""" + relation = self._model.relations[self.relation_name][0] + self._legacy_compat_generate_prev_labels() + self._legacy_compat_secret_uri_from_databag(relation) + self._legacy_compat_check_deleted_label(relation, fields) + + # Compatibility + + @legacy_apply_from_version(18) + def _legacy_compat_check_deleted_label(self, relation, fields) -> None: + """Helper function for legacy behavior. + + As long as https://bugs.launchpad.net/juju/+bug/2028094 wasn't fixed, + we did not delete fields but rather kept them in the secret with a string value + expressing invalidity. This function is maintainnig that behavior when needed. + """ + if not self.deleted_label: + return - def _check_deleted_label(self, relation, fields) -> None: - """Helper function for legacy behavior.""" current_data = self.fetch_my_relation_data([relation.id], fields) if current_data is not None: # Check if the secret we wanna delete actually exists @@ -1948,7 +2184,43 @@ def _check_deleted_label(self, relation, fields) -> None: ", ".join(non_existent), ) - def _remove_secret_from_databag(self, relation, fields: List[str]) -> None: + @legacy_apply_from_version(18) + def _legacy_compat_secret_uri_from_databag(self, relation) -> None: + """Fetching the secret URI from the databag, in case stored there.""" + self._legacy_secret_uri = relation.data[self.component].get( + self._generate_secret_field_name(), None + ) + + @legacy_apply_from_version(34) + def _legacy_compat_generate_prev_labels(self) -> None: + """Generator for legacy secret label names, for backwards compatibility. + + Secret label is part of the data that MUST be maintained across rolling upgrades. + In case there may be a change on a secret label, the old label must be recognized + after upgrades, and left intact until the first write operation -- when we roll over + to the new label. + + This function keeps "memory" of previously used secret labels. + NOTE: Return value takes decorator into account -- all 'legacy' functions may return `None` + + v0.34 (rev69): Fixing issue https://github.com/canonical/data-platform-libs/issues/155 + meant moving from '.' (i.e. 'mysql.app', 'mysql.unit') + to labels '..' (like 'peer.mysql.app') + """ + if self._legacy_labels: + return + + result = [] + members = [self._model.app.name] + if self.scope: + members.append(self.scope.value) + result.append(f"{'.'.join(members)}") + self._legacy_labels = result + + # Migration + + @legacy_apply_from_version(18) + def _legacy_migration_remove_secret_from_databag(self, relation, fields: List[str]) -> None: """For Rolling Upgrades -- when moving from databag to secrets usage. Practically what happens here is to remove stuff from the databag that is @@ -1962,10 +2234,16 @@ def _remove_secret_from_databag(self, relation, fields: List[str]) -> None: if self._fetch_relation_data_without_secrets(self.component, relation, [field]): self._delete_relation_data_without_secrets(self.component, relation, [field]) - def _remove_secret_field_name_from_databag(self, relation) -> None: + @legacy_apply_from_version(18) + def _legacy_migration_remove_secret_field_name_from_databag(self, relation) -> None: """Making sure that the old databag URI is gone. This action should not be executed more than once. + + There was a phase (before moving secrets usage to libs) when charms saved the peer + secret URI to the databag, and used this URI from then on to retrieve their secret. + When upgrading to charm versions using this library, we need to add a label to the + secret and access it via label from than on, and remove the old traces from the databag. """ # Nothing to do if 'internal-secret' is not in the databag if not (relation.data[self.component].get(self._generate_secret_field_name())): @@ -1981,25 +2259,9 @@ def _remove_secret_field_name_from_databag(self, relation) -> None: # Databag reference to the secret URI can be removed, now that it's labelled relation.data[self.component].pop(self._generate_secret_field_name(), None) - def _previous_labels(self) -> List[str]: - """Generator for legacy secret label names, for backwards compatibility.""" - result = [] - members = [self._model.app.name] - if self.scope: - members.append(self.scope.value) - result.append(f"{'.'.join(members)}") - return result - - def _no_group_with_databag(self, field: str, full_field: str) -> bool: - """Check that no secret group is attempted to be used together with databag.""" - if not self.secrets_enabled and full_field != field: - logger.error( - f"Can't access {full_field}: no secrets available (i.e. no secret groups either)." - ) - return False - return True - + ########################################################################## # Event handlers + ########################################################################## def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: """Event emitted when the relation has changed.""" @@ -2009,7 +2271,9 @@ def _on_secret_changed_event(self, event: SecretChangedEvent) -> None: """Event emitted when the secret has changed.""" pass + ########################################################################## # Overrides of Relation Data handling functions + ########################################################################## def _generate_secret_label( self, relation_name: str, relation_id: int, group_mapping: SecretGroup @@ -2046,13 +2310,14 @@ def _get_relation_secret( return label = self._generate_secret_label(relation_name, relation_id, group_mapping) - secret_uri = relation.data[self.component].get(self._generate_secret_field_name(), None) # URI or legacy label is only to applied when moving single legacy secret to a (new) label if group_mapping == SECRET_GROUPS.EXTRA: # Fetching the secret with fallback to URI (in case label is not yet known) # Label would we "stuck" on the secret in case it is found - return self.secrets.get(label, secret_uri, legacy_labels=self._previous_labels()) + return self.secrets.get( + label, self._legacy_secret_uri, legacy_labels=self._legacy_labels + ) return self.secrets.get(label) def _get_group_secret_contents( @@ -2082,7 +2347,6 @@ def _fetch_my_specific_relation_data( @either_static_or_dynamic_secrets def _update_relation_data(self, relation: Relation, data: Dict[str, str]) -> None: """Update data available (directily or indirectly -- i.e. secrets) from the relation for owner/this_app.""" - self._remove_secret_from_databag(relation, list(data.keys())) _, normal_fields = self._process_secret_fields( relation, self.secret_fields, @@ -2091,7 +2355,6 @@ def _update_relation_data(self, relation: Relation, data: Dict[str, str]) -> Non data=data, uri_to_databag=False, ) - self._remove_secret_field_name_from_databag(relation) normal_content = {k: v for k, v in data.items() if k in normal_fields} self._update_relation_data_without_secrets(self.component, relation, normal_content) @@ -2100,8 +2363,6 @@ def _update_relation_data(self, relation: Relation, data: Dict[str, str]) -> Non def _delete_relation_data(self, relation: Relation, fields: List[str]) -> None: """Delete data available (directily or indirectly -- i.e. secrets) from the relation for owner/this_app.""" if self.secret_fields and self.deleted_label: - # Legacy, backwards compatibility - self._check_deleted_label(relation, fields) _, normal_fields = self._process_secret_fields( relation, @@ -2137,7 +2398,9 @@ def fetch_relation_field( "fetch_my_relation_data() and fetch_my_relation_field()" ) + ########################################################################## # Public functions -- inherited + ########################################################################## fetch_my_relation_data = Data.fetch_my_relation_data fetch_my_relation_field = Data.fetch_my_relation_field @@ -2602,6 +2865,14 @@ def set_version(self, relation_id: int, version: str) -> None: """ self.update_relation_data(relation_id, {"version": version}) + def set_subordinated(self, relation_id: int) -> None: + """Raises the subordinated flag in the application relation databag. + + Args: + relation_id: the identifier for a particular relation. + """ + self.update_relation_data(relation_id, {"subordinated": "true"}) + class DatabaseProviderEventHandlers(EventHandlers): """Provider-side of the database relation handlers.""" @@ -2838,6 +3109,21 @@ def _on_relation_created_event(self, event: RelationCreatedEvent) -> None: def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: """Event emitted when the database relation has changed.""" + is_subordinate = False + remote_unit_data = None + for key in event.relation.data.keys(): + if isinstance(key, Unit) and not key.name.startswith(self.charm.app.name): + remote_unit_data = event.relation.data[key] + elif isinstance(key, Application) and key.name != self.charm.app.name: + is_subordinate = event.relation.data[key].get("subordinated") == "true" + + if is_subordinate: + if not remote_unit_data: + return + + if remote_unit_data.get("state") != "ready": + return + # Check which data has changed to emit customs events. diff = self._diff(event) @@ -3019,6 +3305,8 @@ class KafkaRequiresEvents(CharmEvents): class KafkaProviderData(ProviderData): """Provider-side of the Kafka relation.""" + RESOURCE_FIELD = "topic" + def __init__(self, model: Model, relation_name: str) -> None: super().__init__(model, relation_name) @@ -3268,6 +3556,8 @@ class OpenSearchRequiresEvents(CharmEvents): class OpenSearchProvidesData(ProviderData): """Provider-side of the OpenSearch relation.""" + RESOURCE_FIELD = "index" + def __init__(self, model: Model, relation_name: str) -> None: super().__init__(model, relation_name) diff --git a/examples/django/charm/lib/charms/data_platform_libs/v0/s3.py b/examples/django/charm/lib/charms/data_platform_libs/v0/s3.py new file mode 100644 index 0000000..f5614aa --- /dev/null +++ b/examples/django/charm/lib/charms/data_platform_libs/v0/s3.py @@ -0,0 +1,791 @@ +# Copyright 2023 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +r"""A library for communicating with the S3 credentials providers and consumers. + +This library provides the relevant interface code implementing the communication +specification for fetching, retrieving, triggering, and responding to events related to +the S3 provider charm and its consumers. + +### Provider charm + +The provider is implemented in the `s3-provider` charm which is meant to be deployed +alongside one or more consumer charms. The provider charm is serving the s3 credentials and +metadata needed to communicate and work with an S3 compatible backend. + +Example: +```python + +from charms.data_platform_libs.v0.s3 import CredentialRequestedEvent, S3Provider + + +class ExampleProviderCharm(CharmBase): + def __init__(self, *args) -> None: + super().__init__(*args) + self.s3_provider = S3Provider(self, "s3-credentials") + + self.framework.observe(self.s3_provider.on.credentials_requested, + self._on_credential_requested) + + def _on_credential_requested(self, event: CredentialRequestedEvent): + if not self.unit.is_leader(): + return + + # get relation id + relation_id = event.relation.id + + # get bucket name + bucket = event.bucket + + # S3 configuration parameters + desired_configuration = {"access-key": "your-access-key", "secret-key": + "your-secret-key", "bucket": "your-bucket"} + + # update the configuration + self.s3_provider.update_connection_info(relation_id, desired_configuration) + + # or it is possible to set each field independently + + self.s3_provider.set_secret_key(relation_id, "your-secret-key") + + +if __name__ == "__main__": + main(ExampleProviderCharm) + + +### Requirer charm + +The requirer charm is the charm requiring the S3 credentials. +An example of requirer charm is the following: + +Example: +```python + +from charms.data_platform_libs.v0.s3 import ( + CredentialsChangedEvent, + CredentialsGoneEvent, + S3Requirer +) + +class ExampleRequirerCharm(CharmBase): + + def __init__(self, *args): + super().__init__(*args) + + bucket_name = "test-bucket" + # if bucket name is not provided the bucket name will be generated + # e.g., ('relation-{relation.id}') + + self.s3_client = S3Requirer(self, "s3-credentials", bucket_name) + + self.framework.observe(self.s3_client.on.credentials_changed, self._on_credential_changed) + self.framework.observe(self.s3_client.on.credentials_gone, self._on_credential_gone) + + def _on_credential_changed(self, event: CredentialsChangedEvent): + + # access single parameter credential + secret_key = event.secret_key + access_key = event.access_key + + # or as alternative all credentials can be collected as a dictionary + credentials = self.s3_client.get_s3_credentials() + + def _on_credential_gone(self, event: CredentialsGoneEvent): + # credentials are removed + pass + + if __name__ == "__main__": + main(ExampleRequirerCharm) +``` + +""" +import json +import logging +from collections import namedtuple +from typing import Dict, List, Optional, Union + +import ops.charm +import ops.framework +import ops.model +from ops.charm import ( + CharmBase, + CharmEvents, + RelationBrokenEvent, + RelationChangedEvent, + RelationEvent, + RelationJoinedEvent, +) +from ops.framework import EventSource, Object, ObjectEvents +from ops.model import Application, Relation, RelationDataContent, Unit + +# The unique Charmhub library identifier, never change it +LIBID = "fca396f6254246c9bfa565b1f85ab528" + +# Increment this major API version when introducing breaking changes +LIBAPI = 0 + +# Increment this PATCH version before using `charmcraft publish-lib` or reset +# to 0 if you are raising the major API version +LIBPATCH = 5 + +logger = logging.getLogger(__name__) + +Diff = namedtuple("Diff", "added changed deleted") +Diff.__doc__ = """ +A tuple for storing the diff between two data mappings. + +added - keys that were added +changed - keys that still exist but have new values +deleted - key that were deleted""" + + +def diff(event: RelationChangedEvent, bucket: Union[Unit, Application]) -> Diff: + """Retrieves the diff of the data in the relation changed databag. + + Args: + event: relation changed event. + bucket: bucket of the databag (app or unit) + + Returns: + a Diff instance containing the added, deleted and changed + keys from the event relation databag. + """ + # Retrieve the old data from the data key in the application relation databag. + old_data = json.loads(event.relation.data[bucket].get("data", "{}")) + # Retrieve the new data from the event relation databag. + new_data = ( + {key: value for key, value in event.relation.data[event.app].items() if key != "data"} + if event.app + else {} + ) + + # These are the keys that were added to the databag and triggered this event. + added = new_data.keys() - old_data.keys() + # These are the keys that were removed from the databag and triggered this event. + deleted = old_data.keys() - new_data.keys() + # These are the keys that already existed in the databag, + # but had their values changed. + changed = {key for key in old_data.keys() & new_data.keys() if old_data[key] != new_data[key]} + + # TODO: evaluate the possibility of losing the diff if some error + # happens in the charm before the diff is completely checked (DPE-412). + # Convert the new_data to a serializable format and save it for a next diff check. + event.relation.data[bucket].update({"data": json.dumps(new_data)}) + + # Return the diff with all possible changes. + return Diff(added, changed, deleted) + + +class BucketEvent(RelationEvent): + """Base class for bucket events.""" + + @property + def bucket(self) -> Optional[str]: + """Returns the bucket was requested.""" + if not self.relation.app: + return None + + return self.relation.data[self.relation.app].get("bucket", "") + + +class CredentialRequestedEvent(BucketEvent): + """Event emitted when a set of credential is requested for use on this relation.""" + + +class S3CredentialEvents(CharmEvents): + """Event descriptor for events raised by S3Provider.""" + + credentials_requested = EventSource(CredentialRequestedEvent) + + +class S3Provider(Object): + """A provider handler for communicating S3 credentials to consumers.""" + + on = S3CredentialEvents() # pyright: ignore [reportAssignmentType] + + def __init__( + self, + charm: CharmBase, + relation_name: str, + ): + super().__init__(charm, relation_name) + self.charm = charm + self.local_app = self.charm.model.app + self.local_unit = self.charm.unit + self.relation_name = relation_name + + # monitor relation changed event for changes in the credentials + self.framework.observe(charm.on[relation_name].relation_changed, self._on_relation_changed) + + def _on_relation_changed(self, event: RelationChangedEvent) -> None: + """React to the relation changed event by consuming data.""" + if not self.charm.unit.is_leader(): + return + diff = self._diff(event) + # emit on credential requested if bucket is provided by the requirer application + if "bucket" in diff.added: + getattr(self.on, "credentials_requested").emit( + event.relation, app=event.app, unit=event.unit + ) + + def _load_relation_data(self, raw_relation_data: dict) -> dict: + """Loads relation data from the relation data bag. + + Args: + raw_relation_data: Relation data from the databag + Returns: + dict: Relation data in dict format. + """ + connection_data = {} + for key in raw_relation_data: + try: + connection_data[key] = json.loads(raw_relation_data[key]) + except (json.decoder.JSONDecodeError, TypeError): + connection_data[key] = raw_relation_data[key] + return connection_data + + # def _diff(self, event: RelationChangedEvent) -> Diff: + # """Retrieves the diff of the data in the relation changed databag. + + # Args: + # event: relation changed event. + + # Returns: + # a Diff instance containing the added, deleted and changed + # keys from the event relation databag. + # """ + # # Retrieve the old data from the data key in the application relation databag. + # old_data = json.loads(event.relation.data[self.local_app].get("data", "{}")) + # # Retrieve the new data from the event relation databag. + # new_data = { + # key: value for key, value in event.relation.data[event.app].items() if key != "data" + # } + + # # These are the keys that were added to the databag and triggered this event. + # added = new_data.keys() - old_data.keys() + # # These are the keys that were removed from the databag and triggered this event. + # deleted = old_data.keys() - new_data.keys() + # # These are the keys that already existed in the databag, + # # but had their values changed. + # changed = { + # key for key in old_data.keys() & new_data.keys() if old_data[key] != new_data[key] + # } + + # # TODO: evaluate the possibility of losing the diff if some error + # # happens in the charm before the diff is completely checked (DPE-412). + # # Convert the new_data to a serializable format and save it for a next diff check. + # event.relation.data[self.local_app].update({"data": json.dumps(new_data)}) + + # # Return the diff with all possible changes. + # return Diff(added, changed, deleted) + + def _diff(self, event: RelationChangedEvent) -> Diff: + """Retrieves the diff of the data in the relation changed databag. + + Args: + event: relation changed event. + + Returns: + a Diff instance containing the added, deleted and changed + keys from the event relation databag. + """ + return diff(event, self.local_app) + + def fetch_relation_data(self) -> dict: + """Retrieves data from relation. + + This function can be used to retrieve data from a relation + in the charm code when outside an event callback. + + Returns: + a dict of the values stored in the relation data bag + for all relation instances (indexed by the relation id). + """ + data = {} + for relation in self.relations: + data[relation.id] = ( + {key: value for key, value in relation.data[relation.app].items() if key != "data"} + if relation.app + else {} + ) + return data + + def update_connection_info(self, relation_id: int, connection_data: dict) -> None: + """Updates the credential data as set of key-value pairs in the relation. + + This function writes in the application data bag, therefore, + only the leader unit can call it. + + Args: + relation_id: the identifier for a particular relation. + connection_data: dict containing the key-value pairs + that should be updated. + """ + # check and write changes only if you are the leader + if not self.local_unit.is_leader(): + return + + relation = self.charm.model.get_relation(self.relation_name, relation_id) + + if not relation: + return + + # configuration options that are list + s3_list_options = ["attributes", "tls-ca-chain"] + + # update the databag, if connection data did not change with respect to before + # the relation changed event is not triggered + updated_connection_data = {} + for configuration_option, configuration_value in connection_data.items(): + if configuration_option in s3_list_options: + updated_connection_data[configuration_option] = json.dumps(configuration_value) + else: + updated_connection_data[configuration_option] = configuration_value + + relation.data[self.local_app].update(updated_connection_data) + logger.debug(f"Updated S3 connection info: {updated_connection_data}") + + @property + def relations(self) -> List[Relation]: + """The list of Relation instances associated with this relation_name.""" + return list(self.charm.model.relations[self.relation_name]) + + def set_bucket(self, relation_id: int, bucket: str) -> None: + """Sets bucket name in application databag. + + This function writes in the application data bag, therefore, + only the leader unit can call it. + + Args: + relation_id: the identifier for a particular relation. + bucket: the bucket name. + """ + self.update_connection_info(relation_id, {"bucket": bucket}) + + def set_access_key(self, relation_id: int, access_key: str) -> None: + """Sets access-key value in application databag. + + This function writes in the application data bag, therefore, + only the leader unit can call it. + + Args: + relation_id: the identifier for a particular relation. + access_key: the access-key value. + """ + self.update_connection_info(relation_id, {"access-key": access_key}) + + def set_secret_key(self, relation_id: int, secret_key: str) -> None: + """Sets the secret key value in application databag. + + This function writes in the application data bag, therefore, + only the leader unit can call it. + + Args: + relation_id: the identifier for a particular relation. + secret_key: the value of the secret key. + """ + self.update_connection_info(relation_id, {"secret-key": secret_key}) + + def set_path(self, relation_id: int, path: str) -> None: + """Sets the path value in application databag. + + This function writes in the application data bag, therefore, + only the leader unit can call it. + + Args: + relation_id: the identifier for a particular relation. + path: the path value. + """ + self.update_connection_info(relation_id, {"path": path}) + + def set_endpoint(self, relation_id: int, endpoint: str) -> None: + """Sets the endpoint address in application databag. + + This function writes in the application data bag, therefore, + only the leader unit can call it. + + Args: + relation_id: the identifier for a particular relation. + endpoint: the endpoint address. + """ + self.update_connection_info(relation_id, {"endpoint": endpoint}) + + def set_region(self, relation_id: int, region: str) -> None: + """Sets the region location in application databag. + + This function writes in the application data bag, therefore, + only the leader unit can call it. + + Args: + relation_id: the identifier for a particular relation. + region: the region address. + """ + self.update_connection_info(relation_id, {"region": region}) + + def set_s3_uri_style(self, relation_id: int, s3_uri_style: str) -> None: + """Sets the S3 URI style in application databag. + + This function writes in the application data bag, therefore, + only the leader unit can call it. + + Args: + relation_id: the identifier for a particular relation. + s3_uri_style: the s3 URI style. + """ + self.update_connection_info(relation_id, {"s3-uri-style": s3_uri_style}) + + def set_storage_class(self, relation_id: int, storage_class: str) -> None: + """Sets the storage class in application databag. + + This function writes in the application data bag, therefore, + only the leader unit can call it. + + Args: + relation_id: the identifier for a particular relation. + storage_class: the storage class. + """ + self.update_connection_info(relation_id, {"storage-class": storage_class}) + + def set_tls_ca_chain(self, relation_id: int, tls_ca_chain: List[str]) -> None: + """Sets the tls_ca_chain value in application databag. + + This function writes in the application data bag, therefore, + only the leader unit can call it. + + Args: + relation_id: the identifier for a particular relation. + tls_ca_chain: the TLS Chain value. + """ + self.update_connection_info(relation_id, {"tls-ca-chain": tls_ca_chain}) + + def set_s3_api_version(self, relation_id: int, s3_api_version: str) -> None: + """Sets the S3 API version in application databag. + + This function writes in the application data bag, therefore, + only the leader unit can call it. + + Args: + relation_id: the identifier for a particular relation. + s3_api_version: the S3 version value. + """ + self.update_connection_info(relation_id, {"s3-api-version": s3_api_version}) + + def set_delete_older_than_days(self, relation_id: int, days: int) -> None: + """Sets the retention days for full backups in application databag. + + This function writes in the application data bag, therefore, + only the leader unit can call it. + + Args: + relation_id: the identifier for a particular relation. + days: the value. + """ + self.update_connection_info(relation_id, {"delete-older-than-days": str(days)}) + + def set_attributes(self, relation_id: int, attributes: List[str]) -> None: + """Sets the connection attributes in application databag. + + This function writes in the application data bag, therefore, + only the leader unit can call it. + + Args: + relation_id: the identifier for a particular relation. + attributes: the attributes value. + """ + self.update_connection_info(relation_id, {"attributes": attributes}) + + +class S3Event(RelationEvent): + """Base class for S3 storage events.""" + + @property + def bucket(self) -> Optional[str]: + """Returns the bucket name.""" + if not self.relation.app: + return None + + return self.relation.data[self.relation.app].get("bucket") + + @property + def access_key(self) -> Optional[str]: + """Returns the access key.""" + if not self.relation.app: + return None + + return self.relation.data[self.relation.app].get("access-key") + + @property + def secret_key(self) -> Optional[str]: + """Returns the secret key.""" + if not self.relation.app: + return None + + return self.relation.data[self.relation.app].get("secret-key") + + @property + def path(self) -> Optional[str]: + """Returns the path where data can be stored.""" + if not self.relation.app: + return None + + return self.relation.data[self.relation.app].get("path") + + @property + def endpoint(self) -> Optional[str]: + """Returns the endpoint address.""" + if not self.relation.app: + return None + + return self.relation.data[self.relation.app].get("endpoint") + + @property + def region(self) -> Optional[str]: + """Returns the region.""" + if not self.relation.app: + return None + + return self.relation.data[self.relation.app].get("region") + + @property + def s3_uri_style(self) -> Optional[str]: + """Returns the s3 uri style.""" + if not self.relation.app: + return None + + return self.relation.data[self.relation.app].get("s3-uri-style") + + @property + def storage_class(self) -> Optional[str]: + """Returns the storage class name.""" + if not self.relation.app: + return None + + return self.relation.data[self.relation.app].get("storage-class") + + @property + def tls_ca_chain(self) -> Optional[List[str]]: + """Returns the TLS CA chain.""" + if not self.relation.app: + return None + + tls_ca_chain = self.relation.data[self.relation.app].get("tls-ca-chain") + if tls_ca_chain is not None: + return json.loads(tls_ca_chain) + return None + + @property + def s3_api_version(self) -> Optional[str]: + """Returns the S3 API version.""" + if not self.relation.app: + return None + + return self.relation.data[self.relation.app].get("s3-api-version") + + @property + def delete_older_than_days(self) -> Optional[int]: + """Returns the retention days for full backups.""" + if not self.relation.app: + return None + + days = self.relation.data[self.relation.app].get("delete-older-than-days") + if days is None: + return None + return int(days) + + @property + def attributes(self) -> Optional[List[str]]: + """Returns the attributes.""" + if not self.relation.app: + return None + + attributes = self.relation.data[self.relation.app].get("attributes") + if attributes is not None: + return json.loads(attributes) + return None + + +class CredentialsChangedEvent(S3Event): + """Event emitted when S3 credential are changed on this relation.""" + + +class CredentialsGoneEvent(RelationEvent): + """Event emitted when S3 credential are removed from this relation.""" + + +class S3CredentialRequiresEvents(ObjectEvents): + """Event descriptor for events raised by the S3Provider.""" + + credentials_changed = EventSource(CredentialsChangedEvent) + credentials_gone = EventSource(CredentialsGoneEvent) + + +S3_REQUIRED_OPTIONS = ["access-key", "secret-key"] + + +class S3Requirer(Object): + """Requires-side of the s3 relation.""" + + on = S3CredentialRequiresEvents() # pyright: ignore[reportAssignmentType] + + def __init__( + self, charm: ops.charm.CharmBase, relation_name: str, bucket_name: Optional[str] = None + ): + """Manager of the s3 client relations.""" + super().__init__(charm, relation_name) + + self.relation_name = relation_name + self.charm = charm + self.local_app = self.charm.model.app + self.local_unit = self.charm.unit + self.bucket = bucket_name + + self.framework.observe( + self.charm.on[self.relation_name].relation_changed, self._on_relation_changed + ) + + self.framework.observe( + self.charm.on[self.relation_name].relation_joined, self._on_relation_joined + ) + + self.framework.observe( + self.charm.on[self.relation_name].relation_broken, + self._on_relation_broken, + ) + + def _generate_bucket_name(self, event: RelationJoinedEvent): + """Returns the bucket name generated from relation id.""" + return f"relation-{event.relation.id}" + + def _on_relation_joined(self, event: RelationJoinedEvent) -> None: + """Event emitted when the application joins the s3 relation.""" + if self.bucket is None: + self.bucket = self._generate_bucket_name(event) + self.update_connection_info(event.relation.id, {"bucket": self.bucket}) + + def fetch_relation_data(self) -> dict: + """Retrieves data from relation. + + This function can be used to retrieve data from a relation + in the charm code when outside an event callback. + + Returns: + a dict of the values stored in the relation data bag + for all relation instances (indexed by the relation id). + """ + data = {} + + for relation in self.relations: + data[relation.id] = self._load_relation_data(relation.data[self.charm.app]) + return data + + def update_connection_info(self, relation_id: int, connection_data: dict) -> None: + """Updates the credential data as set of key-value pairs in the relation. + + This function writes in the application data bag, therefore, + only the leader unit can call it. + + Args: + relation_id: the identifier for a particular relation. + connection_data: dict containing the key-value pairs + that should be updated. + """ + # check and write changes only if you are the leader + if not self.local_unit.is_leader(): + return + + relation = self.charm.model.get_relation(self.relation_name, relation_id) + + if not relation: + return + + # update the databag, if connection data did not change with respect to before + # the relation changed event is not triggered + # configuration options that are list + s3_list_options = ["attributes", "tls-ca-chain"] + updated_connection_data = {} + for configuration_option, configuration_value in connection_data.items(): + if configuration_option in s3_list_options: + updated_connection_data[configuration_option] = json.dumps(configuration_value) + else: + updated_connection_data[configuration_option] = configuration_value + + relation.data[self.local_app].update(updated_connection_data) + logger.debug(f"Updated S3 credentials: {updated_connection_data}") + + def _load_relation_data(self, raw_relation_data: RelationDataContent) -> Dict[str, str]: + """Loads relation data from the relation data bag. + + Args: + raw_relation_data: Relation data from the databag + Returns: + dict: Relation data in dict format. + """ + connection_data = {} + for key in raw_relation_data: + try: + connection_data[key] = json.loads(raw_relation_data[key]) + except (json.decoder.JSONDecodeError, TypeError): + connection_data[key] = raw_relation_data[key] + return connection_data + + def _diff(self, event: RelationChangedEvent) -> Diff: + """Retrieves the diff of the data in the relation changed databag. + + Args: + event: relation changed event. + + Returns: + a Diff instance containing the added, deleted and changed + keys from the event relation databag. + """ + return diff(event, self.local_unit) + + def _on_relation_changed(self, event: RelationChangedEvent) -> None: + """Notify the charm about the presence of S3 credentials.""" + # check if the mandatory options are in the relation data + contains_required_options = True + # get current credentials data + credentials = self.get_s3_connection_info() + # records missing options + missing_options = [] + for configuration_option in S3_REQUIRED_OPTIONS: + if configuration_option not in credentials: + contains_required_options = False + missing_options.append(configuration_option) + # emit credential change event only if all mandatory fields are present + if contains_required_options: + getattr(self.on, "credentials_changed").emit( + event.relation, app=event.app, unit=event.unit + ) + else: + logger.warning( + f"Some mandatory fields: {missing_options} are not present, do not emit credential change event!" + ) + + def get_s3_connection_info(self) -> Dict[str, str]: + """Return the s3 credentials as a dictionary.""" + for relation in self.relations: + if relation and relation.app: + return self._load_relation_data(relation.data[relation.app]) + + return {} + + def _on_relation_broken(self, event: RelationBrokenEvent) -> None: + """Notify the charm about a broken S3 credential store relation.""" + getattr(self.on, "credentials_gone").emit(event.relation, app=event.app, unit=event.unit) + + @property + def relations(self) -> List[Relation]: + """The list of Relation instances associated with this relation_name.""" + return list(self.charm.model.relations[self.relation_name]) diff --git a/examples/django/charm/lib/charms/loki_k8s/v0/loki_push_api.py b/examples/django/charm/lib/charms/loki_k8s/v0/loki_push_api.py new file mode 100644 index 0000000..16e1294 --- /dev/null +++ b/examples/django/charm/lib/charms/loki_k8s/v0/loki_push_api.py @@ -0,0 +1,2510 @@ +#!/usr/bin/env python3 +# Copyright 2021 Canonical Ltd. +# See LICENSE file for licensing details. +# +# Learn more at: https://juju.is/docs/sdk + +r"""## Overview. + +This document explains how to use the two principal objects this library provides: + +- `LokiPushApiProvider`: This object is meant to be used by any Charmed Operator that needs to +implement the provider side of the `loki_push_api` relation interface. For instance, a Loki charm. +The provider side of the relation represents the server side, to which logs are being pushed. + +- `LokiPushApiConsumer`: Used to obtain the loki api endpoint. This is useful for configuring + applications such as pebble, or charmed operators of workloads such as grafana-agent or promtail, + that can communicate with loki directly. + +- `LogProxyConsumer`: DEPRECATED. +This object can be used by any Charmed Operator which needs to send telemetry, such as logs, to +Loki through a Log Proxy by implementing the consumer side of the `loki_push_api` relation +interface. + +Filtering logs in Loki is largely performed on the basis of labels. In the Juju ecosystem, Juju +topology labels are used to uniquely identify the workload which generates telemetry like logs. + +In order to be able to control the labels on the logs pushed this object adds a Pebble layer +that runs Promtail in the workload container, injecting Juju topology labels into the +logs on the fly. + +## LokiPushApiProvider Library Usage + +This object may be used by any Charmed Operator which implements the `loki_push_api` interface. +For instance, Loki or Grafana Agent. + +For this purpose a charm needs to instantiate the `LokiPushApiProvider` object with one mandatory +and three optional arguments. + +- `charm`: A reference to the parent (Loki) charm. + +- `relation_name`: The name of the relation that the charm uses to interact + with its clients, which implement `LokiPushApiConsumer` or `LogProxyConsumer` + (note that LogProxyConsumer is deprecated). + + If provided, this relation name must match a provided relation in metadata.yaml with the + `loki_push_api` interface. + + The default relation name is "logging" for `LokiPushApiConsumer` and "log-proxy" for + `LogProxyConsumer` (note that LogProxyConsumer is deprecated). + + For example, a provider's `metadata.yaml` file may look as follows: + + ```yaml + provides: + logging: + interface: loki_push_api + ``` + + Subsequently, a Loki charm may instantiate the `LokiPushApiProvider` in its constructor as + follows: + + from charms.loki_k8s.v0.loki_push_api import LokiPushApiProvider + from loki_server import LokiServer + ... + + class LokiOperatorCharm(CharmBase): + ... + + def __init__(self, *args): + super().__init__(*args) + ... + external_url = urlparse(self._external_url) + self.loki_provider = LokiPushApiProvider( + self, + address=external_url.hostname or self.hostname, + port=external_url.port or 80, + scheme=external_url.scheme, + path=f"{external_url.path}/loki/api/v1/push", + ) + ... + + - `port`: Loki Push Api endpoint port. Default value: `3100`. + - `scheme`: Loki Push Api endpoint scheme (`HTTP` or `HTTPS`). Default value: `HTTP` + - `address`: Loki Push Api endpoint address. Default value: `localhost` + - `path`: Loki Push Api endpoint path. Default value: `loki/api/v1/push` + + +The `LokiPushApiProvider` object has several responsibilities: + +1. Set the URL of the Loki Push API in the relation application data bag; the URL + must be unique to all instances (e.g. using a load balancer). + +2. Set the Promtail binary URL (`promtail_binary_zip_url`) so clients that use + `LogProxyConsumer` object could download and configure it. + +3. Process the metadata of the consumer application, provided via the + "metadata" field of the consumer data bag, which are used to annotate the + alert rules (see next point). An example for "metadata" is the following: + + {'model': 'loki', + 'model_uuid': '0b7d1071-ded2-4bf5-80a3-10a81aeb1386', + 'application': 'promtail-k8s' + } + +4. Process alert rules set into the relation by the `LokiPushApiConsumer` + objects, e.g.: + + '{ + "groups": [{ + "name": "loki_0b7d1071-ded2-4bf5-80a3-10a81aeb1386_promtail-k8s_alerts", + "rules": [{ + "alert": "HighPercentageError", + "expr": "sum(rate({app=\\"foo\\", env=\\"production\\"} |= \\"error\\" [5m])) + by (job) \\n /\\nsum(rate({app=\\"foo\\", env=\\"production\\"}[5m])) + by (job)\\n > 0.05 + \\n", "for": "10m", + "labels": { + "severity": "page", + "juju_model": "loki", + "juju_model_uuid": "0b7d1071-ded2-4bf5-80a3-10a81aeb1386", + "juju_application": "promtail-k8s" + }, + "annotations": { + "summary": "High request latency" + } + }] + }] + }' + + +Once these alert rules are sent over relation data, the `LokiPushApiProvider` object +stores these files in the directory `/loki/rules` inside the Loki charm container. After +storing alert rules files, the object will check alert rules by querying Loki API +endpoint: [`loki/api/v1/rules`](https://grafana.com/docs/loki/latest/api/#list-rule-groups). +If there are changes in the alert rules a `loki_push_api_alert_rules_changed` event will +be emitted with details about the `RelationEvent` which triggered it. + +This events should be observed in the charm that uses `LokiPushApiProvider`: + +```python + def __init__(self, *args): + super().__init__(*args) + ... + self.loki_provider = LokiPushApiProvider(self) + self.framework.observe( + self.loki_provider.on.loki_push_api_alert_rules_changed, + self._loki_push_api_alert_rules_changed, + ) +``` + + +## LokiPushApiConsumer Library Usage + +This Loki charm interacts with its clients using the Loki charm library. Charms +seeking to send log to Loki, must do so using the `LokiPushApiConsumer` object from +this charm library. + +> **NOTE**: `LokiPushApiConsumer` also depends on an additional charm library. +> +> Ensure sure you `charmcraft fetch-lib charms.observability_libs.v0.juju_topology` +> when using this library. + +For the simplest use cases, using the `LokiPushApiConsumer` object only requires +instantiating it, typically in the constructor of your charm (the one which +sends logs). + +```python +from charms.loki_k8s.v0.loki_push_api import LokiPushApiConsumer + +class LokiClientCharm(CharmBase): + + def __init__(self, *args): + super().__init__(*args) + ... + self._loki_consumer = LokiPushApiConsumer(self) +``` + +The `LokiPushApiConsumer` constructor requires two things: + +- A reference to the parent (LokiClientCharm) charm. + +- Optionally, the name of the relation that the Loki charm uses to interact + with its clients. If provided, this relation name must match a required + relation in metadata.yaml with the `loki_push_api` interface. + + This argument is not required if your metadata.yaml has precisely one + required relation in metadata.yaml with the `loki_push_api` interface, as the + lib will automatically resolve the relation name inspecting the using the + meta information of the charm + +Any time the relation between a Loki provider charm and a Loki consumer charm is +established, a `LokiPushApiEndpointJoined` event is fired. In the consumer side +is it possible to observe this event with: + +```python + +self.framework.observe( + self._loki_consumer.on.loki_push_api_endpoint_joined, + self._on_loki_push_api_endpoint_joined, +) +``` + +Any time there are departures in relations between the consumer charm and Loki +the consumer charm is informed, through a `LokiPushApiEndpointDeparted` event, for instance: + +```python +self.framework.observe( + self._loki_consumer.on.loki_push_api_endpoint_departed, + self._on_loki_push_api_endpoint_departed, +) +``` + +The consumer charm can then choose to update its configuration in both situations. + +Note that LokiPushApiConsumer does not add any labels automatically on its own. In +order to better integrate with the Canonical Observability Stack, you may want to configure your +software to add Juju topology labels. The +[observability-libs](https://charmhub.io/observability-libs) library can be used to get topology +labels in charm code. See :func:`LogProxyConsumer._scrape_configs` for an example of how +to do this with promtail. + +## LogProxyConsumer Library Usage + +> Note: This object is deprecated. Consider migrating to LogForwarder (see v1/loki_push_api) with +> the release of Juju 3.6 LTS. + +Let's say that we have a workload charm that produces logs, and we need to send those logs to a +workload implementing the `loki_push_api` interface, such as `Loki` or `Grafana Agent`. + +Adopting this object in a Charmed Operator consist of two steps: + +1. Use the `LogProxyConsumer` class by instantiating it in the `__init__` method of the charmed + operator. There are two ways to get logs in to promtail. You can give it a list of files to + read, or you can write to it using the syslog protocol. + + For example: + + ```python + from charms.loki_k8s.v0.loki_push_api import LogProxyConsumer + + ... + + def __init__(self, *args): + ... + self._log_proxy = LogProxyConsumer( + charm=self, log_files=LOG_FILES, container_name=PEER, enable_syslog=True + ) + + self.framework.observe( + self._log_proxy.on.promtail_digest_error, + self._promtail_error, + ) + + def _promtail_error(self, event): + logger.error(event.message) + self.unit.status = BlockedStatus(event.message) + ``` + + Any time the relation between a provider charm and a LogProxy consumer charm is + established, a `LogProxyEndpointJoined` event is fired. In the consumer side is it + possible to observe this event with: + + ```python + + self.framework.observe( + self._log_proxy.on.log_proxy_endpoint_joined, + self._on_log_proxy_endpoint_joined, + ) + ``` + + Any time there are departures in relations between the consumer charm and the provider + the consumer charm is informed, through a `LogProxyEndpointDeparted` event, for instance: + + ```python + self.framework.observe( + self._log_proxy.on.log_proxy_endpoint_departed, + self._on_log_proxy_endpoint_departed, + ) + ``` + + The consumer charm can then choose to update its configuration in both situations. + + Note that: + + - `LOG_FILES` is a `list` containing the log files we want to send to `Loki` or + `Grafana Agent`, for instance: + + ```python + LOG_FILES = [ + "/var/log/apache2/access.log", + "/var/log/alternatives.log", + ] + ``` + + - `container_name` is the name of the container in which the application is running. + If in the Pod there is only one container, this argument can be omitted. + + - You can configure your syslog software using `localhost` as the address and the method + `LogProxyConsumer.syslog_port` to get the port, or, alternatively, if you are using rsyslog + you may use the method `LogProxyConsumer.rsyslog_config()`. + +2. Modify the `metadata.yaml` file to add: + + - The `log-proxy` relation in the `requires` section: + ```yaml + requires: + log-proxy: + interface: loki_push_api + optional: true + ``` + +Once the library is implemented in a Charmed Operator and a relation is established with +the charm that implements the `loki_push_api` interface, the library will inject a +Pebble layer that runs Promtail in the workload container to send logs. + +By default, the promtail binary injected into the container will be downloaded from the internet. +If, for any reason, the container has limited network access, you may allow charm administrators +to provide their own promtail binary at runtime by adding the following snippet to your charm +metadata: + +```yaml +resources: + promtail-bin: + type: file + description: Promtail binary for logging + filename: promtail-linux +``` + +Which would then allow operators to deploy the charm this way: + +``` +juju deploy \ + ./your_charm.charm \ + --resource promtail-bin=/tmp/promtail-linux-amd64 +``` + +If a different resource name is used, it can be specified with the `promtail_resource_name` +argument to the `LogProxyConsumer` constructor. + +The object can emit a `PromtailDigestError` event: + +- Promtail binary cannot be downloaded. +- The sha256 sum mismatch for promtail binary. + +The object can raise a `ContainerNotFoundError` event: + +- No `container_name` parameter has been specified and the Pod has more than 1 container. + +These can be monitored via the PromtailDigestError events via: + +```python + self.framework.observe( + self._loki_consumer.on.promtail_digest_error, + self._promtail_error, + ) + + def _promtail_error(self, event): + logger.error(msg) + self.unit.status = BlockedStatus(event.message) + ) +``` + +## Alerting Rules + +This charm library also supports gathering alerting rules from all related Loki client +charms and enabling corresponding alerts within the Loki charm. Alert rules are +automatically gathered by `LokiPushApiConsumer` object from a directory conventionally +named `loki_alert_rules`. + +This directory must reside at the top level in the `src` folder of the +consumer charm. Each file in this directory is assumed to be a single alert rule +in YAML format. The file name must have the `.rule` extension. +The format of this alert rule conforms to the +[Loki docs](https://grafana.com/docs/loki/latest/rules/#alerting-rules). + +An example of the contents of one such file is shown below. + +```yaml +alert: HighPercentageError +expr: | + sum(rate({%%juju_topology%%} |= "error" [5m])) by (job) + / + sum(rate({%%juju_topology%%}[5m])) by (job) + > 0.05 +for: 10m +labels: + severity: page +annotations: + summary: High request latency + +``` + +It is **critical** to use the `%%juju_topology%%` filter in the expression for the alert +rule shown above. This filter is a stub that is automatically replaced by the +`LokiPushApiConsumer` following Loki Client's Juju topology (application, model and its +UUID). Such a topology filter is essential to ensure that alert rules submitted by one +provider charm generates alerts only for that same charm. + +The Loki charm may be related to multiple Loki client charms. Without this, filter +rules submitted by one provider charm will also result in corresponding alerts for other +provider charms. Hence, every alert rule expression must include such a topology filter stub. + +Gathering alert rules and generating rule files within the Loki charm is easily done using +the `alerts()` method of `LokiPushApiProvider`. Alerts generated by Loki will automatically +include Juju topology labels in the alerts. These labels indicate the source of the alert. + +The following labels are automatically added to every alert + +- `juju_model` +- `juju_model_uuid` +- `juju_application` + + +Whether alert rules files does not contain the keys `alert` or `expr` or there is no alert +rules file in `alert_rules_path` a `loki_push_api_alert_rules_error` event is emitted. + +To handle these situations the event must be observed in the `LokiClientCharm` charm.py file: + +```python +class LokiClientCharm(CharmBase): + + def __init__(self, *args): + super().__init__(*args) + ... + self._loki_consumer = LokiPushApiConsumer(self) + + self.framework.observe( + self._loki_consumer.on.loki_push_api_alert_rules_error, + self._alert_rules_error + ) + + def _alert_rules_error(self, event): + self.unit.status = BlockedStatus(event.message) +``` + +## Relation Data + +The Loki charm uses both application and unit relation data to obtain information regarding +Loki Push API and alert rules. + +Units of consumer charm send their alert rules over app relation data using the `alert_rules` +key. +""" + +import json +import logging +import os +import platform +import re +import socket +import subprocess +import tempfile +import typing +from copy import deepcopy +from gzip import GzipFile +from hashlib import sha256 +from io import BytesIO +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple, Union, cast +from urllib import request +from urllib.error import HTTPError + +import yaml +from cosl import JujuTopology +from ops.charm import ( + CharmBase, + HookEvent, + RelationBrokenEvent, + RelationCreatedEvent, + RelationDepartedEvent, + RelationEvent, + RelationJoinedEvent, + RelationRole, + WorkloadEvent, +) +from ops.framework import EventBase, EventSource, Object, ObjectEvents +from ops.model import Container, ModelError, Relation +from ops.pebble import APIError, ChangeError, PathError, ProtocolError + +# The unique Charmhub library identifier, never change it +LIBID = "bf76f23cdd03464b877c52bd1d2f563e" + +# Increment this major API version when introducing breaking changes +LIBAPI = 0 + +# Increment this PATCH version before using `charmcraft publish-lib` or reset +# to 0 if you are raising the major API version +LIBPATCH = 30 + +PYDEPS = ["cosl"] + +logger = logging.getLogger(__name__) + +RELATION_INTERFACE_NAME = "loki_push_api" +DEFAULT_RELATION_NAME = "logging" +DEFAULT_ALERT_RULES_RELATIVE_PATH = "./src/loki_alert_rules" +DEFAULT_LOG_PROXY_RELATION_NAME = "log-proxy" + +PROMTAIL_BASE_URL = "https://github.com/canonical/loki-k8s-operator/releases/download" +# To update Promtail version you only need to change the PROMTAIL_VERSION and +# update all sha256 sums in PROMTAIL_BINARIES. To support a new architecture +# you only need to add a new key value pair for the architecture in PROMTAIL_BINARIES. +PROMTAIL_VERSION = "v2.5.0" +PROMTAIL_BINARIES = { + "amd64": { + "filename": "promtail-static-amd64", + "zipsha": "543e333b0184e14015a42c3c9e9e66d2464aaa66eca48b29e185a6a18f67ab6d", + "binsha": "17e2e271e65f793a9fbe81eab887b941e9d680abe82d5a0602888c50f5e0cac9", + }, +} + +# Paths in `charm` container +BINARY_DIR = "/tmp" + +# Paths in `workload` container +WORKLOAD_BINARY_DIR = "/opt/promtail" +WORKLOAD_CONFIG_DIR = "/etc/promtail" +WORKLOAD_CONFIG_FILE_NAME = "promtail_config.yaml" +WORKLOAD_CONFIG_PATH = "{}/{}".format(WORKLOAD_CONFIG_DIR, WORKLOAD_CONFIG_FILE_NAME) +WORKLOAD_POSITIONS_PATH = "{}/positions.yaml".format(WORKLOAD_BINARY_DIR) +WORKLOAD_SERVICE_NAME = "promtail" + +HTTP_LISTEN_PORT = 9080 +GRPC_LISTEN_PORT = 9095 + + +class RelationNotFoundError(ValueError): + """Raised if there is no relation with the given name.""" + + def __init__(self, relation_name: str): + self.relation_name = relation_name + self.message = "No relation named '{}' found".format(relation_name) + + super().__init__(self.message) + + +class RelationInterfaceMismatchError(Exception): + """Raised if the relation with the given name has a different interface.""" + + def __init__( + self, + relation_name: str, + expected_relation_interface: str, + actual_relation_interface: str, + ): + self.relation_name = relation_name + self.expected_relation_interface = expected_relation_interface + self.actual_relation_interface = actual_relation_interface + self.message = ( + "The '{}' relation has '{}' as interface rather than the expected '{}'".format( + relation_name, actual_relation_interface, expected_relation_interface + ) + ) + super().__init__(self.message) + + +class RelationRoleMismatchError(Exception): + """Raised if the relation with the given name has a different direction.""" + + def __init__( + self, + relation_name: str, + expected_relation_role: RelationRole, + actual_relation_role: RelationRole, + ): + self.relation_name = relation_name + self.expected_relation_interface = expected_relation_role + self.actual_relation_role = actual_relation_role + self.message = "The '{}' relation has role '{}' rather than the expected '{}'".format( + relation_name, repr(actual_relation_role), repr(expected_relation_role) + ) + super().__init__(self.message) + + +def _validate_relation_by_interface_and_direction( + charm: CharmBase, + relation_name: str, + expected_relation_interface: str, + expected_relation_role: RelationRole, +): + """Verifies that a relation has the necessary characteristics. + + Verifies that the `relation_name` provided: (1) exists in metadata.yaml, + (2) declares as interface the interface name passed as `relation_interface` + and (3) has the right "direction", i.e., it is a relation that `charm` + provides or requires. + + Args: + charm: a `CharmBase` object to scan for the matching relation. + relation_name: the name of the relation to be verified. + expected_relation_interface: the interface name to be matched by the + relation named `relation_name`. + expected_relation_role: whether the `relation_name` must be either + provided or required by `charm`. + + Raises: + RelationNotFoundError: If there is no relation in the charm's metadata.yaml + with the same name as provided via `relation_name` argument. + RelationInterfaceMismatchError: The relation with the same name as provided + via `relation_name` argument does not have the same relation interface + as specified via the `expected_relation_interface` argument. + RelationRoleMismatchError: If the relation with the same name as provided + via `relation_name` argument does not have the same role as specified + via the `expected_relation_role` argument. + """ + if relation_name not in charm.meta.relations: + raise RelationNotFoundError(relation_name) + + relation = charm.meta.relations[relation_name] + + actual_relation_interface = relation.interface_name + if actual_relation_interface != expected_relation_interface: + raise RelationInterfaceMismatchError( + relation_name, + expected_relation_interface, + actual_relation_interface, # pyright: ignore + ) + + if expected_relation_role == RelationRole.provides: + if relation_name not in charm.meta.provides: + raise RelationRoleMismatchError( + relation_name, RelationRole.provides, RelationRole.requires + ) + elif expected_relation_role == RelationRole.requires: + if relation_name not in charm.meta.requires: + raise RelationRoleMismatchError( + relation_name, RelationRole.requires, RelationRole.provides + ) + else: + raise Exception("Unexpected RelationDirection: {}".format(expected_relation_role)) + + +class InvalidAlertRulePathError(Exception): + """Raised if the alert rules folder cannot be found or is otherwise invalid.""" + + def __init__( + self, + alert_rules_absolute_path: Path, + message: str, + ): + self.alert_rules_absolute_path = alert_rules_absolute_path + self.message = message + + super().__init__(self.message) + + +def _is_official_alert_rule_format(rules_dict: dict) -> bool: + """Are alert rules in the upstream format as supported by Loki. + + Alert rules in dictionary format are in "official" form if they + contain a "groups" key, since this implies they contain a list of + alert rule groups. + + Args: + rules_dict: a set of alert rules in Python dictionary format + + Returns: + True if alert rules are in official Loki file format. + """ + return "groups" in rules_dict + + +def _is_single_alert_rule_format(rules_dict: dict) -> bool: + """Are alert rules in single rule format. + + The Loki charm library supports reading of alert rules in a + custom format that consists of a single alert rule per file. This + does not conform to the official Loki alert rule file format + which requires that each alert rules file consists of a list of + alert rule groups and each group consists of a list of alert + rules. + + Alert rules in dictionary form are considered to be in single rule + format if in the least it contains two keys corresponding to the + alert rule name and alert expression. + + Returns: + True if alert rule is in single rule file format. + """ + # one alert rule per file + return set(rules_dict) >= {"alert", "expr"} + + +class AlertRules: + """Utility class for amalgamating Loki alert rule files and injecting juju topology. + + An `AlertRules` object supports aggregating alert rules from files and directories in both + official and single rule file formats using the `add_path()` method. All the alert rules + read are annotated with Juju topology labels and amalgamated into a single data structure + in the form of a Python dictionary using the `as_dict()` method. Such a dictionary can be + easily dumped into JSON format and exchanged over relation data. The dictionary can also + be dumped into YAML format and written directly into an alert rules file that is read by + Loki. Note that multiple `AlertRules` objects must not be written into the same file, + since Loki allows only a single list of alert rule groups per alert rules file. + + The official Loki format is a YAML file conforming to the Loki documentation + (https://grafana.com/docs/loki/latest/api/#list-rule-groups). + The custom single rule format is a subsection of the official YAML, having a single alert + rule, effectively "one alert per file". + """ + + # This class uses the following terminology for the various parts of a rule file: + # - alert rules file: the entire groups[] yaml, including the "groups:" key. + # - alert groups (plural): the list of groups[] (a list, i.e. no "groups:" key) - it is a list + # of dictionaries that have the "name" and "rules" keys. + # - alert group (singular): a single dictionary that has the "name" and "rules" keys. + # - alert rules (plural): all the alerts in a given alert group - a list of dictionaries with + # the "alert" and "expr" keys. + # - alert rule (singular): a single dictionary that has the "alert" and "expr" keys. + + def __init__(self, topology: Optional[JujuTopology] = None): + """Build and alert rule object. + + Args: + topology: a `JujuTopology` instance that is used to annotate all alert rules. + """ + self.topology = topology + self.tool = CosTool(None) + self.alert_groups = [] # type: List[dict] + + def _from_file(self, root_path: Path, file_path: Path) -> List[dict]: + """Read a rules file from path, injecting juju topology. + + Args: + root_path: full path to the root rules folder (used only for generating group name) + file_path: full path to a *.rule file. + + Returns: + A list of dictionaries representing the rules file, if file is valid (the structure is + formed by `yaml.safe_load` of the file); an empty list otherwise. + """ + with file_path.open() as rf: + # Load a list of rules from file then add labels and filters + try: + rule_file = yaml.safe_load(rf) or {} + + except Exception as e: + logger.error("Failed to read alert rules from %s: %s", file_path.name, e) + return [] + + if _is_official_alert_rule_format(rule_file): + alert_groups = rule_file["groups"] + elif _is_single_alert_rule_format(rule_file): + # convert to list of alert groups + # group name is made up from the file name + alert_groups = [{"name": file_path.stem, "rules": [rule_file]}] + else: + # invalid/unsupported + reason = "file is empty" if not rule_file else "unexpected file structure" + logger.error("Invalid rules file (%s): %s", reason, file_path.name) + return [] + + # update rules with additional metadata + for alert_group in alert_groups: + # update group name with topology and sub-path + alert_group["name"] = self._group_name( + str(root_path), + str(file_path), + alert_group["name"], + ) + + # add "juju_" topology labels + for alert_rule in alert_group["rules"]: + if "labels" not in alert_rule: + alert_rule["labels"] = {} + + if self.topology: + alert_rule["labels"].update(self.topology.label_matcher_dict) + # insert juju topology filters into a prometheus alert rule + # logql doesn't like empty matchers, so add a job matcher which hits + # any string as a "wildcard" which the topology labels will + # filter down + alert_rule["expr"] = self.tool.inject_label_matchers( + re.sub(r"%%juju_topology%%", r'job=~".+"', alert_rule["expr"]), + self.topology.label_matcher_dict, + ) + + return alert_groups + + def _group_name( + self, + root_path: typing.Union[Path, str], + file_path: typing.Union[Path, str], + group_name: str, + ) -> str: + """Generate group name from path and topology. + + The group name is made up of the relative path between the root dir_path, the file path, + and topology identifier. + + Args: + root_path: path to the root rules dir. + file_path: path to rule file. + group_name: original group name to keep as part of the new augmented group name + + Returns: + New group name, augmented by juju topology and relative path. + """ + file_path = Path(file_path) if not isinstance(file_path, Path) else file_path + root_path = Path(root_path) if not isinstance(root_path, Path) else root_path + rel_path = file_path.parent.relative_to(root_path.as_posix()) + + # We should account for both absolute paths and Windows paths. Convert it to a POSIX + # string, strip off any leading /, then join it + + path_str = "" + if not rel_path == Path("."): + # Get rid of leading / and optionally drive letters so they don't muck up + # the template later, since Path.parts returns them. The 'if relpath.is_absolute ...' + # isn't even needed since re.sub doesn't throw exceptions if it doesn't match, so it's + # optional, but it makes it clear what we're doing. + + # Note that Path doesn't actually care whether the path is valid just to instantiate + # the object, so we can happily strip that stuff out to make templating nicer + rel_path = Path( + re.sub(r"^([A-Za-z]+:)?/", "", rel_path.as_posix()) + if rel_path.is_absolute() + else str(rel_path) + ) + + # Get rid of relative path characters in the middle which both os.path and pathlib + # leave hanging around. We could use path.resolve(), but that would lead to very + # long template strings when rules come from pods and/or other deeply nested charm + # paths + path_str = "_".join(filter(lambda x: x not in ["..", "/"], rel_path.parts)) + + # Generate group name: + # - name, from juju topology + # - suffix, from the relative path of the rule file; + group_name_parts = [self.topology.identifier] if self.topology else [] + group_name_parts.extend([path_str, group_name, "alerts"]) + # filter to remove empty strings + return "_".join(filter(lambda x: x, group_name_parts)) + + @classmethod + def _multi_suffix_glob( + cls, dir_path: Path, suffixes: List[str], recursive: bool = True + ) -> list: + """Helper function for getting all files in a directory that have a matching suffix. + + Args: + dir_path: path to the directory to glob from. + suffixes: list of suffixes to include in the glob (items should begin with a period). + recursive: a flag indicating whether a glob is recursive (nested) or not. + + Returns: + List of files in `dir_path` that have one of the suffixes specified in `suffixes`. + """ + all_files_in_dir = dir_path.glob("**/*" if recursive else "*") + return list(filter(lambda f: f.is_file() and f.suffix in suffixes, all_files_in_dir)) + + def _from_dir(self, dir_path: Path, recursive: bool) -> List[dict]: + """Read all rule files in a directory. + + All rules from files for the same directory are loaded into a single + group. The generated name of this group includes juju topology. + By default, only the top directory is scanned; for nested scanning, pass `recursive=True`. + + Args: + dir_path: directory containing *.rule files (alert rules without groups). + recursive: flag indicating whether to scan for rule files recursively. + + Returns: + a list of dictionaries representing prometheus alert rule groups, each dictionary + representing an alert group (structure determined by `yaml.safe_load`). + """ + alert_groups = [] # type: List[dict] + + # Gather all alerts into a list of groups + for file_path in self._multi_suffix_glob(dir_path, [".rule", ".rules"], recursive): + alert_groups_from_file = self._from_file(dir_path, file_path) + if alert_groups_from_file: + logger.debug("Reading alert rule from %s", file_path) + alert_groups.extend(alert_groups_from_file) + + return alert_groups + + def add_path(self, path_str: str, *, recursive: bool = False): + """Add rules from a dir path. + + All rules from files are aggregated into a data structure representing a single rule file. + All group names are augmented with juju topology. + + Args: + path_str: either a rules file or a dir of rules files. + recursive: whether to read files recursively or not (no impact if `path` is a file). + + Raises: + InvalidAlertRulePathError: if the provided path is invalid. + """ + path = Path(path_str) # type: Path + if path.is_dir(): + self.alert_groups.extend(self._from_dir(path, recursive)) + elif path.is_file(): + self.alert_groups.extend(self._from_file(path.parent, path)) + else: + logger.debug("The alerts file does not exist: %s", path) + + def as_dict(self) -> dict: + """Return standard alert rules file in dict representation. + + Returns: + a dictionary containing a single list of alert rule groups. + The list of alert rule groups is provided as value of the + "groups" dictionary key. + """ + return {"groups": self.alert_groups} if self.alert_groups else {} + + +def _resolve_dir_against_charm_path(charm: CharmBase, *path_elements: str) -> str: + """Resolve the provided path items against the directory of the main file. + + Look up the directory of the `main.py` file being executed. This is normally + going to be the charm.py file of the charm including this library. Then, resolve + the provided path elements and, if the result path exists and is a directory, + return its absolute path; otherwise, raise en exception. + + Raises: + InvalidAlertRulePathError, if the path does not exist or is not a directory. + """ + charm_dir = Path(str(charm.charm_dir)) + if not charm_dir.exists() or not charm_dir.is_dir(): + # Operator Framework does not currently expose a robust + # way to determine the top level charm source directory + # that is consistent across deployed charms and unit tests + # Hence for unit tests the current working directory is used + # TODO: updated this logic when the following ticket is resolved + # https://github.com/canonical/operator/issues/643 + charm_dir = Path(os.getcwd()) + + alerts_dir_path = charm_dir.absolute().joinpath(*path_elements) + + if not alerts_dir_path.exists(): + raise InvalidAlertRulePathError(alerts_dir_path, "directory does not exist") + if not alerts_dir_path.is_dir(): + raise InvalidAlertRulePathError(alerts_dir_path, "is not a directory") + + return str(alerts_dir_path) + + +class NoRelationWithInterfaceFoundError(Exception): + """No relations with the given interface are found in the charm meta.""" + + def __init__(self, charm: CharmBase, relation_interface: Optional[str] = None): + self.charm = charm + self.relation_interface = relation_interface + self.message = ( + "No relations with interface '{}' found in the meta of the '{}' charm".format( + relation_interface, charm.meta.name + ) + ) + + super().__init__(self.message) + + +class MultipleRelationsWithInterfaceFoundError(Exception): + """Multiple relations with the given interface are found in the charm meta.""" + + def __init__(self, charm: CharmBase, relation_interface: str, relations: list): + self.charm = charm + self.relation_interface = relation_interface + self.relations = relations + self.message = ( + "Multiple relations with interface '{}' found in the meta of the '{}' charm.".format( + relation_interface, charm.meta.name + ) + ) + super().__init__(self.message) + + +class LokiPushApiEndpointDeparted(EventBase): + """Event emitted when Loki departed.""" + + +class LokiPushApiEndpointJoined(EventBase): + """Event emitted when Loki joined.""" + + +class LokiPushApiAlertRulesChanged(EventBase): + """Event emitted if there is a change in the alert rules.""" + + def __init__(self, handle, relation, relation_id, app=None, unit=None): + """Pretend we are almost like a RelationEvent. + + Fields to serialize: + { + "relation_name": , + "relation_id": , + "app_name": , + "unit_name": + } + + In this way, we can transparently use `RelationEvent.snapshot()` to pass + it back if we need to log it. + """ + super().__init__(handle) + self.relation = relation + self.relation_id = relation_id + self.app = app + self.unit = unit + + def snapshot(self) -> Dict: + """Save event information.""" + if not self.relation: + return {} + snapshot = {"relation_name": self.relation.name, "relation_id": self.relation.id} + if self.app: + snapshot["app_name"] = self.app.name + if self.unit: + snapshot["unit_name"] = self.unit.name + return snapshot + + def restore(self, snapshot: dict): + """Restore event information.""" + self.relation = self.framework.model.get_relation( + snapshot["relation_name"], snapshot["relation_id"] + ) + app_name = snapshot.get("app_name") + if app_name: + self.app = self.framework.model.get_app(app_name) + else: + self.app = None + unit_name = snapshot.get("unit_name") + if unit_name: + self.unit = self.framework.model.get_unit(unit_name) + else: + self.unit = None + + +class InvalidAlertRuleEvent(EventBase): + """Event emitted when alert rule files are not parsable. + + Enables us to set a clear status on the provider. + """ + + def __init__(self, handle, errors: str = "", valid: bool = False): + super().__init__(handle) + self.errors = errors + self.valid = valid + + def snapshot(self) -> Dict: + """Save alert rule information.""" + return { + "valid": self.valid, + "errors": self.errors, + } + + def restore(self, snapshot): + """Restore alert rule information.""" + self.valid = snapshot["valid"] + self.errors = snapshot["errors"] + + +class LokiPushApiEvents(ObjectEvents): + """Event descriptor for events raised by `LokiPushApiProvider`.""" + + loki_push_api_endpoint_departed = EventSource(LokiPushApiEndpointDeparted) + loki_push_api_endpoint_joined = EventSource(LokiPushApiEndpointJoined) + loki_push_api_alert_rules_changed = EventSource(LokiPushApiAlertRulesChanged) + alert_rule_status_changed = EventSource(InvalidAlertRuleEvent) + + +class LokiPushApiProvider(Object): + """A LokiPushApiProvider class.""" + + on = LokiPushApiEvents() # pyright: ignore + + def __init__( + self, + charm, + relation_name: str = DEFAULT_RELATION_NAME, + *, + port: Union[str, int] = 3100, + scheme: str = "http", + address: str = "localhost", + path: str = "loki/api/v1/push", + ): + """A Loki service provider. + + Args: + charm: a `CharmBase` instance that manages this + instance of the Loki service. + relation_name: an optional string name of the relation between `charm` + and the Loki charmed service. The default is "logging". + It is strongly advised not to change the default, so that people + deploying your charm will have a consistent experience with all + other charms that consume metrics endpoints. + port: an optional port of the Loki service (default is "3100"). + scheme: an optional scheme of the Loki API URL (default is "http"). + address: an optional address of the Loki service (default is "localhost"). + path: an optional path of the Loki API URL (default is "loki/api/v1/push") + + Raises: + RelationNotFoundError: If there is no relation in the charm's metadata.yaml + with the same name as provided via `relation_name` argument. + RelationInterfaceMismatchError: The relation with the same name as provided + via `relation_name` argument does not have the `loki_push_api` relation + interface. + RelationRoleMismatchError: If the relation with the same name as provided + via `relation_name` argument does not have the `RelationRole.requires` + role. + """ + _validate_relation_by_interface_and_direction( + charm, relation_name, RELATION_INTERFACE_NAME, RelationRole.provides + ) + super().__init__(charm, relation_name) + self._charm = charm + self._relation_name = relation_name + self._tool = CosTool(self) + self.port = int(port) + self.scheme = scheme + self.address = address + self.path = path + + events = self._charm.on[relation_name] + self.framework.observe(self._charm.on.upgrade_charm, self._on_lifecycle_event) + self.framework.observe(events.relation_joined, self._on_logging_relation_joined) + self.framework.observe(events.relation_changed, self._on_logging_relation_changed) + self.framework.observe(events.relation_departed, self._on_logging_relation_departed) + self.framework.observe(events.relation_broken, self._on_logging_relation_broken) + + def _on_lifecycle_event(self, _): + # Upgrade event or other charm-level event + should_update = False + for relation in self._charm.model.relations[self._relation_name]: + # Don't accidentally flip a True result back. + should_update = should_update or self._process_logging_relation_changed(relation) + if should_update: + # We don't have a RelationEvent, so build it up by hand + first_rel = self._charm.model.relations[self._relation_name][0] + self.on.loki_push_api_alert_rules_changed.emit( + relation=first_rel, + relation_id=first_rel.id, + ) + + def _on_logging_relation_joined(self, event: RelationJoinedEvent): + """Set basic data on relation joins. + + Set the promtail binary URL location, which will not change, and anything + else which may be required, but is static.. + + Args: + event: a `CharmEvent` in response to which the consumer + charm must set its relation data. + """ + if self._charm.unit.is_leader(): + event.relation.data[self._charm.app].update(self._promtail_binary_url) + logger.debug("Saved promtail binary url: %s", self._promtail_binary_url) + + def _on_logging_relation_changed(self, event: HookEvent): + """Handle changes in related consumers. + + Anytime there are changes in the relation between Loki + and its consumers charms. + + Args: + event: a `CharmEvent` in response to which the consumer + charm must update its relation data. + """ + should_update = self._process_logging_relation_changed(event.relation) # pyright: ignore + if should_update: + self.on.loki_push_api_alert_rules_changed.emit( + relation=event.relation, # pyright: ignore + relation_id=event.relation.id, # pyright: ignore + app=self._charm.app, + unit=self._charm.unit, + ) + + def _on_logging_relation_broken(self, event: RelationBrokenEvent): + """Removes alert rules files when consumer charms left the relation with Loki. + + Args: + event: a `CharmEvent` in response to which the Loki + charm must update its relation data. + """ + self.on.loki_push_api_alert_rules_changed.emit( + relation=event.relation, + relation_id=event.relation.id, + app=self._charm.app, + unit=self._charm.unit, + ) + + def _on_logging_relation_departed(self, event: RelationDepartedEvent): + """Removes alert rules files when consumer charms left the relation with Loki. + + Args: + event: a `CharmEvent` in response to which the Loki + charm must update its relation data. + """ + self.on.loki_push_api_alert_rules_changed.emit( + relation=event.relation, + relation_id=event.relation.id, + app=self._charm.app, + unit=self._charm.unit, + ) + + def _should_update_alert_rules(self, relation) -> bool: + """Determine whether alert rules should be regenerated. + + If there are alert rules in the relation data bag, tell the charm + whether to regenerate them based on the boolean returned here. + """ + if relation.data.get(relation.app).get("alert_rules", None) is not None: + return True + return False + + def _process_logging_relation_changed(self, relation: Relation) -> bool: + """Handle changes in related consumers. + + Anytime there are changes in relations between Loki + and its consumers charms, Loki set the `loki_push_api` + into the relation data. Set the endpoint building + appropriately, and if there are alert rules present in + the relation, let the caller know. + Besides Loki generates alert rules files based what + consumer charms forwards, + + Args: + relation: the `Relation` instance to update. + + Returns: + A boolean indicating whether an event should be emitted, so we + only emit one on lifecycle events + """ + relation.data[self._charm.unit]["public_address"] = socket.getfqdn() or "" + self.update_endpoint(relation=relation) + return self._should_update_alert_rules(relation) + + @property + def _promtail_binary_url(self) -> dict: + """URL from which Promtail binary can be downloaded.""" + # construct promtail binary url paths from parts + promtail_binaries = {} + for arch, info in PROMTAIL_BINARIES.items(): + info["url"] = "{}/promtail-{}/{}.gz".format( + PROMTAIL_BASE_URL, PROMTAIL_VERSION, info["filename"] + ) + promtail_binaries[arch] = info + + return {"promtail_binary_zip_url": json.dumps(promtail_binaries)} + + def update_endpoint(self, url: str = "", relation: Optional[Relation] = None) -> None: + """Triggers programmatically the update of endpoint in unit relation data. + + This method should be used when the charm relying on this library needs + to update the relation data in response to something occurring outside + the `logging` relation lifecycle, e.g., in case of a + host address change because the charmed operator becomes connected to an + Ingress after the `logging` relation is established. + + Args: + url: An optional url value to update relation data. + relation: An optional instance of `class:ops.model.Relation` to update. + """ + # if no relation is specified update all of them + if not relation: + if not self._charm.model.relations.get(self._relation_name): + return + + relations_list = self._charm.model.relations.get(self._relation_name) + else: + relations_list = [relation] + + endpoint = self._endpoint(url or self._url) + + for relation in relations_list: + relation.data[self._charm.unit].update({"endpoint": json.dumps(endpoint)}) + + logger.debug("Saved endpoint in unit relation data") + + @property + def _url(self) -> str: + """Get local Loki Push API url. + + Return url to loki, including port number, but without the endpoint subpath. + """ + return "http://{}:{}".format(socket.getfqdn(), self.port) + + def _endpoint(self, url) -> dict: + """Get Loki push API endpoint for a given url. + + Args: + url: A loki unit URL. + + Returns: str + """ + endpoint = "/loki/api/v1/push" + return {"url": url.rstrip("/") + endpoint} + + @property + def alerts(self) -> dict: # noqa: C901 + """Fetch alerts for all relations. + + A Loki alert rules file consists of a list of "groups". Each + group consists of a list of alerts (`rules`) that are sequentially + executed. This method returns all the alert rules provided by each + related metrics provider charm. These rules may be used to generate a + separate alert rules file for each relation since the returned list + of alert groups are indexed by relation ID. Also for each relation ID + associated scrape metadata such as Juju model, UUID and application + name are provided so a unique name may be generated for the rules + file. For each relation the structure of data returned is a dictionary + with four keys + + - groups + - model + - model_uuid + - application + + The value of the `groups` key is such that it may be used to generate + a Loki alert rules file directly using `yaml.dump` but the + `groups` key itself must be included as this is required by Loki, + for example as in `yaml.dump({"groups": alerts["groups"]})`. + + Currently only accepts a list of rules and these + rules are all placed into a single group, even though Loki itself + allows for multiple groups within a single alert rules file. + + Returns: + a dictionary of alert rule groups and associated scrape + metadata indexed by relation ID. + """ + alerts = {} # type: Dict[str, dict] # mapping b/w juju identifiers and alert rule files + for relation in self._charm.model.relations[self._relation_name]: + if not relation.units or not relation.app: + continue + + alert_rules = json.loads(relation.data[relation.app].get("alert_rules", "{}")) + if not alert_rules: + continue + + alert_rules = self._inject_alert_expr_labels(alert_rules) + + identifier, topology = self._get_identifier_by_alert_rules(alert_rules) + if not topology: + try: + metadata = json.loads(relation.data[relation.app]["metadata"]) + identifier = JujuTopology.from_dict(metadata).identifier + alerts[identifier] = self._tool.apply_label_matchers(alert_rules) # type: ignore + + except KeyError as e: + logger.debug( + "Relation %s has no 'metadata': %s", + relation.id, + e, + ) + + if not identifier: + logger.error( + "Alert rules were found but no usable group or identifier was present." + ) + continue + + _, errmsg = self._tool.validate_alert_rules(alert_rules) + if errmsg: + relation.data[self._charm.app]["event"] = json.dumps({"errors": errmsg}) + continue + + alerts[identifier] = alert_rules + + return alerts + + def _get_identifier_by_alert_rules( + self, rules: dict + ) -> Tuple[Union[str, None], Union[JujuTopology, None]]: + """Determine an appropriate dict key for alert rules. + + The key is used as the filename when writing alerts to disk, so the structure + and uniqueness is important. + + Args: + rules: a dict of alert rules + Returns: + A tuple containing an identifier, if found, and a JujuTopology, if it could + be constructed. + """ + if "groups" not in rules: + logger.debug("No alert groups were found in relation data") + return None, None + + # Construct an ID based on what's in the alert rules if they have labels + for group in rules["groups"]: + try: + labels = group["rules"][0]["labels"] + topology = JujuTopology( + # Don't try to safely get required constructor fields. There's already + # a handler for KeyErrors + model_uuid=labels["juju_model_uuid"], + model=labels["juju_model"], + application=labels["juju_application"], + unit=labels.get("juju_unit", ""), + charm_name=labels.get("juju_charm", ""), + ) + return topology.identifier, topology + except KeyError: + logger.debug("Alert rules were found but no usable labels were present") + continue + + logger.warning( + "No labeled alert rules were found, and no 'scrape_metadata' " + "was available. Using the alert group name as filename." + ) + try: + for group in rules["groups"]: + return group["name"], None + except KeyError: + logger.debug("No group name was found to use as identifier") + + return None, None + + def _inject_alert_expr_labels(self, rules: Dict[str, Any]) -> Dict[str, Any]: + """Iterate through alert rules and inject topology into expressions. + + Args: + rules: a dict of alert rules + """ + if "groups" not in rules: + return rules + + modified_groups = [] + for group in rules["groups"]: + # Copy off rules, so we don't modify an object we're iterating over + rules_copy = group["rules"] + for idx, rule in enumerate(rules_copy): + labels = rule.get("labels") + + if labels: + try: + topology = JujuTopology( + # Don't try to safely get required constructor fields. There's already + # a handler for KeyErrors + model_uuid=labels["juju_model_uuid"], + model=labels["juju_model"], + application=labels["juju_application"], + unit=labels.get("juju_unit", ""), + charm_name=labels.get("juju_charm", ""), + ) + + # Inject topology and put it back in the list + rule["expr"] = self._tool.inject_label_matchers( + re.sub(r"%%juju_topology%%,?", "", rule["expr"]), + topology.label_matcher_dict, + ) + except KeyError: + # Some required JujuTopology key is missing. Just move on. + pass + + group["rules"][idx] = rule + + modified_groups.append(group) + + rules["groups"] = modified_groups + return rules + + +class ConsumerBase(Object): + """Consumer's base class.""" + + def __init__( + self, + charm: CharmBase, + relation_name: str = DEFAULT_RELATION_NAME, + alert_rules_path: str = DEFAULT_ALERT_RULES_RELATIVE_PATH, + recursive: bool = False, + skip_alert_topology_labeling: bool = False, + ): + super().__init__(charm, relation_name) + self._charm = charm + self._relation_name = relation_name + self.topology = JujuTopology.from_charm(charm) + + try: + alert_rules_path = _resolve_dir_against_charm_path(charm, alert_rules_path) + except InvalidAlertRulePathError as e: + logger.debug( + "Invalid Loki alert rules folder at %s: %s", + e.alert_rules_absolute_path, + e.message, + ) + self._alert_rules_path = alert_rules_path + self._skip_alert_topology_labeling = skip_alert_topology_labeling + + self._recursive = recursive + + def _handle_alert_rules(self, relation): + if not self._charm.unit.is_leader(): + return + + alert_rules = ( + AlertRules(None) if self._skip_alert_topology_labeling else AlertRules(self.topology) + ) + alert_rules.add_path(self._alert_rules_path, recursive=self._recursive) + alert_rules_as_dict = alert_rules.as_dict() + + relation.data[self._charm.app]["metadata"] = json.dumps(self.topology.as_dict()) + relation.data[self._charm.app]["alert_rules"] = json.dumps( + alert_rules_as_dict, + sort_keys=True, # sort, to prevent unnecessary relation_changed events + ) + + @property + def loki_endpoints(self) -> List[dict]: + """Fetch Loki Push API endpoints sent from LokiPushApiProvider through relation data. + + Returns: + A list of dictionaries with Loki Push API endpoints, for instance: + [ + {"url": "http://loki1:3100/loki/api/v1/push"}, + {"url": "http://loki2:3100/loki/api/v1/push"}, + ] + """ + endpoints = [] # type: list + + for relation in self._charm.model.relations[self._relation_name]: + for unit in relation.units: + if unit.app == self._charm.app: + # This is a peer unit + continue + + endpoint = relation.data[unit].get("endpoint") + if endpoint: + deserialized_endpoint = json.loads(endpoint) + endpoints.append(deserialized_endpoint) + + return endpoints + + +class LokiPushApiConsumer(ConsumerBase): + """Loki Consumer class.""" + + on = LokiPushApiEvents() # pyright: ignore + + def __init__( + self, + charm: CharmBase, + relation_name: str = DEFAULT_RELATION_NAME, + alert_rules_path: str = DEFAULT_ALERT_RULES_RELATIVE_PATH, + recursive: bool = True, + skip_alert_topology_labeling: bool = False, + ): + """Construct a Loki charm client. + + The `LokiPushApiConsumer` object provides configurations to a Loki client charm, such as + the Loki API endpoint to push logs. It is intended for workloads that can speak + loki_push_api (https://grafana.com/docs/loki/latest/api/#push-log-entries-to-loki), such + as grafana-agent. + (If you need to forward workload stdout logs, then use v1/loki_push_api.LogForwarder; if + you need to forward log files, then use LogProxyConsumer.) + + `LokiPushApiConsumer` can be instantiated as follows: + + self._loki_consumer = LokiPushApiConsumer(self) + + Args: + charm: a `CharmBase` object that manages this `LokiPushApiConsumer` object. + Typically, this is `self` in the instantiating class. + relation_name: the string name of the relation interface to look up. + If `charm` has exactly one relation with this interface, the relation's + name is returned. If none or multiple relations with the provided interface + are found, this method will raise either a NoRelationWithInterfaceFoundError or + MultipleRelationsWithInterfaceFoundError exception, respectively. + alert_rules_path: a string indicating a path where alert rules can be found + recursive: Whether to scan for rule files recursively. + skip_alert_topology_labeling: whether to skip the alert topology labeling. + + Raises: + RelationNotFoundError: If there is no relation in the charm's metadata.yaml + with the same name as provided via `relation_name` argument. + RelationInterfaceMismatchError: The relation with the same name as provided + via `relation_name` argument does not have the `loki_push_api` relation + interface. + RelationRoleMismatchError: If the relation with the same name as provided + via `relation_name` argument does not have the `RelationRole.provides` + role. + + Emits: + loki_push_api_endpoint_joined: This event is emitted when the relation between the + Charmed Operator that instantiates `LokiPushApiProvider` (Loki charm for instance) + and the Charmed Operator that instantiates `LokiPushApiConsumer` is established. + loki_push_api_endpoint_departed: This event is emitted when the relation between the + Charmed Operator that implements `LokiPushApiProvider` (Loki charm for instance) + and the Charmed Operator that implements `LokiPushApiConsumer` is removed. + loki_push_api_alert_rules_error: This event is emitted when an invalid alert rules + file is encountered or if `alert_rules_path` is empty. + """ + _validate_relation_by_interface_and_direction( + charm, relation_name, RELATION_INTERFACE_NAME, RelationRole.requires + ) + super().__init__( + charm, relation_name, alert_rules_path, recursive, skip_alert_topology_labeling + ) + events = self._charm.on[relation_name] + self.framework.observe(self._charm.on.upgrade_charm, self._on_lifecycle_event) + self.framework.observe(events.relation_joined, self._on_logging_relation_joined) + self.framework.observe(events.relation_changed, self._on_logging_relation_changed) + self.framework.observe(events.relation_departed, self._on_logging_relation_departed) + + def _on_lifecycle_event(self, _: HookEvent): + """Update require relation data on charm upgrades and other lifecycle events. + + Args: + event: a `CharmEvent` in response to which the consumer + charm must update its relation data. + """ + # Upgrade event or other charm-level event + self._reinitialize_alert_rules() + self.on.loki_push_api_endpoint_joined.emit() + + def _on_logging_relation_joined(self, event: RelationJoinedEvent): + """Handle changes in related consumers. + + Update relation data and emit events when a relation is established. + + Args: + event: a `CharmEvent` in response to which the consumer + charm must update its relation data. + + Emits: + loki_push_api_endpoint_joined: Once the relation is established, this event is emitted. + loki_push_api_alert_rules_error: This event is emitted when an invalid alert rules + file is encountered or if `alert_rules_path` is empty. + """ + # Alert rules will not change over the lifecycle of a charm, and do not need to be + # constantly set on every relation_changed event. Leave them here. + self._handle_alert_rules(event.relation) + self.on.loki_push_api_endpoint_joined.emit() + + def _on_logging_relation_changed(self, event: RelationEvent): + """Handle changes in related consumers. + + Anytime there are changes in the relation between Loki + and its consumers charms. + + Args: + event: a `CharmEvent` in response to which the consumer + charm must update its relation data. + + Emits: + loki_push_api_endpoint_joined: Once the relation is established, this event is emitted. + loki_push_api_alert_rules_error: This event is emitted when an invalid alert rules + file is encountered or if `alert_rules_path` is empty. + """ + if self._charm.unit.is_leader(): + ev = json.loads(event.relation.data[event.app].get("event", "{}")) + + if ev: + valid = bool(ev.get("valid", True)) + errors = ev.get("errors", "") + + if valid and not errors: + self.on.alert_rule_status_changed.emit(valid=valid) + else: + self.on.alert_rule_status_changed.emit(valid=valid, errors=errors) + + self.on.loki_push_api_endpoint_joined.emit() + + def _reinitialize_alert_rules(self): + """Reloads alert rules and updates all relations.""" + for relation in self._charm.model.relations[self._relation_name]: + self._handle_alert_rules(relation) + + def _process_logging_relation_changed(self, relation: Relation): + self._handle_alert_rules(relation) + self.on.loki_push_api_endpoint_joined.emit() + + def _on_logging_relation_departed(self, _: RelationEvent): + """Handle departures in related providers. + + Anytime there are departures in relations between the consumer charm and Loki + the consumer charm is informed, through a `LokiPushApiEndpointDeparted` event. + The consumer charm can then choose to update its configuration. + """ + # Provide default to avoid throwing, as in some complicated scenarios with + # upgrades and hook failures we might not have data in the storage + self.on.loki_push_api_endpoint_departed.emit() + + +class ContainerNotFoundError(Exception): + """Raised if the specified container does not exist.""" + + def __init__(self): + msg = "The specified container does not exist." + self.message = msg + + super().__init__(self.message) + + +class MultipleContainersFoundError(Exception): + """Raised if no container name is passed but multiple containers are present.""" + + def __init__(self): + msg = ( + "No 'container_name' parameter has been specified; since this Charmed Operator" + " is has multiple containers, container_name must be specified for the container" + " to get logs from." + ) + self.message = msg + + super().__init__(self.message) + + +class PromtailDigestError(EventBase): + """Event emitted when there is an error with Promtail initialization.""" + + def __init__(self, handle, message): + super().__init__(handle) + self.message = message + + def snapshot(self): + """Save message information.""" + return {"message": self.message} + + def restore(self, snapshot): + """Restore message information.""" + self.message = snapshot["message"] + + +class LogProxyEndpointDeparted(EventBase): + """Event emitted when a Log Proxy has departed.""" + + +class LogProxyEndpointJoined(EventBase): + """Event emitted when a Log Proxy joins.""" + + +class LogProxyEvents(ObjectEvents): + """Event descriptor for events raised by `LogProxyConsumer`.""" + + promtail_digest_error = EventSource(PromtailDigestError) + log_proxy_endpoint_departed = EventSource(LogProxyEndpointDeparted) + log_proxy_endpoint_joined = EventSource(LogProxyEndpointJoined) + + +class LogProxyConsumer(ConsumerBase): + """LogProxyConsumer class. + + > Note: This object is deprecated. Consider migrating to v1/loki_push_api.LogForwarder with the + > release of Juju 3.6 LTS. + + The `LogProxyConsumer` object provides a method for attaching `promtail` to + a workload in order to generate structured logging data from applications + which traditionally log to syslog or do not have native Loki integration. + The `LogProxyConsumer` can be instantiated as follows: + + self._log_proxy_consumer = LogProxyConsumer(self, log_files=["/var/log/messages"]) + + Args: + charm: a `CharmBase` object that manages this `LokiPushApiConsumer` object. + Typically, this is `self` in the instantiating class. + log_files: a list of log files to monitor with Promtail. + relation_name: the string name of the relation interface to look up. + If `charm` has exactly one relation with this interface, the relation's + name is returned. If none or multiple relations with the provided interface + are found, this method will raise either a NoRelationWithInterfaceFoundError or + MultipleRelationsWithInterfaceFoundError exception, respectively. + enable_syslog: Whether to enable syslog integration. + syslog_port: The port syslog is attached to. + alert_rules_path: an optional path for the location of alert rules + files. Defaults to "./src/loki_alert_rules", + resolved from the directory hosting the charm entry file. + The alert rules are automatically updated on charm upgrade. + recursive: Whether to scan for rule files recursively. + container_name: An optional container name to inject the payload into. + promtail_resource_name: An optional promtail resource name from metadata + if it has been modified and attached + + Raises: + RelationNotFoundError: If there is no relation in the charm's metadata.yaml + with the same name as provided via `relation_name` argument. + RelationInterfaceMismatchError: The relation with the same name as provided + via `relation_name` argument does not have the `loki_push_api` relation + interface. + RelationRoleMismatchError: If the relation with the same name as provided + via `relation_name` argument does not have the `RelationRole.provides` + role. + """ + + on = LogProxyEvents() # pyright: ignore + + def __init__( + self, + charm, + log_files: Optional[Union[List[str], str]] = None, + relation_name: str = DEFAULT_LOG_PROXY_RELATION_NAME, + enable_syslog: bool = False, + syslog_port: int = 1514, + alert_rules_path: str = DEFAULT_ALERT_RULES_RELATIVE_PATH, + recursive: bool = False, + container_name: str = "", + promtail_resource_name: Optional[str] = None, + *, # TODO: In v1, move the star up so everything after 'charm' is a kwarg + insecure_skip_verify: bool = False, + ): + super().__init__(charm, relation_name, alert_rules_path, recursive) + self._charm = charm + self._relation_name = relation_name + self._container = self._get_container(container_name) + self._container_name = self._get_container_name(container_name) + + if not log_files: + log_files = [] + elif isinstance(log_files, str): + log_files = [log_files] + elif not isinstance(log_files, list) or not all((isinstance(x, str) for x in log_files)): + raise TypeError("The 'log_files' argument must be a list of strings.") + self._log_files = log_files + + self._syslog_port = syslog_port + self._is_syslog = enable_syslog + self.topology = JujuTopology.from_charm(charm) + self._promtail_resource_name = promtail_resource_name or "promtail-bin" + self.insecure_skip_verify = insecure_skip_verify + + # architecture used for promtail binary + arch = platform.processor() + self._arch = "amd64" if arch == "x86_64" else arch + + events = self._charm.on[relation_name] + self.framework.observe(events.relation_created, self._on_relation_created) + self.framework.observe(events.relation_changed, self._on_relation_changed) + self.framework.observe(events.relation_departed, self._on_relation_departed) + # turn the container name to a valid Python identifier + snake_case_container_name = self._container_name.replace("-", "_") + self.framework.observe( + getattr(self._charm.on, "{}_pebble_ready".format(snake_case_container_name)), + self._on_pebble_ready, + ) + + def _on_pebble_ready(self, _: WorkloadEvent): + """Event handler for `pebble_ready`.""" + if self.model.relations[self._relation_name]: + self._setup_promtail() + + def _on_relation_created(self, _: RelationCreatedEvent) -> None: + """Event handler for `relation_created`.""" + if not self._container.can_connect(): + return + self._setup_promtail() + + def _on_relation_changed(self, event: RelationEvent) -> None: + """Event handler for `relation_changed`. + + Args: + event: The event object `RelationChangedEvent`. + """ + self._handle_alert_rules(event.relation) + + if self._charm.unit.is_leader(): + ev = json.loads(event.relation.data[event.app].get("event", "{}")) + + if ev: + valid = bool(ev.get("valid", True)) + errors = ev.get("errors", "") + + if valid and not errors: + self.on.alert_rule_status_changed.emit(valid=valid) + else: + self.on.alert_rule_status_changed.emit(valid=valid, errors=errors) + + if not self._container.can_connect(): + return + if self.model.relations[self._relation_name]: + if "promtail" not in self._container.get_plan().services: + self._setup_promtail() + return + + new_config = self._promtail_config + if new_config != self._current_config: + self._container.push( + WORKLOAD_CONFIG_PATH, yaml.safe_dump(new_config), make_dirs=True + ) + + # Loki may send endpoints late. Don't necessarily start, there may be + # no clients + if new_config["clients"]: + self._container.restart(WORKLOAD_SERVICE_NAME) + self.on.log_proxy_endpoint_joined.emit() + else: + self.on.promtail_digest_error.emit("No promtail client endpoints available!") + + def _on_relation_departed(self, _: RelationEvent) -> None: + """Event handler for `relation_departed`. + + Args: + event: The event object `RelationDepartedEvent`. + """ + if not self._container.can_connect(): + return + if not self._charm.model.relations[self._relation_name]: + self._container.stop(WORKLOAD_SERVICE_NAME) + return + + new_config = self._promtail_config + if new_config != self._current_config: + self._container.push(WORKLOAD_CONFIG_PATH, yaml.safe_dump(new_config), make_dirs=True) + + if new_config["clients"]: + self._container.restart(WORKLOAD_SERVICE_NAME) + else: + self._container.stop(WORKLOAD_SERVICE_NAME) + self.on.log_proxy_endpoint_departed.emit() + + def _get_container(self, container_name: str = "") -> Container: # pyright: ignore + """Gets a single container by name or using the only container running in the Pod. + + If there is more than one container in the Pod a `PromtailDigestError` is emitted. + + Args: + container_name: The container name. + + Returns: + A `ops.model.Container` object representing the container. + + Emits: + PromtailDigestError, if there was a problem obtaining a container. + """ + try: + container_name = self._get_container_name(container_name) + return self._charm.unit.get_container(container_name) + except (MultipleContainersFoundError, ContainerNotFoundError, ModelError) as e: + msg = str(e) + logger.warning(msg) + self.on.promtail_digest_error.emit(msg) + + def _get_container_name(self, container_name: str = "") -> str: + """Helper function for getting/validating a container name. + + Args: + container_name: The container name to be validated (optional). + + Returns: + container_name: The same container_name that was passed (if it exists) or the only + container name that is present (if no container_name was passed). + + Raises: + ContainerNotFoundError, if container_name does not exist. + MultipleContainersFoundError, if container_name was not provided but multiple + containers are present. + """ + containers = dict(self._charm.model.unit.containers) + if len(containers) == 0: + raise ContainerNotFoundError + + if not container_name: + # container_name was not provided - will get it ourselves, if it is the only one + if len(containers) > 1: + raise MultipleContainersFoundError + + # Get the first key in the containers' dict. + # Need to "cast", otherwise: + # error: Incompatible return value type (got "Optional[str]", expected "str") + container_name = cast(str, next(iter(containers.keys()))) + + elif container_name not in containers: + raise ContainerNotFoundError + + return container_name + + def _add_pebble_layer(self, workload_binary_path: str) -> None: + """Adds Pebble layer that manages Promtail service in Workload container. + + Args: + workload_binary_path: string providing path to promtail binary in workload container. + """ + pebble_layer = { + "summary": "promtail layer", + "description": "pebble config layer for promtail", + "services": { + WORKLOAD_SERVICE_NAME: { + "override": "replace", + "summary": WORKLOAD_SERVICE_NAME, + "command": "{} {}".format(workload_binary_path, self._cli_args), + "startup": "disabled", + } + }, + } + self._container.add_layer( + self._container_name, pebble_layer, combine=True # pyright: ignore + ) + + def _create_directories(self) -> None: + """Creates the directories for Promtail binary and config file.""" + self._container.make_dir(path=WORKLOAD_BINARY_DIR, make_parents=True) + self._container.make_dir(path=WORKLOAD_CONFIG_DIR, make_parents=True) + + def _obtain_promtail(self, promtail_info: dict) -> None: + """Obtain promtail binary from an attached resource or download it. + + Args: + promtail_info: dictionary containing information about promtail binary + that must be used. The dictionary must have three keys + - "filename": filename of promtail binary + - "zipsha": sha256 sum of zip file of promtail binary + - "binsha": sha256 sum of unpacked promtail binary + """ + workload_binary_path = os.path.join(WORKLOAD_BINARY_DIR, promtail_info["filename"]) + if self._promtail_attached_as_resource: + self._push_promtail_if_attached(workload_binary_path) + return + + if self._promtail_must_be_downloaded(promtail_info): + self._download_and_push_promtail_to_workload(promtail_info) + else: + binary_path = os.path.join(BINARY_DIR, promtail_info["filename"]) + self._push_binary_to_workload(binary_path, workload_binary_path) + + def _push_binary_to_workload(self, binary_path: str, workload_binary_path: str) -> None: + """Push promtail binary into workload container. + + Args: + binary_path: path in charm container from which promtail binary is read. + workload_binary_path: path in workload container to which promtail binary is pushed. + """ + with open(binary_path, "rb") as f: + self._container.push( + workload_binary_path, + f, + permissions=0o755, + encoding=None, # pyright: ignore + make_dirs=True, + ) + logger.debug("The promtail binary file has been pushed to the workload container.") + + @property + def _promtail_attached_as_resource(self) -> bool: + """Checks whether Promtail binary is attached to the charm or not. + + Returns: + a boolean representing whether Promtail binary is attached as a resource or not. + """ + try: + self._charm.model.resources.fetch(self._promtail_resource_name) + return True + except ModelError: + return False + except NameError as e: + if "invalid resource name" in str(e): + return False + raise + + def _push_promtail_if_attached(self, workload_binary_path: str) -> bool: + """Checks whether Promtail binary is attached to the charm or not. + + Args: + workload_binary_path: string specifying expected path of promtail + in workload container + + Returns: + a boolean representing whether Promtail binary is attached or not. + """ + logger.info("Promtail binary file has been obtained from an attached resource.") + resource_path = self._charm.model.resources.fetch(self._promtail_resource_name) + self._push_binary_to_workload(resource_path, workload_binary_path) + return True + + def _promtail_must_be_downloaded(self, promtail_info: dict) -> bool: + """Checks whether promtail binary must be downloaded or not. + + Args: + promtail_info: dictionary containing information about promtail binary + that must be used. The dictionary must have three keys + - "filename": filename of promtail binary + - "zipsha": sha256 sum of zip file of promtail binary + - "binsha": sha256 sum of unpacked promtail binary + + Returns: + a boolean representing whether Promtail binary must be downloaded or not. + """ + binary_path = os.path.join(BINARY_DIR, promtail_info["filename"]) + if not self._is_promtail_binary_in_charm(binary_path): + return True + + if not self._sha256sums_matches(binary_path, promtail_info["binsha"]): + return True + + logger.debug("Promtail binary file is already in the the charm container.") + return False + + def _sha256sums_matches(self, file_path: str, sha256sum: str) -> bool: + """Checks whether a file's sha256sum matches or not with a specific sha256sum. + + Args: + file_path: A string representing the files' patch. + sha256sum: The sha256sum against which we want to verify. + + Returns: + a boolean representing whether a file's sha256sum matches or not with + a specific sha256sum. + """ + try: + with open(file_path, "rb") as f: + file_bytes = f.read() + result = sha256(file_bytes).hexdigest() + + if result != sha256sum: + msg = "File sha256sum mismatch, expected:'{}' but got '{}'".format( + sha256sum, result + ) + logger.debug(msg) + return False + + return True + except (APIError, FileNotFoundError): + msg = "File: '{}' could not be opened".format(file_path) + logger.error(msg) + return False + + def _is_promtail_binary_in_charm(self, binary_path: str) -> bool: + """Check if Promtail binary is already stored in charm container. + + Args: + binary_path: string path of promtail binary to check + + Returns: + a boolean representing whether Promtail is present or not. + """ + return True if Path(binary_path).is_file() else False + + def _download_and_push_promtail_to_workload(self, promtail_info: dict) -> None: + """Downloads a Promtail zip file and pushes the binary to the workload. + + Args: + promtail_info: dictionary containing information about promtail binary + that must be used. The dictionary must have three keys + - "filename": filename of promtail binary + - "zipsha": sha256 sum of zip file of promtail binary + - "binsha": sha256 sum of unpacked promtail binary + """ + # Check for Juju proxy variables and fall back to standard ones if not set + # If no Juju proxy variable was set, we set proxies to None to let the ProxyHandler get + # the proxy env variables from the environment + proxies = { + # The ProxyHandler uses only the protocol names as keys + # https://docs.python.org/3/library/urllib.request.html#urllib.request.ProxyHandler + "https": os.environ.get("JUJU_CHARM_HTTPS_PROXY", ""), + "http": os.environ.get("JUJU_CHARM_HTTP_PROXY", ""), + # The ProxyHandler uses `no` for the no_proxy key + # https://github.com/python/cpython/blob/3.12/Lib/urllib/request.py#L2553 + "no": os.environ.get("JUJU_CHARM_NO_PROXY", ""), + } + proxies = {k: v for k, v in proxies.items() if v != ""} or None + + proxy_handler = request.ProxyHandler(proxies) + opener = request.build_opener(proxy_handler) + + with opener.open(promtail_info["url"]) as r: + file_bytes = r.read() + file_path = os.path.join(BINARY_DIR, promtail_info["filename"] + ".gz") + with open(file_path, "wb") as f: + f.write(file_bytes) + logger.info( + "Promtail binary zip file has been downloaded and stored in: %s", + file_path, + ) + + decompressed_file = GzipFile(fileobj=BytesIO(file_bytes)) + binary_path = os.path.join(BINARY_DIR, promtail_info["filename"]) + with open(binary_path, "wb") as outfile: + outfile.write(decompressed_file.read()) + logger.debug("Promtail binary file has been downloaded.") + + workload_binary_path = os.path.join(WORKLOAD_BINARY_DIR, promtail_info["filename"]) + self._push_binary_to_workload(binary_path, workload_binary_path) + + @property + def _cli_args(self) -> str: + """Return the cli arguments to pass to promtail. + + Returns: + The arguments as a string + """ + return "-config.file={}".format(WORKLOAD_CONFIG_PATH) + + @property + def _current_config(self) -> dict: + """Property that returns the current Promtail configuration. + + Returns: + A dict containing Promtail configuration. + """ + if not self._container.can_connect(): + logger.debug("Could not connect to promtail container!") + return {} + try: + raw_current = self._container.pull(WORKLOAD_CONFIG_PATH).read() + return yaml.safe_load(raw_current) + except (ProtocolError, PathError) as e: + logger.warning( + "Could not check the current promtail configuration due to " + "a failure in retrieving the file: %s", + e, + ) + return {} + + @property + def _promtail_config(self) -> dict: + """Generates the config file for Promtail. + + Reference: https://grafana.com/docs/loki/latest/send-data/promtail/configuration + """ + config = {"clients": self._clients_list()} + if self.insecure_skip_verify: + for client in config["clients"]: + client["tls_config"] = {"insecure_skip_verify": True} + + config.update(self._server_config()) + config.update(self._positions()) + config.update(self._scrape_configs()) + return config + + def _clients_list(self) -> list: + """Generates a list of clients for use in the promtail config. + + Returns: + A list of endpoints + """ + return self.loki_endpoints + + def _server_config(self) -> dict: + """Generates the server section of the Promtail config file. + + Returns: + A dict representing the `server` section. + """ + return { + "server": { + "http_listen_port": HTTP_LISTEN_PORT, + "grpc_listen_port": GRPC_LISTEN_PORT, + } + } + + def _positions(self) -> dict: + """Generates the positions section of the Promtail config file. + + Returns: + A dict representing the `positions` section. + """ + return {"positions": {"filename": WORKLOAD_POSITIONS_PATH}} + + def _scrape_configs(self) -> dict: + """Generates the scrape_configs section of the Promtail config file. + + Returns: + A dict representing the `scrape_configs` section. + """ + job_name = "juju_{}".format(self.topology.identifier) + + # The new JujuTopology doesn't include unit, but LogProxyConsumer should have it + common_labels = { + "juju_{}".format(k): v + for k, v in self.topology.as_dict(remapped_keys={"charm_name": "charm"}).items() + } + scrape_configs = [] + + # Files config + labels = common_labels.copy() + labels.update( + { + "job": job_name, + "__path__": "", + } + ) + config = {"targets": ["localhost"], "labels": labels} + scrape_config = { + "job_name": "system", + "static_configs": self._generate_static_configs(config), + } + scrape_configs.append(scrape_config) + + # Syslog config + if self._is_syslog: + relabel_mappings = [ + "severity", + "facility", + "hostname", + "app_name", + "proc_id", + "msg_id", + ] + syslog_labels = common_labels.copy() + syslog_labels.update({"job": "{}_syslog".format(job_name)}) + syslog_config = { + "job_name": "syslog", + "syslog": { + "listen_address": "127.0.0.1:{}".format(self._syslog_port), + "label_structured_data": True, + "labels": syslog_labels, + }, + "relabel_configs": [ + {"source_labels": ["__syslog_message_{}".format(val)], "target_label": val} + for val in relabel_mappings + ] + + [{"action": "labelmap", "regex": "__syslog_message_sd_(.+)"}], + } + scrape_configs.append(syslog_config) # type: ignore + + return {"scrape_configs": scrape_configs} + + def _generate_static_configs(self, config: dict) -> list: + """Generates static_configs section. + + Returns: + - a list of dictionaries representing static_configs section + """ + static_configs = [] + + for _file in self._log_files: + conf = deepcopy(config) + conf["labels"]["__path__"] = _file + static_configs.append(conf) + + return static_configs + + def _setup_promtail(self) -> None: + # Use the first + relations = self._charm.model.relations[self._relation_name] + if len(relations) > 1: + logger.debug( + "Multiple log_proxy relations. Getting Promtail from application {}".format( + relations[0].app.name + ) + ) + relation = relations[0] + promtail_binaries = json.loads( + relation.data[relation.app].get("promtail_binary_zip_url", "{}") + ) + if not promtail_binaries: + return + + if not self._is_promtail_installed(promtail_binaries[self._arch]): + try: + self._obtain_promtail(promtail_binaries[self._arch]) + except HTTPError as e: + msg = "Promtail binary couldn't be downloaded - {}".format(str(e)) + logger.warning(msg) + self.on.promtail_digest_error.emit(msg) + return + + workload_binary_path = os.path.join( + WORKLOAD_BINARY_DIR, promtail_binaries[self._arch]["filename"] + ) + + self._create_directories() + self._container.push( + WORKLOAD_CONFIG_PATH, yaml.safe_dump(self._promtail_config), make_dirs=True + ) + + self._add_pebble_layer(workload_binary_path) + + if self._current_config.get("clients"): + try: + self._container.restart(WORKLOAD_SERVICE_NAME) + except ChangeError as e: + self.on.promtail_digest_error.emit(str(e)) + else: + self.on.log_proxy_endpoint_joined.emit() + else: + self.on.promtail_digest_error.emit("No promtail client endpoints available!") + + def _is_promtail_installed(self, promtail_info: dict) -> bool: + """Determine if promtail has already been installed to the container. + + Args: + promtail_info: dictionary containing information about promtail binary + that must be used. The dictionary must at least contain a key + "filename" giving the name of promtail binary + """ + workload_binary_path = "{}/{}".format(WORKLOAD_BINARY_DIR, promtail_info["filename"]) + try: + self._container.list_files(workload_binary_path) + except (APIError, FileNotFoundError): + return False + return True + + @property + def syslog_port(self) -> str: + """Gets the port on which promtail is listening for syslog. + + Returns: + A str representing the port + """ + return str(self._syslog_port) + + @property + def rsyslog_config(self) -> str: + """Generates a config line for use with rsyslog. + + Returns: + The rsyslog config line as a string + """ + return 'action(type="omfwd" protocol="tcp" target="127.0.0.1" port="{}" Template="RSYSLOG_SyslogProtocol23Format" TCP_Framing="octet-counted")'.format( + self._syslog_port + ) + + +class CosTool: + """Uses cos-tool to inject label matchers into alert rule expressions and validate rules.""" + + _path = None + _disabled = False + + def __init__(self, charm): + self._charm = charm + + @property + def path(self): + """Lazy lookup of the path of cos-tool.""" + if self._disabled: + return None + if not self._path: + self._path = self._get_tool_path() + if not self._path: + logger.debug("Skipping injection of juju topology as label matchers") + self._disabled = True + return self._path + + def apply_label_matchers(self, rules) -> dict: + """Will apply label matchers to the expression of all alerts in all supplied groups.""" + if not self.path: + return rules + for group in rules["groups"]: + rules_in_group = group.get("rules", []) + for rule in rules_in_group: + topology = {} + # if the user for some reason has provided juju_unit, we'll need to honor it + # in most cases, however, this will be empty + for label in [ + "juju_model", + "juju_model_uuid", + "juju_application", + "juju_charm", + "juju_unit", + ]: + if label in rule["labels"]: + topology[label] = rule["labels"][label] + + rule["expr"] = self.inject_label_matchers(rule["expr"], topology) + return rules + + def validate_alert_rules(self, rules: dict) -> Tuple[bool, str]: + """Will validate correctness of alert rules, returning a boolean and any errors.""" + if not self.path: + logger.debug("`cos-tool` unavailable. Not validating alert correctness.") + return True, "" + + with tempfile.TemporaryDirectory() as tmpdir: + rule_path = Path(tmpdir + "/validate_rule.yaml") + + # Smash "our" rules format into what upstream actually uses, which is more like: + # + # groups: + # - name: foo + # rules: + # - alert: SomeAlert + # expr: up + # - alert: OtherAlert + # expr: up + transformed_rules = {"groups": []} # type: ignore + for rule in rules["groups"]: + transformed_rules["groups"].append(rule) + + rule_path.write_text(yaml.dump(transformed_rules)) + args = [str(self.path), "--format", "logql", "validate", str(rule_path)] + # noinspection PyBroadException + try: + self._exec(args) + return True, "" + except subprocess.CalledProcessError as e: + logger.debug("Validating the rules failed: %s", e.output) + return False, ", ".join([line for line in e.output if "error validating" in line]) + + def inject_label_matchers(self, expression, topology) -> str: + """Add label matchers to an expression.""" + if not topology: + return expression + if not self.path: + logger.debug("`cos-tool` unavailable. Leaving expression unchanged: %s", expression) + return expression + args = [str(self.path), "--format", "logql", "transform"] + args.extend( + ["--label-matcher={}={}".format(key, value) for key, value in topology.items()] + ) + + args.extend(["{}".format(expression)]) + # noinspection PyBroadException + try: + return self._exec(args) + except subprocess.CalledProcessError as e: + logger.debug('Applying the expression failed: "%s", falling back to the original', e) + print('Applying the expression failed: "{}", falling back to the original'.format(e)) + return expression + + def _get_tool_path(self) -> Optional[Path]: + arch = platform.processor() + arch = "amd64" if arch == "x86_64" else arch + res = "cos-tool-{}".format(arch) + try: + path = Path(res).resolve() + path.chmod(0o777) + return path + except NotImplementedError: + logger.debug("System lacks support for chmod") + except FileNotFoundError: + logger.debug('Could not locate cos-tool at: "{}"'.format(res)) + return None + + def _exec(self, cmd) -> str: + result = subprocess.run(cmd, check=True, stdout=subprocess.PIPE) + output = result.stdout.decode("utf-8").strip() + return output diff --git a/examples/django/charm/lib/charms/loki_k8s/v1/loki_push_api.py b/examples/django/charm/lib/charms/loki_k8s/v1/loki_push_api.py index 7f8372c..d75cb7e 100644 --- a/examples/django/charm/lib/charms/loki_k8s/v1/loki_push_api.py +++ b/examples/django/charm/lib/charms/loki_k8s/v1/loki_push_api.py @@ -480,6 +480,25 @@ def _alert_rules_error(self, event): Units of consumer charm send their alert rules over app relation data using the `alert_rules` key. + +## Charm logging +The `charms.loki_k8s.v0.charm_logging` library can be used in conjunction with this one to configure python's +logging module to forward all logs to Loki via the loki-push-api interface. + +```python +from lib.charms.loki_k8s.v0.charm_logging import log_charm +from lib.charms.loki_k8s.v1.loki_push_api import charm_logging_config, LokiPushApiConsumer + +@log_charm(logging_endpoint="my_endpoints", server_cert="cert_path") +class MyCharm(...): + _cert_path = "/path/to/cert/on/charm/container.crt" + def __init__(self, ...): + self.logging = LokiPushApiConsumer(...) + self.my_endpoints, self.cert_path = charm_logging_config( + self.logging, self._cert_path) +``` + +Do this, and all charm logs will be forwarded to Loki as soon as a relation is formed. """ import json @@ -527,7 +546,7 @@ def _alert_rules_error(self, event): # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 12 +LIBPATCH = 13 PYDEPS = ["cosl"] @@ -577,7 +596,11 @@ def _alert_rules_error(self, event): GRPC_LISTEN_PORT_START = 9095 # odd start port -class RelationNotFoundError(ValueError): +class LokiPushApiError(Exception): + """Base class for errors raised by this module.""" + + +class RelationNotFoundError(LokiPushApiError): """Raised if there is no relation with the given name.""" def __init__(self, relation_name: str): @@ -587,7 +610,7 @@ def __init__(self, relation_name: str): super().__init__(self.message) -class RelationInterfaceMismatchError(Exception): +class RelationInterfaceMismatchError(LokiPushApiError): """Raised if the relation with the given name has a different interface.""" def __init__( @@ -607,7 +630,7 @@ def __init__( super().__init__(self.message) -class RelationRoleMismatchError(Exception): +class RelationRoleMismatchError(LokiPushApiError): """Raised if the relation with the given name has a different direction.""" def __init__( @@ -2555,7 +2578,7 @@ def _on_pebble_ready(self, event: PebbleReadyEvent): self._update_endpoints(event.workload, loki_endpoints) - def _update_logging(self, _): + def _update_logging(self, event: RelationEvent): """Update the log forwarding to match the active Loki endpoints.""" if not (loki_endpoints := self._retrieve_endpoints_from_relation()): logger.warning("No Loki endpoints available") @@ -2566,6 +2589,8 @@ def _update_logging(self, _): self._update_endpoints(container, loki_endpoints) # else: `_update_endpoints` will be called on pebble-ready anyway. + self._handle_alert_rules(event.relation) + def _retrieve_endpoints_from_relation(self) -> dict: loki_endpoints = {} @@ -2750,3 +2775,49 @@ def _exec(self, cmd) -> str: result = subprocess.run(cmd, check=True, stdout=subprocess.PIPE) output = result.stdout.decode("utf-8").strip() return output + + +def charm_logging_config( + endpoint_requirer: LokiPushApiConsumer, cert_path: Optional[Union[Path, str]] +) -> Tuple[Optional[List[str]], Optional[str]]: + """Utility function to determine the charm_logging config you will likely want. + + If no endpoint is provided: + disable charm logging. + If https endpoint is provided but cert_path is not found on disk: + disable charm logging. + If https endpoint is provided and cert_path is None: + ERROR + Else: + proceed with charm logging (with or without tls, as appropriate) + + Args: + endpoint_requirer: an instance of LokiPushApiConsumer. + cert_path: a path where a cert is stored. + + Returns: + A tuple with (optionally) the values of the endpoints and the certificate path. + + Raises: + LokiPushApiError: if some endpoint are http and others https. + """ + endpoints = [ep["url"] for ep in endpoint_requirer.loki_endpoints] + if not endpoints: + return None, None + + https = tuple(endpoint.startswith("https://") for endpoint in endpoints) + + if all(https): # all endpoints are https + if cert_path is None: + raise LokiPushApiError("Cannot send logs to https endpoints without a certificate.") + if not Path(cert_path).exists(): + # if endpoints is https BUT we don't have a server_cert yet: + # disable charm logging until we do to prevent tls errors + return None, None + return endpoints, str(cert_path) + + if all(not x for x in https): # all endpoints are http + return endpoints, None + + # if there's a disagreement, that's very weird: + raise LokiPushApiError("Some endpoints are http, some others are https. That's not good.") diff --git a/examples/django/charm/lib/charms/prometheus_k8s/v0/prometheus_scrape.py b/examples/django/charm/lib/charms/prometheus_k8s/v0/prometheus_scrape.py index e3d35c6..ca554fb 100644 --- a/examples/django/charm/lib/charms/prometheus_k8s/v0/prometheus_scrape.py +++ b/examples/django/charm/lib/charms/prometheus_k8s/v0/prometheus_scrape.py @@ -362,7 +362,7 @@ def _on_scrape_targets_changed(self, event): # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 47 +LIBPATCH = 48 PYDEPS = ["cosl"] @@ -2364,12 +2364,9 @@ def _get_tool_path(self) -> Optional[Path]: arch = "amd64" if arch == "x86_64" else arch res = "cos-tool-{}".format(arch) try: - path = Path(res).resolve() - path.chmod(0o777) + path = Path(res).resolve(strict=True) return path - except NotImplementedError: - logger.debug("System lacks support for chmod") - except FileNotFoundError: + except (FileNotFoundError, OSError): logger.debug('Could not locate cos-tool at: "{}"'.format(res)) return None diff --git a/examples/django/charm/lib/charms/redis_k8s/v0/redis.py b/examples/django/charm/lib/charms/redis_k8s/v0/redis.py index a731507..e28b14c 100644 --- a/examples/django/charm/lib/charms/redis_k8s/v0/redis.py +++ b/examples/django/charm/lib/charms/redis_k8s/v0/redis.py @@ -39,7 +39,7 @@ # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version. -LIBPATCH = 6 +LIBPATCH = 7 logger = logging.getLogger(__name__) @@ -78,6 +78,18 @@ def _on_relation_broken(self, event): # Trigger an event that our charm can react to. self.charm.on.redis_relation_updated.emit() + @property + def app_data(self) -> Optional[Dict[str, str]]: + """Retrieve the app data. + + Returns: + Dict: dict containing the app data. + """ + relation = self.model.get_relation(self.relation_name) + if not relation: + return None + return relation.data[relation.app] + @property def relation_data(self) -> Optional[Dict[str, str]]: """Retrieve the relation data. @@ -98,10 +110,16 @@ def url(self) -> Optional[str]: Returns: str: the Redis URL. """ - relation_data = self.relation_data - if not relation_data: + if not (relation_data := self.relation_data): return None + redis_host = relation_data.get("hostname") + + if app_data := self.app_data: + try: + redis_host = self.app_data.get("leader-host", redis_host) + except KeyError: + pass redis_port = relation_data.get("port") return f"redis://{redis_host}:{redis_port}" diff --git a/examples/django/charm/lib/charms/saml_integrator/v0/saml.py b/examples/django/charm/lib/charms/saml_integrator/v0/saml.py new file mode 100644 index 0000000..8fd1610 --- /dev/null +++ b/examples/django/charm/lib/charms/saml_integrator/v0/saml.py @@ -0,0 +1,347 @@ +#!/usr/bin/env python3 + +# Copyright 2024 Canonical Ltd. +# Licensed under the Apache2.0. See LICENSE file in charm source for details. + +"""Library to manage the relation data for the SAML Integrator charm. + +This library contains the Requires and Provides classes for handling the relation +between an application and a charm providing the `saml`relation. +It also contains a `SamlRelationData` class to wrap the SAML data that will +be shared via the relation. + +### Requirer Charm + +```python + +from charms.saml_integrator.v0 import SamlDataAvailableEvent, SamlRequires + +class SamlRequirerCharm(ops.CharmBase): + def __init__(self, *args): + super().__init__(*args) + self.saml = saml.SamlRequires(self) + self.framework.observe(self.saml.on.saml_data_available, self._handler) + ... + + def _handler(self, events: SamlDataAvailableEvent) -> None: + ... + +``` + +As shown above, the library provides a custom event to handle the scenario in +which new SAML data has been added or updated. + +### Provider Charm + +Following the previous example, this is an example of the provider charm. + +```python +from charms.saml_integrator.v0 import SamlDataAvailableEvent, SamlRequires + +class SamlRequirerCharm(ops.CharmBase): + def __init__(self, *args): + super().__init__(*args) + self.saml = SamlRequires(self) + self.framework.observe(self.saml.on.saml_data_available, self._on_saml_data_available) + ... + + def _on_saml_data_available(self, events: SamlDataAvailableEvent) -> None: + ... + + def __init__(self, *args): + super().__init__(*args) + self.saml = SamlProvides(self) + +``` +The SamlProvides object wraps the list of relations into a `relations` property +and provides an `update_relation_data` method to update the relation data by passing +a `SamlRelationData` data object. +Additionally, SamlRelationData can be used to directly parse the relation data with the +class method `from_relation_data`. +""" + +# The unique Charmhub library identifier, never change it +LIBID = "511cdfa7de3d43568bf9b512f9c9f89d" + +# Increment this major API version when introducing breaking changes +LIBAPI = 0 + +# Increment this PATCH version before using `charmcraft publish-lib` or reset +# to 0 if you are raising the major API version +LIBPATCH = 10 + +# pylint: disable=wrong-import-position +import re +import typing + +import ops +from pydantic import AnyHttpUrl, BaseModel, Field +from pydantic.tools import parse_obj_as + +DEFAULT_RELATION_NAME = "saml" + + +class SamlEndpoint(BaseModel): + """Represent a SAML endpoint. + + Attrs: + name: Endpoint name. + url: Endpoint URL. + binding: Endpoint binding. + response_url: URL to address the response to. + """ + + name: str = Field(..., min_length=1) + url: typing.Optional[AnyHttpUrl] + binding: str = Field(..., min_length=1) + response_url: typing.Optional[AnyHttpUrl] + + def to_relation_data(self) -> typing.Dict[str, str]: + """Convert an instance of SamlEndpoint to the relation representation. + + Returns: + Dict containing the representation. + """ + result: typing.Dict[str, str] = {} + # Get the HTTP method from the SAML binding + http_method = self.binding.split(":")[-1].split("-")[-1].lower() + # Transform name into snakecase + lowercase_name = re.sub(r"(? "SamlEndpoint": + """Initialize a new instance of the SamlEndpoint class from the relation data. + + Args: + relation_data: the relation data. + + Returns: + A SamlEndpoint instance. + """ + url_key = "" + for key in relation_data: + # A key per method and entpoint type that is always present + if key.endswith("_redirect_url") or key.endswith("_post_url"): + url_key = key + # Get endpoint name from the relation data key + lowercase_name = "_".join(url_key.split("_")[:-2]) + name = "".join(x.capitalize() for x in lowercase_name.split("_")) + # Get HTTP method from the relation data key + http_method = url_key.split("_")[-2] + prefix = f"{lowercase_name}_{http_method}_" + return cls( + name=name, + url=( + parse_obj_as(AnyHttpUrl, relation_data[f"{prefix}url"]) + if relation_data[f"{prefix}url"] + else None + ), + binding=relation_data[f"{prefix}binding"], + response_url=( + parse_obj_as(AnyHttpUrl, relation_data[f"{prefix}response_url"]) + if f"{prefix}response_url" in relation_data + else None + ), + ) + + +class SamlRelationData(BaseModel): + """Represent the relation data. + + Attrs: + entity_id: SAML entity ID. + metadata_url: URL to the metadata. + certificates: Tuple of SAML certificates. + endpoints: Tuple of SAML endpoints. + """ + + entity_id: str = Field(..., min_length=1) + metadata_url: typing.Optional[AnyHttpUrl] + certificates: typing.Tuple[str, ...] + endpoints: typing.Tuple[SamlEndpoint, ...] + + def to_relation_data(self) -> typing.Dict[str, str]: + """Convert an instance of SamlDataAvailableEvent to the relation representation. + + Returns: + Dict containing the representation. + """ + result = { + "entity_id": self.entity_id, + "x509certs": ",".join(self.certificates), + } + if self.metadata_url: + result["metadata_url"] = str(self.metadata_url) + for endpoint in self.endpoints: + result.update(endpoint.to_relation_data()) + return result + + @classmethod + def from_relation_data(cls, relation_data: ops.RelationDataContent) -> "SamlRelationData": + """Get a SamlRelationData wrapping the relation data. + + Arguments: + relation_data: the relation data. + + Returns: a SamlRelationData instance with the relation data. + """ + # mypy is not aware of the relation data being present + endpoints = [ + SamlEndpoint.from_relation_data( + { + key2: relation_data.get(key2) # type: ignore + for key2 in relation_data + if key2.startswith("_".join(key.split("_")[:-1])) + } + ) + for key in relation_data + if key.endswith("_redirect_url") or key.endswith("_post_url") + ] + endpoints.sort(key=lambda ep: ep.name) + return cls( + entity_id=relation_data.get("entity_id"), # type: ignore + metadata_url=( + parse_obj_as(AnyHttpUrl, relation_data.get("metadata_url")) + if relation_data.get("metadata_url") + else None + ), # type: ignore + certificates=tuple(relation_data.get("x509certs").split(",")), # type: ignore + endpoints=tuple(endpoints), + ) + + +class SamlDataAvailableEvent(ops.RelationEvent): + """Saml event emitted when relation data has changed. + + Attrs: + saml_relation_data: the SAML relation data + entity_id: SAML entity ID. + metadata_url: URL to the metadata. + certificates: Tuple containing the SAML certificates. + endpoints: Tuple containing the SAML endpoints. + """ + + @property + def saml_relation_data(self) -> SamlRelationData: + """Get a SamlRelationData for the relation data.""" + assert self.relation.app + return SamlRelationData.from_relation_data(self.relation.data[self.relation.app]) + + @property + def entity_id(self) -> str: + """Fetch the SAML entity ID from the relation.""" + return self.saml_relation_data.entity_id + + @property + def metadata_url(self) -> typing.Optional[str]: + """Fetch the SAML metadata URL from the relation.""" + return str(self.saml_relation_data.metadata_url) + + @property + def certificates(self) -> typing.Tuple[str, ...]: + """Fetch the SAML certificates from the relation.""" + return self.saml_relation_data.certificates + + @property + def endpoints(self) -> typing.Tuple[SamlEndpoint, ...]: + """Fetch the SAML endpoints from the relation.""" + return self.saml_relation_data.endpoints + + +class SamlRequiresEvents(ops.CharmEvents): + """SAML events. + + This class defines the events that a SAML requirer can emit. + + Attrs: + saml_data_available: the SamlDataAvailableEvent. + """ + + saml_data_available = ops.EventSource(SamlDataAvailableEvent) + + +class SamlRequires(ops.Object): + """Requirer side of the SAML relation. + + Attrs: + on: events the provider can emit. + """ + + on = SamlRequiresEvents() + + def __init__(self, charm: ops.CharmBase, relation_name: str = DEFAULT_RELATION_NAME) -> None: + """Construct. + + Args: + charm: the provider charm. + relation_name: the relation name. + """ + super().__init__(charm, relation_name) + self.charm = charm + self.relation_name = relation_name + self.framework.observe(charm.on[relation_name].relation_changed, self._on_relation_changed) + + def _on_relation_changed(self, event: ops.RelationChangedEvent) -> None: + """Event emitted when the relation has changed. + + Args: + event: event triggering this handler. + """ + assert event.relation.app + if event.relation.data[event.relation.app]: + self.on.saml_data_available.emit(event.relation, app=event.app, unit=event.unit) + + def get_relation_data(self) -> typing.Optional[SamlRelationData]: + """Retrieve the relation data. + + Returns: + SmtpRelationData: the relation data. + """ + relation = self.model.get_relation(self.relation_name) + if not relation or not relation.app or not relation.data[relation.app]: + return None + return SamlRelationData.from_relation_data(relation.data[relation.app]) + + +class SamlProvides(ops.Object): + """Provider side of the SAML relation. + + Attrs: + relations: list of charm relations. + """ + + def __init__(self, charm: ops.CharmBase, relation_name: str = DEFAULT_RELATION_NAME) -> None: + """Construct. + + Args: + charm: the provider charm. + relation_name: the relation name. + """ + super().__init__(charm, relation_name) + self.charm = charm + self.relation_name = relation_name + + @property + def relations(self) -> typing.List[ops.Relation]: + """The list of Relation instances associated with this relation_name. + + Returns: + List of relations to this charm. + """ + return list(self.model.relations[self.relation_name]) + + def update_relation_data(self, relation: ops.Relation, saml_data: SamlRelationData) -> None: + """Update the relation data. + + Args: + relation: the relation for which to update the data. + saml_data: a SamlRelationData instance wrapping the data to be updated. + """ + relation.data[self.charm.model.app].update(saml_data.to_relation_data()) diff --git a/examples/django/charm/lib/charms/tempo_coordinator_k8s/v0/tracing.py b/examples/django/charm/lib/charms/tempo_coordinator_k8s/v0/tracing.py new file mode 100644 index 0000000..27144fa --- /dev/null +++ b/examples/django/charm/lib/charms/tempo_coordinator_k8s/v0/tracing.py @@ -0,0 +1,987 @@ +# Copyright 2024 Canonical Ltd. +# See LICENSE file for licensing details. +"""## Overview. + +This document explains how to integrate with the Tempo charm for the purpose of pushing traces to a +tracing endpoint provided by Tempo. It also explains how alternative implementations of the Tempo charm +may maintain the same interface and be backward compatible with all currently integrated charms. + +## Requirer Library Usage + +Charms seeking to push traces to Tempo, must do so using the `TracingEndpointRequirer` +object from this charm library. For the simplest use cases, using the `TracingEndpointRequirer` +object only requires instantiating it, typically in the constructor of your charm. The +`TracingEndpointRequirer` constructor requires the name of the relation over which a tracing endpoint + is exposed by the Tempo charm, and a list of protocols it intends to send traces with. + This relation must use the `tracing` interface. + The `TracingEndpointRequirer` object may be instantiated as follows + + from charms.tempo_coordinator_k8s.v0.tracing import TracingEndpointRequirer + + def __init__(self, *args): + super().__init__(*args) + # ... + self.tracing = TracingEndpointRequirer(self, + protocols=['otlp_grpc', 'otlp_http', 'jaeger_http_thrift'] + ) + # ... + +Note that the first argument (`self`) to `TracingEndpointRequirer` is always a reference to the +parent charm. + +Alternatively to providing the list of requested protocols at init time, the charm can do it at +any point in time by calling the +`TracingEndpointRequirer.request_protocols(*protocol:str, relation:Optional[Relation])` method. +Using this method also allows you to use per-relation protocols. + +Units of requirer charms obtain the tempo endpoint to which they will push their traces by calling +`TracingEndpointRequirer.get_endpoint(protocol: str)`, where `protocol` is, for example: +- `otlp_grpc` +- `otlp_http` +- `zipkin` +- `tempo` + +If the `protocol` is not in the list of protocols that the charm requested at endpoint set-up time, +the library will raise an error. + +We recommend that you scale up your tracing provider and relate it to an ingress so that your tracing requests +go through the ingress and get load balanced across all units. Otherwise, if the provider's leader goes down, your tracing goes down. + +## Provider Library Usage + +The `TracingEndpointProvider` object may be used by charms to manage relations with their +trace sources. For this purposes a Tempo-like charm needs to do two things + +1. Instantiate the `TracingEndpointProvider` object by providing it a +reference to the parent (Tempo) charm and optionally the name of the relation that the Tempo charm +uses to interact with its trace sources. This relation must conform to the `tracing` interface +and it is strongly recommended that this relation be named `tracing` which is its +default value. + +For example a Tempo charm may instantiate the `TracingEndpointProvider` in its constructor as +follows + + from charms.tempo_coordinator_k8s.v0.tracing import TracingEndpointProvider + + def __init__(self, *args): + super().__init__(*args) + # ... + self.tracing = TracingEndpointProvider(self) + # ... + + + +""" # noqa: W505 +import enum +import json +import logging +from pathlib import Path +from typing import ( + TYPE_CHECKING, + Any, + Dict, + List, + Literal, + MutableMapping, + Optional, + Sequence, + Tuple, + Union, + cast, +) + +import pydantic +from ops.charm import ( + CharmBase, + CharmEvents, + RelationBrokenEvent, + RelationEvent, + RelationRole, +) +from ops.framework import EventSource, Object +from ops.model import ModelError, Relation +from pydantic import BaseModel, Field + +# The unique Charmhub library identifier, never change it +LIBID = "d2f02b1f8d1244b5989fd55bc3a28943" + +# Increment this major API version when introducing breaking changes +LIBAPI = 0 + +# Increment this PATCH version before using `charmcraft publish-lib` or reset +# to 0 if you are raising the major API version +LIBPATCH = 5 + +PYDEPS = ["pydantic"] + +logger = logging.getLogger(__name__) + +DEFAULT_RELATION_NAME = "tracing" +RELATION_INTERFACE_NAME = "tracing" + +# Supported list rationale https://github.com/canonical/tempo-coordinator-k8s-operator/issues/8 +ReceiverProtocol = Literal[ + "zipkin", + "otlp_grpc", + "otlp_http", + "jaeger_grpc", + "jaeger_thrift_http", +] + +RawReceiver = Tuple[ReceiverProtocol, str] +"""Helper type. A raw receiver is defined as a tuple consisting of the protocol name, and the (external, if available), +(secured, if available) resolvable server url. +""" + +BUILTIN_JUJU_KEYS = {"ingress-address", "private-address", "egress-subnets"} + + +class TransportProtocolType(str, enum.Enum): + """Receiver Type.""" + + http = "http" + grpc = "grpc" + + +receiver_protocol_to_transport_protocol: Dict[ReceiverProtocol, TransportProtocolType] = { + "zipkin": TransportProtocolType.http, + "otlp_grpc": TransportProtocolType.grpc, + "otlp_http": TransportProtocolType.http, + "jaeger_thrift_http": TransportProtocolType.http, + "jaeger_grpc": TransportProtocolType.grpc, +} +"""A mapping between telemetry protocols and their corresponding transport protocol. +""" + + +class TracingError(Exception): + """Base class for custom errors raised by this library.""" + + +class NotReadyError(TracingError): + """Raised by the provider wrapper if a requirer hasn't published the required data (yet).""" + + +class ProtocolNotRequestedError(TracingError): + """Raised if the user attempts to obtain an endpoint for a protocol it did not request.""" + + +class DataValidationError(TracingError): + """Raised when data validation fails on IPU relation data.""" + + +class AmbiguousRelationUsageError(TracingError): + """Raised when one wrongly assumes that there can only be one relation on an endpoint.""" + + +if int(pydantic.version.VERSION.split(".")[0]) < 2: + + class DatabagModel(BaseModel): # type: ignore + """Base databag model.""" + + class Config: + """Pydantic config.""" + + # ignore any extra fields in the databag + extra = "ignore" + """Ignore any extra fields in the databag.""" + allow_population_by_field_name = True + """Allow instantiating this class by field name (instead of forcing alias).""" + + _NEST_UNDER = None + + @classmethod + def load(cls, databag: MutableMapping): + """Load this model from a Juju databag.""" + if cls._NEST_UNDER: + return cls.parse_obj(json.loads(databag[cls._NEST_UNDER])) + + try: + data = { + k: json.loads(v) + for k, v in databag.items() + # Don't attempt to parse model-external values + if k in {f.alias for f in cls.__fields__.values()} + } + except json.JSONDecodeError as e: + msg = f"invalid databag contents: expecting json. {databag}" + logger.error(msg) + raise DataValidationError(msg) from e + + try: + return cls.parse_raw(json.dumps(data)) # type: ignore + except pydantic.ValidationError as e: + msg = f"failed to validate databag: {databag}" + logger.debug(msg, exc_info=True) + raise DataValidationError(msg) from e + + def dump(self, databag: Optional[MutableMapping] = None, clear: bool = True): + """Write the contents of this model to Juju databag. + + :param databag: the databag to write the data to. + :param clear: ensure the databag is cleared before writing it. + """ + if clear and databag: + databag.clear() + + if databag is None: + databag = {} + + if self._NEST_UNDER: + databag[self._NEST_UNDER] = self.json(by_alias=True) + return databag + + dct = self.dict() + for key, field in self.__fields__.items(): # type: ignore + value = dct[key] + databag[field.alias or key] = json.dumps(value) + + return databag + +else: + from pydantic import ConfigDict + + class DatabagModel(BaseModel): + """Base databag model.""" + + model_config = ConfigDict( + # ignore any extra fields in the databag + extra="ignore", + # Allow instantiating this class by field name (instead of forcing alias). + populate_by_name=True, + # Custom config key: whether to nest the whole datastructure (as json) + # under a field or spread it out at the toplevel. + _NEST_UNDER=None, # type: ignore + ) + """Pydantic config.""" + + @classmethod + def load(cls, databag: MutableMapping): + """Load this model from a Juju databag.""" + nest_under = cls.model_config.get("_NEST_UNDER") # type: ignore + if nest_under: + return cls.model_validate(json.loads(databag[nest_under])) # type: ignore + + try: + data = { + k: json.loads(v) + for k, v in databag.items() + # Don't attempt to parse model-external values + if k in {(f.alias or n) for n, f in cls.__fields__.items()} + } + except json.JSONDecodeError as e: + msg = f"invalid databag contents: expecting json. {databag}" + logger.error(msg) + raise DataValidationError(msg) from e + + try: + return cls.model_validate_json(json.dumps(data)) # type: ignore + except pydantic.ValidationError as e: + msg = f"failed to validate databag: {databag}" + logger.debug(msg, exc_info=True) + raise DataValidationError(msg) from e + + def dump(self, databag: Optional[MutableMapping] = None, clear: bool = True): + """Write the contents of this model to Juju databag. + + :param databag: the databag to write the data to. + :param clear: ensure the databag is cleared before writing it. + """ + if clear and databag: + databag.clear() + + if databag is None: + databag = {} + nest_under = self.model_config.get("_NEST_UNDER") + if nest_under: + databag[nest_under] = self.model_dump_json( # type: ignore + by_alias=True, + # skip keys whose values are default + exclude_defaults=True, + ) + return databag + + dct = self.model_dump() # type: ignore + for key, field in self.model_fields.items(): # type: ignore + value = dct[key] + if value == field.default: + continue + databag[field.alias or key] = json.dumps(value) + + return databag + + +# todo use models from charm-relation-interfaces +if int(pydantic.version.VERSION.split(".")[0]) < 2: + + class ProtocolType(BaseModel): # type: ignore + """Protocol Type.""" + + class Config: + """Pydantic config.""" + + use_enum_values = True + """Allow serializing enum values.""" + + name: str = Field( + ..., + description="Receiver protocol name. What protocols are supported (and what they are called) " + "may differ per provider.", + examples=["otlp_grpc", "otlp_http", "tempo_http"], + ) + + type: TransportProtocolType = Field( + ..., + description="The transport protocol used by this receiver.", + examples=["http", "grpc"], + ) + +else: + + class ProtocolType(BaseModel): + """Protocol Type.""" + + model_config = ConfigDict( # type: ignore + # Allow serializing enum values. + use_enum_values=True + ) + """Pydantic config.""" + + name: str = Field( + ..., + description="Receiver protocol name. What protocols are supported (and what they are called) " + "may differ per provider.", + examples=["otlp_grpc", "otlp_http", "tempo_http"], + ) + + type: TransportProtocolType = Field( + ..., + description="The transport protocol used by this receiver.", + examples=["http", "grpc"], + ) + + +class Receiver(BaseModel): + """Specification of an active receiver.""" + + protocol: ProtocolType = Field(..., description="Receiver protocol name and type.") + url: str = Field( + ..., + description="""URL at which the receiver is reachable. If there's an ingress, it would be the external URL. + Otherwise, it would be the service's fqdn or internal IP. + If the protocol type is grpc, the url will not contain a scheme.""", + examples=[ + "http://traefik_address:2331", + "https://traefik_address:2331", + "http://tempo_public_ip:2331", + "https://tempo_public_ip:2331", + "tempo_public_ip:2331", + ], + ) + + +class TracingProviderAppData(DatabagModel): # noqa: D101 + """Application databag model for the tracing provider.""" + + receivers: List[Receiver] = Field( + ..., + description="List of all receivers enabled on the tracing provider.", + ) + + +class TracingRequirerAppData(DatabagModel): # noqa: D101 + """Application databag model for the tracing requirer.""" + + receivers: List[ReceiverProtocol] + """Requested receivers.""" + + +class _AutoSnapshotEvent(RelationEvent): + __args__: Tuple[str, ...] = () + __optional_kwargs__: Dict[str, Any] = {} + + @classmethod + def __attrs__(cls): + return cls.__args__ + tuple(cls.__optional_kwargs__.keys()) + + def __init__(self, handle, relation, *args, **kwargs): + super().__init__(handle, relation) + + if not len(self.__args__) == len(args): + raise TypeError("expected {} args, got {}".format(len(self.__args__), len(args))) + + for attr, obj in zip(self.__args__, args): + setattr(self, attr, obj) + for attr, default in self.__optional_kwargs__.items(): + obj = kwargs.get(attr, default) + setattr(self, attr, obj) + + def snapshot(self) -> dict: + dct = super().snapshot() + for attr in self.__attrs__(): + obj = getattr(self, attr) + try: + dct[attr] = obj + except ValueError as e: + raise ValueError( + "cannot automagically serialize {}: " + "override this method and do it " + "manually.".format(obj) + ) from e + + return dct + + def restore(self, snapshot: dict) -> None: + super().restore(snapshot) + for attr, obj in snapshot.items(): + setattr(self, attr, obj) + + +class RelationNotFoundError(Exception): + """Raised if no relation with the given name is found.""" + + def __init__(self, relation_name: str): + self.relation_name = relation_name + self.message = "No relation named '{}' found".format(relation_name) + super().__init__(self.message) + + +class RelationInterfaceMismatchError(Exception): + """Raised if the relation with the given name has an unexpected interface.""" + + def __init__( + self, + relation_name: str, + expected_relation_interface: str, + actual_relation_interface: str, + ): + self.relation_name = relation_name + self.expected_relation_interface = expected_relation_interface + self.actual_relation_interface = actual_relation_interface + self.message = ( + "The '{}' relation has '{}' as interface rather than the expected '{}'".format( + relation_name, actual_relation_interface, expected_relation_interface + ) + ) + + super().__init__(self.message) + + +class RelationRoleMismatchError(Exception): + """Raised if the relation with the given name has a different role than expected.""" + + def __init__( + self, + relation_name: str, + expected_relation_role: RelationRole, + actual_relation_role: RelationRole, + ): + self.relation_name = relation_name + self.expected_relation_interface = expected_relation_role + self.actual_relation_role = actual_relation_role + self.message = "The '{}' relation has role '{}' rather than the expected '{}'".format( + relation_name, repr(actual_relation_role), repr(expected_relation_role) + ) + + super().__init__(self.message) + + +def _validate_relation_by_interface_and_direction( + charm: CharmBase, + relation_name: str, + expected_relation_interface: str, + expected_relation_role: RelationRole, +): + """Validate a relation. + + Verifies that the `relation_name` provided: (1) exists in metadata.yaml, + (2) declares as interface the interface name passed as `relation_interface` + and (3) has the right "direction", i.e., it is a relation that `charm` + provides or requires. + + Args: + charm: a `CharmBase` object to scan for the matching relation. + relation_name: the name of the relation to be verified. + expected_relation_interface: the interface name to be matched by the + relation named `relation_name`. + expected_relation_role: whether the `relation_name` must be either + provided or required by `charm`. + + Raises: + RelationNotFoundError: If there is no relation in the charm's metadata.yaml + with the same name as provided via `relation_name` argument. + RelationInterfaceMismatchError: The relation with the same name as provided + via `relation_name` argument does not have the same relation interface + as specified via the `expected_relation_interface` argument. + RelationRoleMismatchError: If the relation with the same name as provided + via `relation_name` argument does not have the same role as specified + via the `expected_relation_role` argument. + """ + if relation_name not in charm.meta.relations: + raise RelationNotFoundError(relation_name) + + relation = charm.meta.relations[relation_name] + + # fixme: why do we need to cast here? + actual_relation_interface = cast(str, relation.interface_name) + + if actual_relation_interface != expected_relation_interface: + raise RelationInterfaceMismatchError( + relation_name, expected_relation_interface, actual_relation_interface + ) + + if expected_relation_role is RelationRole.provides: + if relation_name not in charm.meta.provides: + raise RelationRoleMismatchError( + relation_name, RelationRole.provides, RelationRole.requires + ) + elif expected_relation_role is RelationRole.requires: + if relation_name not in charm.meta.requires: + raise RelationRoleMismatchError( + relation_name, RelationRole.requires, RelationRole.provides + ) + else: + raise TypeError("Unexpected RelationDirection: {}".format(expected_relation_role)) + + +class RequestEvent(RelationEvent): + """Event emitted when a remote requests a tracing endpoint.""" + + @property + def requested_receivers(self) -> List[ReceiverProtocol]: + """List of receiver protocols that have been requested.""" + relation = self.relation + app = relation.app + if not app: + raise NotReadyError("relation.app is None") + + return TracingRequirerAppData.load(relation.data[app]).receivers + + +class BrokenEvent(RelationBrokenEvent): + """Event emitted when a relation on tracing is broken.""" + + +class TracingEndpointProviderEvents(CharmEvents): + """TracingEndpointProvider events.""" + + request = EventSource(RequestEvent) + broken = EventSource(BrokenEvent) + + +class TracingEndpointProvider(Object): + """Class representing a trace receiver service.""" + + on = TracingEndpointProviderEvents() # type: ignore + + def __init__( + self, + charm: CharmBase, + external_url: Optional[str] = None, + relation_name: str = DEFAULT_RELATION_NAME, + ): + """Initialize. + + Args: + charm: a `CharmBase` instance that manages this instance of the Tempo service. + external_url: external address of the node hosting the tempo server, + if an ingress is present. + relation_name: an optional string name of the relation between `charm` + and the Tempo charmed service. The default is "tracing". + + Raises: + RelationNotFoundError: If there is no relation in the charm's metadata.yaml + with the same name as provided via `relation_name` argument. + RelationInterfaceMismatchError: The relation with the same name as provided + via `relation_name` argument does not have the `tracing` relation + interface. + RelationRoleMismatchError: If the relation with the same name as provided + via `relation_name` argument does not have the `RelationRole.requires` + role. + """ + _validate_relation_by_interface_and_direction( + charm, relation_name, RELATION_INTERFACE_NAME, RelationRole.provides + ) + + super().__init__(charm, relation_name + "tracing-provider") + self._charm = charm + self._external_url = external_url + self._relation_name = relation_name + self.framework.observe( + self._charm.on[relation_name].relation_joined, self._on_relation_event + ) + self.framework.observe( + self._charm.on[relation_name].relation_created, self._on_relation_event + ) + self.framework.observe( + self._charm.on[relation_name].relation_changed, self._on_relation_event + ) + self.framework.observe( + self._charm.on[relation_name].relation_broken, self._on_relation_broken_event + ) + + def _on_relation_broken_event(self, e: RelationBrokenEvent): + """Handle relation broken events.""" + self.on.broken.emit(e.relation) + + def _on_relation_event(self, e: RelationEvent): + """Handle relation created/joined/changed events.""" + if self.is_requirer_ready(e.relation): + self.on.request.emit(e.relation) + + def is_requirer_ready(self, relation: Relation): + """Attempt to determine if requirer has already populated app data.""" + try: + self._get_requested_protocols(relation) + except NotReadyError: + return False + return True + + @staticmethod + def _get_requested_protocols(relation: Relation): + app = relation.app + if not app: + raise NotReadyError("relation.app is None") + + try: + databag = TracingRequirerAppData.load(relation.data[app]) + except (json.JSONDecodeError, pydantic.ValidationError, DataValidationError): + logger.info(f"relation {relation} is not ready to talk tracing") + raise NotReadyError() + return databag.receivers + + def requested_protocols(self): + """All receiver protocols that have been requested by our related apps.""" + requested_protocols = set() + for relation in self.relations: + try: + protocols = self._get_requested_protocols(relation) + except NotReadyError: + continue + requested_protocols.update(protocols) + return requested_protocols + + @property + def relations(self) -> List[Relation]: + """All relations active on this endpoint.""" + return self._charm.model.relations[self._relation_name] + + def publish_receivers(self, receivers: Sequence[RawReceiver]): + """Let all requirers know that these receivers are active and listening.""" + if not self._charm.unit.is_leader(): + raise RuntimeError("only leader can do this") + + for relation in self.relations: + try: + TracingProviderAppData( + receivers=[ + Receiver( + url=url, + protocol=ProtocolType( + name=protocol, + type=receiver_protocol_to_transport_protocol[protocol], + ), + ) + for protocol, url in receivers + ], + ).dump(relation.data[self._charm.app]) + + except ModelError as e: + # args are bytes + msg = e.args[0] + if isinstance(msg, bytes): + if msg.startswith( + b"ERROR cannot read relation application settings: permission denied" + ): + logger.error( + f"encountered error {e} while attempting to update_relation_data." + f"The relation must be gone." + ) + continue + raise + + +class EndpointRemovedEvent(RelationBrokenEvent): + """Event representing a change in one of the receiver endpoints.""" + + +class EndpointChangedEvent(_AutoSnapshotEvent): + """Event representing a change in one of the receiver endpoints.""" + + __args__ = ("_receivers",) + + if TYPE_CHECKING: + _receivers = [] # type: List[dict] + + @property + def receivers(self) -> List[Receiver]: + """Cast receivers back from dict.""" + return [Receiver(**i) for i in self._receivers] + + +class TracingEndpointRequirerEvents(CharmEvents): + """TracingEndpointRequirer events.""" + + endpoint_changed = EventSource(EndpointChangedEvent) + endpoint_removed = EventSource(EndpointRemovedEvent) + + +class TracingEndpointRequirer(Object): + """A tracing endpoint for Tempo.""" + + on = TracingEndpointRequirerEvents() # type: ignore + + def __init__( + self, + charm: CharmBase, + relation_name: str = DEFAULT_RELATION_NAME, + protocols: Optional[List[ReceiverProtocol]] = None, + ): + """Construct a tracing requirer for a Tempo charm. + + If your application supports pushing traces to a distributed tracing backend, the + `TracingEndpointRequirer` object enables your charm to easily access endpoint information + exchanged over a `tracing` relation interface. + + Args: + charm: a `CharmBase` object that manages this + `TracingEndpointRequirer` object. Typically, this is `self` in the instantiating + class. + relation_name: an optional string name of the relation between `charm` + and the Tempo charmed service. The default is "tracing". It is strongly + advised not to change the default, so that people deploying your charm will have a + consistent experience with all other charms that provide tracing endpoints. + protocols: optional list of protocols that the charm intends to send traces with. + The provider will enable receivers for these and only these protocols, + so be sure to enable all protocols the charm or its workload are going to need. + + Raises: + RelationNotFoundError: If there is no relation in the charm's metadata.yaml + with the same name as provided via `relation_name` argument. + RelationInterfaceMismatchError: The relation with the same name as provided + via `relation_name` argument does not have the `tracing` relation + interface. + RelationRoleMismatchError: If the relation with the same name as provided + via `relation_name` argument does not have the `RelationRole.provides` + role. + """ + _validate_relation_by_interface_and_direction( + charm, relation_name, RELATION_INTERFACE_NAME, RelationRole.requires + ) + + super().__init__(charm, relation_name) + + self._is_single_endpoint = charm.meta.relations[relation_name].limit == 1 + + self._charm = charm + self._relation_name = relation_name + + events = self._charm.on[self._relation_name] + self.framework.observe(events.relation_changed, self._on_tracing_relation_changed) + self.framework.observe(events.relation_broken, self._on_tracing_relation_broken) + + if protocols: + self.request_protocols(protocols) + + def request_protocols( + self, protocols: Sequence[ReceiverProtocol], relation: Optional[Relation] = None + ): + """Publish the list of protocols which the provider should activate.""" + # todo: should we check if _is_single_endpoint and len(self.relations) > 1 and raise, here? + relations = [relation] if relation else self.relations + + if not protocols: + # empty sequence + raise ValueError( + "You need to pass a nonempty sequence of protocols to `request_protocols`." + ) + + try: + if self._charm.unit.is_leader(): + for relation in relations: + TracingRequirerAppData( + receivers=list(protocols), + ).dump(relation.data[self._charm.app]) + + except ModelError as e: + # args are bytes + msg = e.args[0] + if isinstance(msg, bytes): + if msg.startswith( + b"ERROR cannot read relation application settings: permission denied" + ): + logger.error( + f"encountered error {e} while attempting to request_protocols." + f"The relation must be gone." + ) + return + raise + + @property + def relations(self) -> List[Relation]: + """The tracing relations associated with this endpoint.""" + return self._charm.model.relations[self._relation_name] + + @property + def _relation(self) -> Optional[Relation]: + """If this wraps a single endpoint, the relation bound to it, if any.""" + if not self._is_single_endpoint: + objname = type(self).__name__ + raise AmbiguousRelationUsageError( + f"This {objname} wraps a {self._relation_name} endpoint that has " + "limit != 1. We can't determine what relation, of the possibly many, you are " + f"talking about. Please pass a relation instance while calling {objname}, " + "or set limit=1 in the charm metadata." + ) + relations = self.relations + return relations[0] if relations else None + + def is_ready(self, relation: Optional[Relation] = None): + """Is this endpoint ready?""" + relation = relation or self._relation + if not relation: + logger.debug(f"no relation on {self._relation_name !r}: tracing not ready") + return False + if relation.data is None: + logger.error(f"relation data is None for {relation}") + return False + if not relation.app: + logger.error(f"{relation} event received but there is no relation.app") + return False + try: + databag = dict(relation.data[relation.app]) + TracingProviderAppData.load(databag) + + except (json.JSONDecodeError, pydantic.ValidationError, DataValidationError): + logger.info(f"failed validating relation data for {relation}") + return False + return True + + def _on_tracing_relation_changed(self, event): + """Notify the providers that there is new endpoint information available.""" + relation = event.relation + if not self.is_ready(relation): + self.on.endpoint_removed.emit(relation) # type: ignore + return + + data = TracingProviderAppData.load(relation.data[relation.app]) + self.on.endpoint_changed.emit(relation, [i.dict() for i in data.receivers]) # type: ignore + + def _on_tracing_relation_broken(self, event: RelationBrokenEvent): + """Notify the providers that the endpoint is broken.""" + relation = event.relation + self.on.endpoint_removed.emit(relation) # type: ignore + + def get_all_endpoints( + self, relation: Optional[Relation] = None + ) -> Optional[TracingProviderAppData]: + """Unmarshalled relation data.""" + relation = relation or self._relation + if not self.is_ready(relation): + return + return TracingProviderAppData.load(relation.data[relation.app]) # type: ignore + + def _get_endpoint( + self, relation: Optional[Relation], protocol: ReceiverProtocol + ) -> Optional[str]: + app_data = self.get_all_endpoints(relation) + if not app_data: + return None + receivers: List[Receiver] = list( + filter(lambda i: i.protocol.name == protocol, app_data.receivers) + ) + if not receivers: + # it can happen if the charm requests tracing protocols, but the relay (such as grafana-agent) isn't yet + # connected to the tracing backend. In this case, it's not an error the charm author can do anything about + logger.warning(f"no receiver found with protocol={protocol!r}.") + return + if len(receivers) > 1: + # if we have more than 1 receiver that matches, it shouldn't matter which receiver we'll be using. + logger.warning( + f"too many receivers with protocol={protocol!r}; using first one. Found: {receivers}" + ) + + receiver = receivers[0] + return receiver.url + + def get_endpoint( + self, protocol: ReceiverProtocol, relation: Optional[Relation] = None + ) -> Optional[str]: + """Receiver endpoint for the given protocol. + + It could happen that this function gets called before the provider publishes the endpoints. + In such a scenario, if a non-leader unit calls this function, a permission denied exception will be raised due to + restricted access. To prevent this, this function needs to be guarded by the `is_ready` check. + + Raises: + ProtocolNotRequestedError: + If the charm unit is the leader unit and attempts to obtain an endpoint for a protocol it did not request. + """ + endpoint = self._get_endpoint(relation or self._relation, protocol=protocol) + if not endpoint: + requested_protocols = set() + relations = [relation] if relation else self.relations + for relation in relations: + try: + databag = TracingRequirerAppData.load(relation.data[self._charm.app]) + except DataValidationError: + continue + + requested_protocols.update(databag.receivers) + + if protocol not in requested_protocols: + raise ProtocolNotRequestedError(protocol, relation) + + return None + return endpoint + + +def charm_tracing_config( + endpoint_requirer: TracingEndpointRequirer, cert_path: Optional[Union[Path, str]] +) -> Tuple[Optional[str], Optional[str]]: + """Return the charm_tracing config you likely want. + + If no endpoint is provided: + disable charm tracing. + If https endpoint is provided but cert_path is not found on disk: + disable charm tracing. + If https endpoint is provided and cert_path is None: + ERROR + Else: + proceed with charm tracing (with or without tls, as appropriate) + + Usage: + >>> from lib.charms.tempo_coordinator_k8s.v0.charm_tracing import trace_charm + >>> from lib.charms.tempo_coordinator_k8s.v0.tracing import charm_tracing_config + >>> @trace_charm(tracing_endpoint="my_endpoint", cert_path="cert_path") + >>> class MyCharm(...): + >>> _cert_path = "/path/to/cert/on/charm/container.crt" + >>> def __init__(self, ...): + >>> self.tracing = TracingEndpointRequirer(...) + >>> self.my_endpoint, self.cert_path = charm_tracing_config( + ... self.tracing, self._cert_path) + """ + if not endpoint_requirer.is_ready(): + return None, None + + endpoint = endpoint_requirer.get_endpoint("otlp_http") + if not endpoint: + return None, None + + is_https = endpoint.startswith("https://") + + if is_https: + if cert_path is None or not Path(cert_path).exists(): + # disable charm tracing until we obtain a cert to prevent tls errors + logger.error( + "Tracing endpoint is https, but no server_cert has been passed." + "Please point @trace_charm to a `server_cert` attr. " + "This might also mean that the tracing provider is related to a " + "certificates provider, but this application is not (yet). " + "In that case, you might just have to wait a bit for the certificates " + "integration to settle. " + ) + return None, None + return endpoint, str(cert_path) + else: + return endpoint, None diff --git a/examples/django/charm/lib/charms/traefik_k8s/v2/ingress.py b/examples/django/charm/lib/charms/traefik_k8s/v2/ingress.py index 582a31f..bb7ac5e 100644 --- a/examples/django/charm/lib/charms/traefik_k8s/v2/ingress.py +++ b/examples/django/charm/lib/charms/traefik_k8s/v2/ingress.py @@ -1,4 +1,4 @@ -# Copyright 2025 Canonical Ltd. +# Copyright 2024 Canonical Ltd. # See LICENSE file for licensing details. r"""# Interface Library for ingress. @@ -56,13 +56,14 @@ def _on_ingress_revoked(self, event: IngressPerAppRevokedEvent): import socket import typing from dataclasses import dataclass +from functools import partial from typing import Any, Callable, Dict, List, MutableMapping, Optional, Sequence, Tuple, Union import pydantic from ops.charm import CharmBase, RelationBrokenEvent, RelationEvent from ops.framework import EventSource, Object, ObjectEvents, StoredState from ops.model import ModelError, Relation, Unit -from pydantic import AnyHttpUrl, BaseModel, Field, validator +from pydantic import AnyHttpUrl, BaseModel, Field # The unique Charmhub library identifier, never change it LIBID = "e6de2a5cd5b34422a204668f3b8f90d2" @@ -72,7 +73,7 @@ def _on_ingress_revoked(self, event: IngressPerAppRevokedEvent): # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 13 +LIBPATCH = 14 PYDEPS = ["pydantic"] @@ -84,6 +85,9 @@ def _on_ingress_revoked(self, event: IngressPerAppRevokedEvent): PYDANTIC_IS_V1 = int(pydantic.version.VERSION.split(".")[0]) < 2 if PYDANTIC_IS_V1: + from pydantic import validator + + input_validator = partial(validator, pre=True) class DatabagModel(BaseModel): # type: ignore """Base databag model.""" @@ -143,7 +147,9 @@ def dump(self, databag: Optional[MutableMapping] = None, clear: bool = True): return databag else: - from pydantic import ConfigDict + from pydantic import ConfigDict, field_validator + + input_validator = partial(field_validator, mode="before") class DatabagModel(BaseModel): """Base databag model.""" @@ -171,7 +177,7 @@ def load(cls, databag: MutableMapping): k: json.loads(v) for k, v in databag.items() # Don't attempt to parse model-external values - if k in {(f.alias or n) for n, f in cls.__fields__.items()} # type: ignore + if k in {(f.alias or n) for n, f in cls.model_fields.items()} # type: ignore } except json.JSONDecodeError as e: msg = f"invalid databag contents: expecting json. {databag}" @@ -252,14 +258,14 @@ class IngressRequirerAppData(DatabagModel): default="http", description="What scheme to use in the generated ingress url" ) - @validator("scheme", pre=True) + @input_validator("scheme") def validate_scheme(cls, scheme): # noqa: N805 # pydantic wants 'cls' as first arg """Validate scheme arg.""" if scheme not in {"http", "https", "h2c"}: raise ValueError("invalid scheme: should be one of `http|https|h2c`") return scheme - @validator("port", pre=True) + @input_validator("port") def validate_port(cls, port): # noqa: N805 # pydantic wants 'cls' as first arg """Validate port.""" assert isinstance(port, int), type(port) @@ -277,13 +283,13 @@ class IngressRequirerUnitData(DatabagModel): "IP can only be None if the IP information can't be retrieved from juju.", ) - @validator("host", pre=True) + @input_validator("host") def validate_host(cls, host): # noqa: N805 # pydantic wants 'cls' as first arg """Validate host.""" assert isinstance(host, str), type(host) return host - @validator("ip", pre=True) + @input_validator("ip") def validate_ip(cls, ip): # noqa: N805 # pydantic wants 'cls' as first arg """Validate ip.""" if ip is None: @@ -462,7 +468,10 @@ def _handle_relation(self, event): event.relation, data.app.name, data.app.model, - [unit.dict() for unit in data.units], + [ + unit.dict() if PYDANTIC_IS_V1 else unit.model_dump(mode="json") + for unit in data.units + ], data.app.strip_prefix or False, data.app.redirect_https or False, ) diff --git a/examples/fastapi/charm/lib/charms/data_platform_libs/v0/data_interfaces.py b/examples/fastapi/charm/lib/charms/data_platform_libs/v0/data_interfaces.py index aaed2e5..3bc2dd8 100644 --- a/examples/fastapi/charm/lib/charms/data_platform_libs/v0/data_interfaces.py +++ b/examples/fastapi/charm/lib/charms/data_platform_libs/v0/data_interfaces.py @@ -331,7 +331,7 @@ def _on_topic_requested(self, event: TopicRequestedEvent): # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 39 +LIBPATCH = 40 PYDEPS = ["ops>=2.0.0"] @@ -391,6 +391,10 @@ class IllegalOperationError(DataInterfacesError): """To be used when an operation is not allowed to be performed.""" +class PrematureDataAccessError(DataInterfacesError): + """To be raised when the Relation Data may be accessed (written) before protocol init complete.""" + + ############################################################################## # Global helpers / utilities ############################################################################## @@ -1453,6 +1457,8 @@ def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: class ProviderData(Data): """Base provides-side of the data products relation.""" + RESOURCE_FIELD = "database" + def __init__( self, model: Model, @@ -1618,6 +1624,15 @@ def _fetch_my_specific_relation_data( def _update_relation_data(self, relation: Relation, data: Dict[str, str]) -> None: """Set values for fields not caring whether it's a secret or not.""" req_secret_fields = [] + + keys = set(data.keys()) + if self.fetch_relation_field(relation.id, self.RESOURCE_FIELD) is None and ( + keys - {"endpoints", "read-only-endpoints", "replset"} + ): + raise PrematureDataAccessError( + "Premature access to relation data, update is forbidden before the connection is initialized." + ) + if relation.app: req_secret_fields = get_encoded_list(relation, relation.app, REQ_SECRET_FIELDS) @@ -3290,6 +3305,8 @@ class KafkaRequiresEvents(CharmEvents): class KafkaProviderData(ProviderData): """Provider-side of the Kafka relation.""" + RESOURCE_FIELD = "topic" + def __init__(self, model: Model, relation_name: str) -> None: super().__init__(model, relation_name) @@ -3539,6 +3556,8 @@ class OpenSearchRequiresEvents(CharmEvents): class OpenSearchProvidesData(ProviderData): """Provider-side of the OpenSearch relation.""" + RESOURCE_FIELD = "index" + def __init__(self, model: Model, relation_name: str) -> None: super().__init__(model, relation_name) diff --git a/examples/fastapi/charm/lib/charms/loki_k8s/v1/loki_push_api.py b/examples/fastapi/charm/lib/charms/loki_k8s/v1/loki_push_api.py new file mode 100644 index 0000000..d75cb7e --- /dev/null +++ b/examples/fastapi/charm/lib/charms/loki_k8s/v1/loki_push_api.py @@ -0,0 +1,2823 @@ +#!/usr/bin/env python3 +# Copyright 2023 Canonical Ltd. +# See LICENSE file for licensing details. +# +# Learn more at: https://juju.is/docs/sdk + +r"""## Overview. + +This document explains how to use the two principal objects this library provides: + +- `LokiPushApiProvider`: This object is meant to be used by any Charmed Operator that needs to +implement the provider side of the `loki_push_api` relation interface. For instance, a Loki charm. +The provider side of the relation represents the server side, to which logs are being pushed. + +- `LokiPushApiConsumer`: This object is meant to be used by any Charmed Operator that needs to +send log to Loki by implementing the consumer side of the `loki_push_api` relation interface. +For instance, a Promtail or Grafana agent charm which needs to send logs to Loki. + +- `LogProxyConsumer`: DEPRECATED. +This object can be used by any Charmed Operator which needs to send telemetry, such as logs, to +Loki through a Log Proxy by implementing the consumer side of the `loki_push_api` relation +interface. +In order to be able to control the labels on the logs pushed this object adds a Pebble layer +that runs Promtail in the workload container, injecting Juju topology labels into the +logs on the fly. +This object is deprecated. Consider migrating to LogForwarder with the release of Juju 3.6 LTS. + +- `LogForwarder`: This object can be used by any Charmed Operator which needs to send the workload +standard output (stdout) through Pebble's log forwarding mechanism, to Loki endpoints through the +`loki_push_api` relation interface. +In order to be able to control the labels on the logs pushed this object updates the pebble layer's +"log-targets" section with Juju topology. + +Filtering logs in Loki is largely performed on the basis of labels. In the Juju ecosystem, Juju +topology labels are used to uniquely identify the workload which generates telemetry like logs. + + +## LokiPushApiProvider Library Usage + +This object may be used by any Charmed Operator which implements the `loki_push_api` interface. +For instance, Loki or Grafana Agent. + +For this purpose a charm needs to instantiate the `LokiPushApiProvider` object with one mandatory +and three optional arguments. + +- `charm`: A reference to the parent (Loki) charm. + +- `relation_name`: The name of the relation that the charm uses to interact + with its clients, which implement `LokiPushApiConsumer` `LogForwarder`, or `LogProxyConsumer` + (note that LogProxyConsumer is deprecated). + + If provided, this relation name must match a provided relation in metadata.yaml with the + `loki_push_api` interface. + + The default relation name is "logging" for `LokiPushApiConsumer` and `LogForwarder`, and + "log-proxy" for `LogProxyConsumer` (note that LogProxyConsumer is deprecated). + + For example, a provider's `metadata.yaml` file may look as follows: + + ```yaml + provides: + logging: + interface: loki_push_api + ``` + + Subsequently, a Loki charm may instantiate the `LokiPushApiProvider` in its constructor as + follows: + + from charms.loki_k8s.v1.loki_push_api import LokiPushApiProvider + from loki_server import LokiServer + ... + + class LokiOperatorCharm(CharmBase): + ... + + def __init__(self, *args): + super().__init__(*args) + ... + external_url = urlparse(self._external_url) + self.loki_provider = LokiPushApiProvider( + self, + address=external_url.hostname or self.hostname, + port=external_url.port or 80, + scheme=external_url.scheme, + path=f"{external_url.path}/loki/api/v1/push", + ) + ... + + - `port`: Loki Push Api endpoint port. Default value: `3100`. + - `scheme`: Loki Push Api endpoint scheme (`HTTP` or `HTTPS`). Default value: `HTTP` + - `address`: Loki Push Api endpoint address. Default value: `localhost` + - `path`: Loki Push Api endpoint path. Default value: `loki/api/v1/push` + + +The `LokiPushApiProvider` object has several responsibilities: + +1. Set the URL of the Loki Push API in the relation application data bag; the URL + must be unique to all instances (e.g. using a load balancer). + +2. Set the Promtail binary URL (`promtail_binary_zip_url`) so clients that use + `LogProxyConsumer` object could download and configure it. + +3. Process the metadata of the consumer application, provided via the + "metadata" field of the consumer data bag, which are used to annotate the + alert rules (see next point). An example for "metadata" is the following: + + {'model': 'loki', + 'model_uuid': '0b7d1071-ded2-4bf5-80a3-10a81aeb1386', + 'application': 'promtail-k8s' + } + +4. Process alert rules set into the relation by the `LokiPushApiConsumer` + objects, e.g.: + + '{ + "groups": [{ + "name": "loki_0b7d1071-ded2-4bf5-80a3-10a81aeb1386_promtail-k8s_alerts", + "rules": [{ + "alert": "HighPercentageError", + "expr": "sum(rate({app=\\"foo\\", env=\\"production\\"} |= \\"error\\" [5m])) + by (job) \\n /\\nsum(rate({app=\\"foo\\", env=\\"production\\"}[5m])) + by (job)\\n > 0.05 + \\n", "for": "10m", + "labels": { + "severity": "page", + "juju_model": "loki", + "juju_model_uuid": "0b7d1071-ded2-4bf5-80a3-10a81aeb1386", + "juju_application": "promtail-k8s" + }, + "annotations": { + "summary": "High request latency" + } + }] + }] + }' + + +Once these alert rules are sent over relation data, the `LokiPushApiProvider` object +stores these files in the directory `/loki/rules` inside the Loki charm container. After +storing alert rules files, the object will check alert rules by querying Loki API +endpoint: [`loki/api/v1/rules`](https://grafana.com/docs/loki/latest/api/#list-rule-groups). +If there are changes in the alert rules a `loki_push_api_alert_rules_changed` event will +be emitted with details about the `RelationEvent` which triggered it. + +This events should be observed in the charm that uses `LokiPushApiProvider`: + +```python + def __init__(self, *args): + super().__init__(*args) + ... + self.loki_provider = LokiPushApiProvider(self) + self.framework.observe( + self.loki_provider.on.loki_push_api_alert_rules_changed, + self._loki_push_api_alert_rules_changed, + ) +``` + + +## LokiPushApiConsumer Library Usage + +This Loki charm interacts with its clients using the Loki charm library. Charms +seeking to send log to Loki, must do so using the `LokiPushApiConsumer` object from +this charm library. + +> **NOTE**: `LokiPushApiConsumer` also depends on an additional charm library. +> +> Ensure sure you `charmcraft fetch-lib charms.observability_libs.v0.juju_topology` +> when using this library. + +For the simplest use cases, using the `LokiPushApiConsumer` object only requires +instantiating it, typically in the constructor of your charm (the one which +sends logs). + +```python +from charms.loki_k8s.v1.loki_push_api import LokiPushApiConsumer + +class LokiClientCharm(CharmBase): + + def __init__(self, *args): + super().__init__(*args) + ... + self._loki_consumer = LokiPushApiConsumer(self) +``` + +The `LokiPushApiConsumer` constructor requires two things: + +- A reference to the parent (LokiClientCharm) charm. + +- Optionally, the name of the relation that the Loki charm uses to interact + with its clients. If provided, this relation name must match a required + relation in metadata.yaml with the `loki_push_api` interface. + + This argument is not required if your metadata.yaml has precisely one + required relation in metadata.yaml with the `loki_push_api` interface, as the + lib will automatically resolve the relation name inspecting the using the + meta information of the charm + +Any time the relation between a Loki provider charm and a Loki consumer charm is +established, a `LokiPushApiEndpointJoined` event is fired. In the consumer side +is it possible to observe this event with: + +```python + +self.framework.observe( + self._loki_consumer.on.loki_push_api_endpoint_joined, + self._on_loki_push_api_endpoint_joined, +) +``` + +Any time there are departures in relations between the consumer charm and Loki +the consumer charm is informed, through a `LokiPushApiEndpointDeparted` event, for instance: + +```python +self.framework.observe( + self._loki_consumer.on.loki_push_api_endpoint_departed, + self._on_loki_push_api_endpoint_departed, +) +``` + +The consumer charm can then choose to update its configuration in both situations. + +Note that LokiPushApiConsumer does not add any labels automatically on its own. In +order to better integrate with the Canonical Observability Stack, you may want to configure your +software to add Juju topology labels. The +[observability-libs](https://charmhub.io/observability-libs) library can be used to get topology +labels in charm code. See :func:`LogProxyConsumer._scrape_configs` for an example of how +to do this with promtail. + +## LogProxyConsumer Library Usage + +> Note: This object is deprecated. Consider migrating to LogForwarder with the release of Juju 3.6 +> LTS. + +Let's say that we have a workload charm that produces logs, and we need to send those logs to a +workload implementing the `loki_push_api` interface, such as `Loki` or `Grafana Agent`. + +Adopting this object in a Charmed Operator consist of two steps: + +1. Use the `LogProxyConsumer` class by instantiating it in the `__init__` method of the charmed + operator. There are two ways to get logs in to promtail. You can give it a list of files to + read, or you can write to it using the syslog protocol. + + For example: + + ```python + from charms.loki_k8s.v1.loki_push_api import LogProxyConsumer + + ... + + def __init__(self, *args): + ... + self._log_proxy = LogProxyConsumer( + self, + logs_scheme={ + "workload-a": { + "log-files": ["/tmp/worload-a-1.log", "/tmp/worload-a-2.log"], + "syslog-port": 1514, + }, + "workload-b": {"log-files": ["/tmp/worload-b.log"], "syslog-port": 1515}, + }, + relation_name="log-proxy", + ) + self.framework.observe( + self._log_proxy.on.promtail_digest_error, + self._promtail_error, + ) + + def _promtail_error(self, event): + logger.error(event.message) + self.unit.status = BlockedStatus(event.message) + ``` + + Any time the relation between a provider charm and a LogProxy consumer charm is + established, a `LogProxyEndpointJoined` event is fired. In the consumer side is it + possible to observe this event with: + + ```python + + self.framework.observe( + self._log_proxy.on.log_proxy_endpoint_joined, + self._on_log_proxy_endpoint_joined, + ) + ``` + + Any time there are departures in relations between the consumer charm and the provider + the consumer charm is informed, through a `LogProxyEndpointDeparted` event, for instance: + + ```python + self.framework.observe( + self._log_proxy.on.log_proxy_endpoint_departed, + self._on_log_proxy_endpoint_departed, + ) + ``` + + The consumer charm can then choose to update its configuration in both situations. + + Note that: + + - You can configure your syslog software using `localhost` as the address and the method + `LogProxyConsumer.syslog_port("container_name")` to get the port, or, alternatively, if you are using rsyslog + you may use the method `LogProxyConsumer.rsyslog_config("container_name")`. + +2. Modify the `metadata.yaml` file to add: + + - The `log-proxy` relation in the `requires` section: + ```yaml + requires: + log-proxy: + interface: loki_push_api + optional: true + ``` + +Once the library is implemented in a Charmed Operator and a relation is established with +the charm that implements the `loki_push_api` interface, the library will inject a +Pebble layer that runs Promtail in the workload container to send logs. + +By default, the promtail binary injected into the container will be downloaded from the internet. +If, for any reason, the container has limited network access, you may allow charm administrators +to provide their own promtail binary at runtime by adding the following snippet to your charm +metadata: + +```yaml +resources: + promtail-bin: + type: file + description: Promtail binary for logging + filename: promtail-linux +``` + +Which would then allow operators to deploy the charm this way: + +``` +juju deploy \ + ./your_charm.charm \ + --resource promtail-bin=/tmp/promtail-linux-amd64 +``` + +If a different resource name is used, it can be specified with the `promtail_resource_name` +argument to the `LogProxyConsumer` constructor. + +The object can emit a `PromtailDigestError` event: + +- Promtail binary cannot be downloaded. +- The sha256 sum mismatch for promtail binary. + +The object can raise a `ContainerNotFoundError` event: + +- No `container_name` parameter has been specified and the Pod has more than 1 container. + +These can be monitored via the PromtailDigestError events via: + +```python + self.framework.observe( + self._loki_consumer.on.promtail_digest_error, + self._promtail_error, + ) + + def _promtail_error(self, event): + logger.error(msg) + self.unit.status = BlockedStatus(event.message) + ) +``` + +## LogForwarder class Usage + +Let's say that we have a charm's workload that writes logs to the standard output (stdout), +and we need to send those logs to a workload implementing the `loki_push_api` interface, +such as `Loki` or `Grafana Agent`. To know how to reach a Loki instance, a charm would +typically use the `loki_push_api` interface. + +Use the `LogForwarder` class by instantiating it in the `__init__` method of the charm: + +```python +from charms.loki_k8s.v1.loki_push_api import LogForwarder + +... + + def __init__(self, *args): + ... + self._log_forwarder = LogForwarder( + self, + relation_name="logging" # optional, defaults to `logging` + ) +``` + +The `LogForwarder` by default will observe relation events on the `logging` endpoint and +enable/disable log forwarding automatically. +Next, modify the `metadata.yaml` file to add: + +The `log-forwarding` relation in the `requires` section: +```yaml +requires: + logging: + interface: loki_push_api + optional: true +``` + +Once the LogForwader class is implemented in your charm and the relation (implementing the +`loki_push_api` interface) is active and healthy, the library will inject a Pebble layer in +each workload container the charm has access to, to configure Pebble's log forwarding +feature and start sending logs to Loki. + +## Alerting Rules + +This charm library also supports gathering alerting rules from all related Loki client +charms and enabling corresponding alerts within the Loki charm. Alert rules are +automatically gathered by `LokiPushApiConsumer` object from a directory conventionally +named `loki_alert_rules`. + +This directory must reside at the top level in the `src` folder of the +consumer charm. Each file in this directory is assumed to be a single alert rule +in YAML format. The file name must have the `.rule` extension. +The format of this alert rule conforms to the +[Loki docs](https://grafana.com/docs/loki/latest/rules/#alerting-rules). + +An example of the contents of one such file is shown below. + +```yaml +alert: HighPercentageError +expr: | + sum(rate({%%juju_topology%%} |= "error" [5m])) by (job) + / + sum(rate({%%juju_topology%%}[5m])) by (job) + > 0.05 +for: 10m +labels: + severity: page +annotations: + summary: High request latency + +``` + +It is **critical** to use the `%%juju_topology%%` filter in the expression for the alert +rule shown above. This filter is a stub that is automatically replaced by the +`LokiPushApiConsumer` following Loki Client's Juju topology (application, model and its +UUID). Such a topology filter is essential to ensure that alert rules submitted by one +provider charm generates alerts only for that same charm. + +The Loki charm may be related to multiple Loki client charms. Without this, filter +rules submitted by one provider charm will also result in corresponding alerts for other +provider charms. Hence, every alert rule expression must include such a topology filter stub. + +Gathering alert rules and generating rule files within the Loki charm is easily done using +the `alerts()` method of `LokiPushApiProvider`. Alerts generated by Loki will automatically +include Juju topology labels in the alerts. These labels indicate the source of the alert. + +The following labels are automatically added to every alert + +- `juju_model` +- `juju_model_uuid` +- `juju_application` + + +Whether alert rules files does not contain the keys `alert` or `expr` or there is no alert +rules file in `alert_rules_path` a `loki_push_api_alert_rules_error` event is emitted. + +To handle these situations the event must be observed in the `LokiClientCharm` charm.py file: + +```python +class LokiClientCharm(CharmBase): + + def __init__(self, *args): + super().__init__(*args) + ... + self._loki_consumer = LokiPushApiConsumer(self) + + self.framework.observe( + self._loki_consumer.on.loki_push_api_alert_rules_error, + self._alert_rules_error + ) + + def _alert_rules_error(self, event): + self.unit.status = BlockedStatus(event.message) +``` + +## Relation Data + +The Loki charm uses both application and unit relation data to obtain information regarding +Loki Push API and alert rules. + +Units of consumer charm send their alert rules over app relation data using the `alert_rules` +key. + +## Charm logging +The `charms.loki_k8s.v0.charm_logging` library can be used in conjunction with this one to configure python's +logging module to forward all logs to Loki via the loki-push-api interface. + +```python +from lib.charms.loki_k8s.v0.charm_logging import log_charm +from lib.charms.loki_k8s.v1.loki_push_api import charm_logging_config, LokiPushApiConsumer + +@log_charm(logging_endpoint="my_endpoints", server_cert="cert_path") +class MyCharm(...): + _cert_path = "/path/to/cert/on/charm/container.crt" + def __init__(self, ...): + self.logging = LokiPushApiConsumer(...) + self.my_endpoints, self.cert_path = charm_logging_config( + self.logging, self._cert_path) +``` + +Do this, and all charm logs will be forwarded to Loki as soon as a relation is formed. +""" + +import json +import logging +import os +import platform +import re +import socket +import subprocess +import tempfile +import typing +from copy import deepcopy +from gzip import GzipFile +from hashlib import sha256 +from io import BytesIO +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple, Union +from urllib import request +from urllib.error import URLError + +import yaml +from cosl import JujuTopology +from ops.charm import ( + CharmBase, + HookEvent, + PebbleReadyEvent, + RelationBrokenEvent, + RelationCreatedEvent, + RelationDepartedEvent, + RelationEvent, + RelationJoinedEvent, + RelationRole, + WorkloadEvent, +) +from ops.framework import EventBase, EventSource, Object, ObjectEvents +from ops.jujuversion import JujuVersion +from ops.model import Container, ModelError, Relation +from ops.pebble import APIError, ChangeError, Layer, PathError, ProtocolError + +# The unique Charmhub library identifier, never change it +LIBID = "bf76f23cdd03464b877c52bd1d2f563e" + +# Increment this major API version when introducing breaking changes +LIBAPI = 1 + +# Increment this PATCH version before using `charmcraft publish-lib` or reset +# to 0 if you are raising the major API version +LIBPATCH = 13 + +PYDEPS = ["cosl"] + +logger = logging.getLogger(__name__) + +RELATION_INTERFACE_NAME = "loki_push_api" +DEFAULT_RELATION_NAME = "logging" +DEFAULT_ALERT_RULES_RELATIVE_PATH = "./src/loki_alert_rules" +DEFAULT_LOG_PROXY_RELATION_NAME = "log-proxy" + +PROMTAIL_BASE_URL = "https://github.com/canonical/loki-k8s-operator/releases/download" +# To update Promtail version you only need to change the PROMTAIL_VERSION and +# update all sha256 sums in PROMTAIL_BINARIES. To support a new architecture +# you only need to add a new key value pair for the architecture in PROMTAIL_BINARIES. +PROMTAIL_VERSION = "v2.9.7" +PROMTAIL_ARM_BINARY = { + "filename": "promtail-static-arm64", + "zipsha": "c083fdb45e5c794103f974eeb426489b4142438d9e10d0ae272b2aff886e249b", + "binsha": "4cd055c477a301c0bdfdbcea514e6e93f6df5d57425ce10ffc77f3e16fec1ddf", +} + +PROMTAIL_BINARIES = { + "amd64": { + "filename": "promtail-static-amd64", + "zipsha": "6873cbdabf23062aeefed6de5f00ff382710332af3ab90a48c253ea17e08f465", + "binsha": "28da9b99f81296fe297831f3bc9d92aea43b4a92826b8ff04ba433b8cb92fb50", + }, + "arm64": PROMTAIL_ARM_BINARY, + "aarch64": PROMTAIL_ARM_BINARY, +} + +# Paths in `charm` container +BINARY_DIR = "/tmp" + +# Paths in `workload` container +WORKLOAD_BINARY_DIR = "/opt/promtail" +WORKLOAD_CONFIG_DIR = "/etc/promtail" +WORKLOAD_CONFIG_FILE_NAME = "promtail_config.yaml" +WORKLOAD_CONFIG_PATH = "{}/{}".format(WORKLOAD_CONFIG_DIR, WORKLOAD_CONFIG_FILE_NAME) +WORKLOAD_POSITIONS_PATH = "{}/positions.yaml".format(WORKLOAD_BINARY_DIR) +WORKLOAD_SERVICE_NAME = "promtail" + +# These are the initial port values. As we can have more than one container, +# we use odd and even numbers to avoid collisions. +# Each new container adds 2 to the previous value. +HTTP_LISTEN_PORT_START = 9080 # even start port +GRPC_LISTEN_PORT_START = 9095 # odd start port + + +class LokiPushApiError(Exception): + """Base class for errors raised by this module.""" + + +class RelationNotFoundError(LokiPushApiError): + """Raised if there is no relation with the given name.""" + + def __init__(self, relation_name: str): + self.relation_name = relation_name + self.message = "No relation named '{}' found".format(relation_name) + + super().__init__(self.message) + + +class RelationInterfaceMismatchError(LokiPushApiError): + """Raised if the relation with the given name has a different interface.""" + + def __init__( + self, + relation_name: str, + expected_relation_interface: str, + actual_relation_interface: str, + ): + self.relation_name = relation_name + self.expected_relation_interface = expected_relation_interface + self.actual_relation_interface = actual_relation_interface + self.message = ( + "The '{}' relation has '{}' as interface rather than the expected '{}'".format( + relation_name, actual_relation_interface, expected_relation_interface + ) + ) + super().__init__(self.message) + + +class RelationRoleMismatchError(LokiPushApiError): + """Raised if the relation with the given name has a different direction.""" + + def __init__( + self, + relation_name: str, + expected_relation_role: RelationRole, + actual_relation_role: RelationRole, + ): + self.relation_name = relation_name + self.expected_relation_interface = expected_relation_role + self.actual_relation_role = actual_relation_role + self.message = "The '{}' relation has role '{}' rather than the expected '{}'".format( + relation_name, repr(actual_relation_role), repr(expected_relation_role) + ) + super().__init__(self.message) + + +def _validate_relation_by_interface_and_direction( + charm: CharmBase, + relation_name: str, + expected_relation_interface: str, + expected_relation_role: RelationRole, +): + """Verifies that a relation has the necessary characteristics. + + Verifies that the `relation_name` provided: (1) exists in metadata.yaml, + (2) declares as interface the interface name passed as `relation_interface` + and (3) has the right "direction", i.e., it is a relation that `charm` + provides or requires. + + Args: + charm: a `CharmBase` object to scan for the matching relation. + relation_name: the name of the relation to be verified. + expected_relation_interface: the interface name to be matched by the + relation named `relation_name`. + expected_relation_role: whether the `relation_name` must be either + provided or required by `charm`. + + Raises: + RelationNotFoundError: If there is no relation in the charm's metadata.yaml + with the same name as provided via `relation_name` argument. + RelationInterfaceMismatchError: The relation with the same name as provided + via `relation_name` argument does not have the same relation interface + as specified via the `expected_relation_interface` argument. + RelationRoleMismatchError: If the relation with the same name as provided + via `relation_name` argument does not have the same role as specified + via the `expected_relation_role` argument. + """ + if relation_name not in charm.meta.relations: + raise RelationNotFoundError(relation_name) + + relation = charm.meta.relations[relation_name] + + actual_relation_interface = relation.interface_name + if actual_relation_interface != expected_relation_interface: + raise RelationInterfaceMismatchError( + relation_name, + expected_relation_interface, + actual_relation_interface, # pyright: ignore + ) + + if expected_relation_role == RelationRole.provides: + if relation_name not in charm.meta.provides: + raise RelationRoleMismatchError( + relation_name, RelationRole.provides, RelationRole.requires + ) + elif expected_relation_role == RelationRole.requires: + if relation_name not in charm.meta.requires: + raise RelationRoleMismatchError( + relation_name, RelationRole.requires, RelationRole.provides + ) + else: + raise Exception("Unexpected RelationDirection: {}".format(expected_relation_role)) + + +class InvalidAlertRulePathError(Exception): + """Raised if the alert rules folder cannot be found or is otherwise invalid.""" + + def __init__( + self, + alert_rules_absolute_path: Path, + message: str, + ): + self.alert_rules_absolute_path = alert_rules_absolute_path + self.message = message + + super().__init__(self.message) + + +def _is_official_alert_rule_format(rules_dict: dict) -> bool: + """Are alert rules in the upstream format as supported by Loki. + + Alert rules in dictionary format are in "official" form if they + contain a "groups" key, since this implies they contain a list of + alert rule groups. + + Args: + rules_dict: a set of alert rules in Python dictionary format + + Returns: + True if alert rules are in official Loki file format. + """ + return "groups" in rules_dict + + +def _is_single_alert_rule_format(rules_dict: dict) -> bool: + """Are alert rules in single rule format. + + The Loki charm library supports reading of alert rules in a + custom format that consists of a single alert rule per file. This + does not conform to the official Loki alert rule file format + which requires that each alert rules file consists of a list of + alert rule groups and each group consists of a list of alert + rules. + + Alert rules in dictionary form are considered to be in single rule + format if in the least it contains two keys corresponding to the + alert rule name and alert expression. + + Returns: + True if alert rule is in single rule file format. + """ + # one alert rule per file + return set(rules_dict) >= {"alert", "expr"} + + +class AlertRules: + """Utility class for amalgamating Loki alert rule files and injecting juju topology. + + An `AlertRules` object supports aggregating alert rules from files and directories in both + official and single rule file formats using the `add_path()` method. All the alert rules + read are annotated with Juju topology labels and amalgamated into a single data structure + in the form of a Python dictionary using the `as_dict()` method. Such a dictionary can be + easily dumped into JSON format and exchanged over relation data. The dictionary can also + be dumped into YAML format and written directly into an alert rules file that is read by + Loki. Note that multiple `AlertRules` objects must not be written into the same file, + since Loki allows only a single list of alert rule groups per alert rules file. + + The official Loki format is a YAML file conforming to the Loki documentation + (https://grafana.com/docs/loki/latest/api/#list-rule-groups). + The custom single rule format is a subsection of the official YAML, having a single alert + rule, effectively "one alert per file". + """ + + # This class uses the following terminology for the various parts of a rule file: + # - alert rules file: the entire groups[] yaml, including the "groups:" key. + # - alert groups (plural): the list of groups[] (a list, i.e. no "groups:" key) - it is a list + # of dictionaries that have the "name" and "rules" keys. + # - alert group (singular): a single dictionary that has the "name" and "rules" keys. + # - alert rules (plural): all the alerts in a given alert group - a list of dictionaries with + # the "alert" and "expr" keys. + # - alert rule (singular): a single dictionary that has the "alert" and "expr" keys. + + def __init__(self, topology: Optional[JujuTopology] = None): + """Build and alert rule object. + + Args: + topology: a `JujuTopology` instance that is used to annotate all alert rules. + """ + self.topology = topology + self.tool = CosTool(None) + self.alert_groups = [] # type: List[dict] + + def _from_file(self, root_path: Path, file_path: Path) -> List[dict]: + """Read a rules file from path, injecting juju topology. + + Args: + root_path: full path to the root rules folder (used only for generating group name) + file_path: full path to a *.rule file. + + Returns: + A list of dictionaries representing the rules file, if file is valid (the structure is + formed by `yaml.safe_load` of the file); an empty list otherwise. + """ + with file_path.open() as rf: + # Load a list of rules from file then add labels and filters + try: + rule_file = yaml.safe_load(rf) or {} + + except Exception as e: + logger.error("Failed to read alert rules from %s: %s", file_path.name, e) + return [] + + if _is_official_alert_rule_format(rule_file): + alert_groups = rule_file["groups"] + elif _is_single_alert_rule_format(rule_file): + # convert to list of alert groups + # group name is made up from the file name + alert_groups = [{"name": file_path.stem, "rules": [rule_file]}] + else: + # invalid/unsupported + reason = "file is empty" if not rule_file else "unexpected file structure" + logger.error("Invalid rules file (%s): %s", reason, file_path.name) + return [] + + # update rules with additional metadata + for alert_group in alert_groups: + # update group name with topology and sub-path + alert_group["name"] = self._group_name( + str(root_path), + str(file_path), + alert_group["name"], + ) + + # add "juju_" topology labels + for alert_rule in alert_group["rules"]: + if "labels" not in alert_rule: + alert_rule["labels"] = {} + + if self.topology: + # only insert labels that do not already exist + for label, val in self.topology.label_matcher_dict.items(): + if label not in alert_rule["labels"]: + alert_rule["labels"][label] = val + + # insert juju topology filters into a prometheus alert rule + # logql doesn't like empty matchers, so add a job matcher which hits + # any string as a "wildcard" which the topology labels will + # filter down + alert_rule["expr"] = self.tool.inject_label_matchers( + re.sub(r"%%juju_topology%%", r'job=~".+"', alert_rule["expr"]), + self.topology.label_matcher_dict, + ) + + return alert_groups + + def _group_name( + self, + root_path: typing.Union[Path, str], + file_path: typing.Union[Path, str], + group_name: str, + ) -> str: + """Generate group name from path and topology. + + The group name is made up of the relative path between the root dir_path, the file path, + and topology identifier. + + Args: + root_path: path to the root rules dir. + file_path: path to rule file. + group_name: original group name to keep as part of the new augmented group name + + Returns: + New group name, augmented by juju topology and relative path. + """ + file_path = Path(file_path) if not isinstance(file_path, Path) else file_path + root_path = Path(root_path) if not isinstance(root_path, Path) else root_path + rel_path = file_path.parent.relative_to(root_path.as_posix()) + + # We should account for both absolute paths and Windows paths. Convert it to a POSIX + # string, strip off any leading /, then join it + + path_str = "" + if not rel_path == Path("."): + # Get rid of leading / and optionally drive letters so they don't muck up + # the template later, since Path.parts returns them. The 'if relpath.is_absolute ...' + # isn't even needed since re.sub doesn't throw exceptions if it doesn't match, so it's + # optional, but it makes it clear what we're doing. + + # Note that Path doesn't actually care whether the path is valid just to instantiate + # the object, so we can happily strip that stuff out to make templating nicer + rel_path = Path( + re.sub(r"^([A-Za-z]+:)?/", "", rel_path.as_posix()) + if rel_path.is_absolute() + else str(rel_path) + ) + + # Get rid of relative path characters in the middle which both os.path and pathlib + # leave hanging around. We could use path.resolve(), but that would lead to very + # long template strings when rules come from pods and/or other deeply nested charm + # paths + path_str = "_".join(filter(lambda x: x not in ["..", "/"], rel_path.parts)) + + # Generate group name: + # - name, from juju topology + # - suffix, from the relative path of the rule file; + group_name_parts = [self.topology.identifier] if self.topology else [] + group_name_parts.extend([path_str, group_name, "alerts"]) + # filter to remove empty strings + return "_".join(filter(lambda x: x, group_name_parts)) + + @classmethod + def _multi_suffix_glob( + cls, dir_path: Path, suffixes: List[str], recursive: bool = True + ) -> list: + """Helper function for getting all files in a directory that have a matching suffix. + + Args: + dir_path: path to the directory to glob from. + suffixes: list of suffixes to include in the glob (items should begin with a period). + recursive: a flag indicating whether a glob is recursive (nested) or not. + + Returns: + List of files in `dir_path` that have one of the suffixes specified in `suffixes`. + """ + all_files_in_dir = dir_path.glob("**/*" if recursive else "*") + return list(filter(lambda f: f.is_file() and f.suffix in suffixes, all_files_in_dir)) + + def _from_dir(self, dir_path: Path, recursive: bool) -> List[dict]: + """Read all rule files in a directory. + + All rules from files for the same directory are loaded into a single + group. The generated name of this group includes juju topology. + By default, only the top directory is scanned; for nested scanning, pass `recursive=True`. + + Args: + dir_path: directory containing *.rule files (alert rules without groups). + recursive: flag indicating whether to scan for rule files recursively. + + Returns: + a list of dictionaries representing prometheus alert rule groups, each dictionary + representing an alert group (structure determined by `yaml.safe_load`). + """ + alert_groups = [] # type: List[dict] + + # Gather all alerts into a list of groups + for file_path in self._multi_suffix_glob(dir_path, [".rule", ".rules"], recursive): + alert_groups_from_file = self._from_file(dir_path, file_path) + if alert_groups_from_file: + logger.debug("Reading alert rule from %s", file_path) + alert_groups.extend(alert_groups_from_file) + + return alert_groups + + def add_path(self, path_str: str, *, recursive: bool = False): + """Add rules from a dir path. + + All rules from files are aggregated into a data structure representing a single rule file. + All group names are augmented with juju topology. + + Args: + path_str: either a rules file or a dir of rules files. + recursive: whether to read files recursively or not (no impact if `path` is a file). + + Raises: + InvalidAlertRulePathError: if the provided path is invalid. + """ + path = Path(path_str) # type: Path + if path.is_dir(): + self.alert_groups.extend(self._from_dir(path, recursive)) + elif path.is_file(): + self.alert_groups.extend(self._from_file(path.parent, path)) + else: + logger.debug("The alerts file does not exist: %s", path) + + def as_dict(self) -> dict: + """Return standard alert rules file in dict representation. + + Returns: + a dictionary containing a single list of alert rule groups. + The list of alert rule groups is provided as value of the + "groups" dictionary key. + """ + return {"groups": self.alert_groups} if self.alert_groups else {} + + +def _resolve_dir_against_charm_path(charm: CharmBase, *path_elements: str) -> str: + """Resolve the provided path items against the directory of the main file. + + Look up the directory of the `main.py` file being executed. This is normally + going to be the charm.py file of the charm including this library. Then, resolve + the provided path elements and, if the result path exists and is a directory, + return its absolute path; otherwise, raise en exception. + + Raises: + InvalidAlertRulePathError, if the path does not exist or is not a directory. + """ + charm_dir = Path(str(charm.charm_dir)) + if not charm_dir.exists() or not charm_dir.is_dir(): + # Operator Framework does not currently expose a robust + # way to determine the top level charm source directory + # that is consistent across deployed charms and unit tests + # Hence for unit tests the current working directory is used + # TODO: updated this logic when the following ticket is resolved + # https://github.com/canonical/operator/issues/643 + charm_dir = Path(os.getcwd()) + + alerts_dir_path = charm_dir.absolute().joinpath(*path_elements) + + if not alerts_dir_path.exists(): + raise InvalidAlertRulePathError(alerts_dir_path, "directory does not exist") + if not alerts_dir_path.is_dir(): + raise InvalidAlertRulePathError(alerts_dir_path, "is not a directory") + + return str(alerts_dir_path) + + +class NoRelationWithInterfaceFoundError(Exception): + """No relations with the given interface are found in the charm meta.""" + + def __init__(self, charm: CharmBase, relation_interface: Optional[str] = None): + self.charm = charm + self.relation_interface = relation_interface + self.message = ( + "No relations with interface '{}' found in the meta of the '{}' charm".format( + relation_interface, charm.meta.name + ) + ) + + super().__init__(self.message) + + +class MultipleRelationsWithInterfaceFoundError(Exception): + """Multiple relations with the given interface are found in the charm meta.""" + + def __init__(self, charm: CharmBase, relation_interface: str, relations: list): + self.charm = charm + self.relation_interface = relation_interface + self.relations = relations + self.message = ( + "Multiple relations with interface '{}' found in the meta of the '{}' charm.".format( + relation_interface, charm.meta.name + ) + ) + super().__init__(self.message) + + +class LokiPushApiEndpointDeparted(EventBase): + """Event emitted when Loki departed.""" + + +class LokiPushApiEndpointJoined(EventBase): + """Event emitted when Loki joined.""" + + +class LokiPushApiAlertRulesChanged(EventBase): + """Event emitted if there is a change in the alert rules.""" + + def __init__(self, handle, relation, relation_id, app=None, unit=None): + """Pretend we are almost like a RelationEvent. + + Fields to serialize: + { + "relation_name": , + "relation_id": , + "app_name": , + "unit_name": + } + + In this way, we can transparently use `RelationEvent.snapshot()` to pass + it back if we need to log it. + """ + super().__init__(handle) + self.relation = relation + self.relation_id = relation_id + self.app = app + self.unit = unit + + def snapshot(self) -> Dict: + """Save event information.""" + if not self.relation: + return {} + snapshot = {"relation_name": self.relation.name, "relation_id": self.relation.id} + if self.app: + snapshot["app_name"] = self.app.name + if self.unit: + snapshot["unit_name"] = self.unit.name + return snapshot + + def restore(self, snapshot: dict): + """Restore event information.""" + self.relation = self.framework.model.get_relation( + snapshot["relation_name"], snapshot["relation_id"] + ) + app_name = snapshot.get("app_name") + if app_name: + self.app = self.framework.model.get_app(app_name) + else: + self.app = None + unit_name = snapshot.get("unit_name") + if unit_name: + self.unit = self.framework.model.get_unit(unit_name) + else: + self.unit = None + + +class InvalidAlertRuleEvent(EventBase): + """Event emitted when alert rule files are not parsable. + + Enables us to set a clear status on the provider. + """ + + def __init__(self, handle, errors: str = "", valid: bool = False): + super().__init__(handle) + self.errors = errors + self.valid = valid + + def snapshot(self) -> Dict: + """Save alert rule information.""" + return { + "valid": self.valid, + "errors": self.errors, + } + + def restore(self, snapshot): + """Restore alert rule information.""" + self.valid = snapshot["valid"] + self.errors = snapshot["errors"] + + +class LokiPushApiEvents(ObjectEvents): + """Event descriptor for events raised by `LokiPushApiProvider`.""" + + loki_push_api_endpoint_departed = EventSource(LokiPushApiEndpointDeparted) + loki_push_api_endpoint_joined = EventSource(LokiPushApiEndpointJoined) + loki_push_api_alert_rules_changed = EventSource(LokiPushApiAlertRulesChanged) + alert_rule_status_changed = EventSource(InvalidAlertRuleEvent) + + +class LokiPushApiProvider(Object): + """A LokiPushApiProvider class.""" + + on = LokiPushApiEvents() # pyright: ignore + + def __init__( + self, + charm, + relation_name: str = DEFAULT_RELATION_NAME, + *, + port: Union[str, int] = 3100, + scheme: str = "http", + address: str = "localhost", + path: str = "loki/api/v1/push", + ): + """A Loki service provider. + + Args: + charm: a `CharmBase` instance that manages this + instance of the Loki service. + relation_name: an optional string name of the relation between `charm` + and the Loki charmed service. The default is "logging". + It is strongly advised not to change the default, so that people + deploying your charm will have a consistent experience with all + other charms that consume metrics endpoints. + port: an optional port of the Loki service (default is "3100"). + scheme: an optional scheme of the Loki API URL (default is "http"). + address: an optional address of the Loki service (default is "localhost"). + path: an optional path of the Loki API URL (default is "loki/api/v1/push") + + Raises: + RelationNotFoundError: If there is no relation in the charm's metadata.yaml + with the same name as provided via `relation_name` argument. + RelationInterfaceMismatchError: The relation with the same name as provided + via `relation_name` argument does not have the `loki_push_api` relation + interface. + RelationRoleMismatchError: If the relation with the same name as provided + via `relation_name` argument does not have the `RelationRole.requires` + role. + """ + _validate_relation_by_interface_and_direction( + charm, relation_name, RELATION_INTERFACE_NAME, RelationRole.provides + ) + super().__init__(charm, relation_name) + self._charm = charm + self._relation_name = relation_name + self._tool = CosTool(self) + self.port = int(port) + self.scheme = scheme + self.address = address + self.path = path + + events = self._charm.on[relation_name] + self.framework.observe(self._charm.on.upgrade_charm, self._on_lifecycle_event) + self.framework.observe(events.relation_joined, self._on_logging_relation_joined) + self.framework.observe(events.relation_changed, self._on_logging_relation_changed) + self.framework.observe(events.relation_departed, self._on_logging_relation_departed) + self.framework.observe(events.relation_broken, self._on_logging_relation_broken) + + def _on_lifecycle_event(self, _): + # Upgrade event or other charm-level event + should_update = False + for relation in self._charm.model.relations[self._relation_name]: + # Don't accidentally flip a True result back. + should_update = should_update or self._process_logging_relation_changed(relation) + if should_update: + # We don't have a RelationEvent, so build it up by hand + first_rel = self._charm.model.relations[self._relation_name][0] + self.on.loki_push_api_alert_rules_changed.emit( + relation=first_rel, + relation_id=first_rel.id, + ) + + def _on_logging_relation_joined(self, event: RelationJoinedEvent): + """Set basic data on relation joins. + + Set the promtail binary URL location, which will not change, and anything + else which may be required, but is static.. + + Args: + event: a `CharmEvent` in response to which the consumer + charm must set its relation data. + """ + if self._charm.unit.is_leader(): + event.relation.data[self._charm.app].update(self._promtail_binary_url) + logger.debug("Saved promtail binary url: %s", self._promtail_binary_url) + + def _on_logging_relation_changed(self, event: HookEvent): + """Handle changes in related consumers. + + Anytime there are changes in the relation between Loki + and its consumers charms. + + Args: + event: a `CharmEvent` in response to which the consumer + charm must update its relation data. + """ + should_update = self._process_logging_relation_changed(event.relation) # pyright: ignore + if should_update: + self.on.loki_push_api_alert_rules_changed.emit( + relation=event.relation, # pyright: ignore + relation_id=event.relation.id, # pyright: ignore + app=self._charm.app, + unit=self._charm.unit, + ) + + def _on_logging_relation_broken(self, event: RelationBrokenEvent): + """Removes alert rules files when consumer charms left the relation with Loki. + + Args: + event: a `CharmEvent` in response to which the Loki + charm must update its relation data. + """ + self.on.loki_push_api_alert_rules_changed.emit( + relation=event.relation, + relation_id=event.relation.id, + app=self._charm.app, + unit=self._charm.unit, + ) + + def _on_logging_relation_departed(self, event: RelationDepartedEvent): + """Removes alert rules files when consumer charms left the relation with Loki. + + Args: + event: a `CharmEvent` in response to which the Loki + charm must update its relation data. + """ + self.on.loki_push_api_alert_rules_changed.emit( + relation=event.relation, + relation_id=event.relation.id, + app=self._charm.app, + unit=self._charm.unit, + ) + + def _should_update_alert_rules(self, relation) -> bool: + """Determine whether alert rules should be regenerated. + + If there are alert rules in the relation data bag, tell the charm + whether to regenerate them based on the boolean returned here. + """ + if relation.data.get(relation.app).get("alert_rules", None) is not None: + return True + return False + + def _process_logging_relation_changed(self, relation: Relation) -> bool: + """Handle changes in related consumers. + + Anytime there are changes in relations between Loki + and its consumers charms, Loki set the `loki_push_api` + into the relation data. Set the endpoint building + appropriately, and if there are alert rules present in + the relation, let the caller know. + Besides Loki generates alert rules files based what + consumer charms forwards, + + Args: + relation: the `Relation` instance to update. + + Returns: + A boolean indicating whether an event should be emitted, so we + only emit one on lifecycle events + """ + relation.data[self._charm.unit]["public_address"] = socket.getfqdn() or "" + self.update_endpoint(relation=relation) + return self._should_update_alert_rules(relation) + + @property + def _promtail_binary_url(self) -> dict: + """URL from which Promtail binary can be downloaded.""" + # construct promtail binary url paths from parts + promtail_binaries = {} + for arch, info in PROMTAIL_BINARIES.items(): + info["url"] = "{}/promtail-{}/{}.gz".format( + PROMTAIL_BASE_URL, PROMTAIL_VERSION, info["filename"] + ) + promtail_binaries[arch] = info + + return {"promtail_binary_zip_url": json.dumps(promtail_binaries)} + + def update_endpoint(self, url: str = "", relation: Optional[Relation] = None) -> None: + """Triggers programmatically the update of endpoint in unit relation data. + + This method should be used when the charm relying on this library needs + to update the relation data in response to something occurring outside + the `logging` relation lifecycle, e.g., in case of a + host address change because the charmed operator becomes connected to an + Ingress after the `logging` relation is established. + + Args: + url: An optional url value to update relation data. + relation: An optional instance of `class:ops.model.Relation` to update. + """ + # if no relation is specified update all of them + if not relation: + if not self._charm.model.relations.get(self._relation_name): + return + + relations_list = self._charm.model.relations.get(self._relation_name) + else: + relations_list = [relation] + + endpoint = self._endpoint(url or self._url) + + for relation in relations_list: + relation.data[self._charm.unit].update({"endpoint": json.dumps(endpoint)}) + + logger.debug("Saved endpoint in unit relation data") + + @property + def _url(self) -> str: + """Get local Loki Push API url. + + Return url to loki, including port number, but without the endpoint subpath. + """ + return "http://{}:{}".format(socket.getfqdn(), self.port) + + def _endpoint(self, url) -> dict: + """Get Loki push API endpoint for a given url. + + Args: + url: A loki unit URL. + + Returns: str + """ + endpoint = "/loki/api/v1/push" + return {"url": url.rstrip("/") + endpoint} + + @property + def alerts(self) -> dict: # noqa: C901 + """Fetch alerts for all relations. + + A Loki alert rules file consists of a list of "groups". Each + group consists of a list of alerts (`rules`) that are sequentially + executed. This method returns all the alert rules provided by each + related metrics provider charm. These rules may be used to generate a + separate alert rules file for each relation since the returned list + of alert groups are indexed by relation ID. Also for each relation ID + associated scrape metadata such as Juju model, UUID and application + name are provided so a unique name may be generated for the rules + file. For each relation the structure of data returned is a dictionary + with four keys + + - groups + - model + - model_uuid + - application + + The value of the `groups` key is such that it may be used to generate + a Loki alert rules file directly using `yaml.dump` but the + `groups` key itself must be included as this is required by Loki, + for example as in `yaml.dump({"groups": alerts["groups"]})`. + + Currently only accepts a list of rules and these + rules are all placed into a single group, even though Loki itself + allows for multiple groups within a single alert rules file. + + Returns: + a dictionary of alert rule groups and associated scrape + metadata indexed by relation ID. + """ + alerts = {} # type: Dict[str, dict] # mapping b/w juju identifiers and alert rule files + for relation in self._charm.model.relations[self._relation_name]: + if not relation.units or not relation.app: + continue + + alert_rules = json.loads(relation.data[relation.app].get("alert_rules", "{}")) + if not alert_rules: + continue + + alert_rules = self._inject_alert_expr_labels(alert_rules) + + identifier, topology = self._get_identifier_by_alert_rules(alert_rules) + if not topology: + try: + metadata = json.loads(relation.data[relation.app]["metadata"]) + identifier = JujuTopology.from_dict(metadata).identifier + alerts[identifier] = self._tool.apply_label_matchers(alert_rules) # type: ignore + + except KeyError as e: + logger.debug( + "Relation %s has no 'metadata': %s", + relation.id, + e, + ) + + if not identifier: + logger.error( + "Alert rules were found but no usable group or identifier was present." + ) + continue + + _, errmsg = self._tool.validate_alert_rules(alert_rules) + if errmsg: + relation.data[self._charm.app]["event"] = json.dumps({"errors": errmsg}) + continue + + alerts[identifier] = alert_rules + + return alerts + + def _get_identifier_by_alert_rules( + self, rules: dict + ) -> Tuple[Union[str, None], Union[JujuTopology, None]]: + """Determine an appropriate dict key for alert rules. + + The key is used as the filename when writing alerts to disk, so the structure + and uniqueness is important. + + Args: + rules: a dict of alert rules + Returns: + A tuple containing an identifier, if found, and a JujuTopology, if it could + be constructed. + """ + if "groups" not in rules: + logger.debug("No alert groups were found in relation data") + return None, None + + # Construct an ID based on what's in the alert rules if they have labels + for group in rules["groups"]: + try: + labels = group["rules"][0]["labels"] + topology = JujuTopology( + # Don't try to safely get required constructor fields. There's already + # a handler for KeyErrors + model_uuid=labels["juju_model_uuid"], + model=labels["juju_model"], + application=labels["juju_application"], + unit=labels.get("juju_unit", ""), + charm_name=labels.get("juju_charm", ""), + ) + return topology.identifier, topology + except KeyError: + logger.debug("Alert rules were found but no usable labels were present") + continue + + logger.warning( + "No labeled alert rules were found, and no 'scrape_metadata' " + "was available. Using the alert group name as filename." + ) + try: + for group in rules["groups"]: + return group["name"], None + except KeyError: + logger.debug("No group name was found to use as identifier") + + return None, None + + def _inject_alert_expr_labels(self, rules: Dict[str, Any]) -> Dict[str, Any]: + """Iterate through alert rules and inject topology into expressions. + + Args: + rules: a dict of alert rules + """ + if "groups" not in rules: + return rules + + modified_groups = [] + for group in rules["groups"]: + # Copy off rules, so we don't modify an object we're iterating over + rules_copy = group["rules"] + for idx, rule in enumerate(rules_copy): + labels = rule.get("labels") + + if labels: + try: + topology = JujuTopology( + # Don't try to safely get required constructor fields. There's already + # a handler for KeyErrors + model_uuid=labels["juju_model_uuid"], + model=labels["juju_model"], + application=labels["juju_application"], + unit=labels.get("juju_unit", ""), + charm_name=labels.get("juju_charm", ""), + ) + + # Inject topology and put it back in the list + rule["expr"] = self._tool.inject_label_matchers( + re.sub(r"%%juju_topology%%,?", "", rule["expr"]), + topology.label_matcher_dict, + ) + except KeyError: + # Some required JujuTopology key is missing. Just move on. + pass + + group["rules"][idx] = rule + + modified_groups.append(group) + + rules["groups"] = modified_groups + return rules + + +class ConsumerBase(Object): + """Consumer's base class.""" + + def __init__( + self, + charm: CharmBase, + relation_name: str = DEFAULT_RELATION_NAME, + alert_rules_path: str = DEFAULT_ALERT_RULES_RELATIVE_PATH, + recursive: bool = False, + skip_alert_topology_labeling: bool = False, + ): + super().__init__(charm, relation_name) + self._charm = charm + self._relation_name = relation_name + self.topology = JujuTopology.from_charm(charm) + + try: + alert_rules_path = _resolve_dir_against_charm_path(charm, alert_rules_path) + except InvalidAlertRulePathError as e: + logger.debug( + "Invalid Loki alert rules folder at %s: %s", + e.alert_rules_absolute_path, + e.message, + ) + self._alert_rules_path = alert_rules_path + self._skip_alert_topology_labeling = skip_alert_topology_labeling + + self._recursive = recursive + + def _handle_alert_rules(self, relation): + if not self._charm.unit.is_leader(): + return + + alert_rules = ( + AlertRules(None) if self._skip_alert_topology_labeling else AlertRules(self.topology) + ) + alert_rules.add_path(self._alert_rules_path, recursive=self._recursive) + alert_rules_as_dict = alert_rules.as_dict() + + relation.data[self._charm.app]["metadata"] = json.dumps(self.topology.as_dict()) + relation.data[self._charm.app]["alert_rules"] = json.dumps( + alert_rules_as_dict, + sort_keys=True, # sort, to prevent unnecessary relation_changed events + ) + + @property + def loki_endpoints(self) -> List[dict]: + """Fetch Loki Push API endpoints sent from LokiPushApiProvider through relation data. + + Returns: + A list of dictionaries with Loki Push API endpoints, for instance: + [ + {"url": "http://loki1:3100/loki/api/v1/push"}, + {"url": "http://loki2:3100/loki/api/v1/push"}, + ] + """ + endpoints = [] # type: list + + for relation in self._charm.model.relations[self._relation_name]: + for unit in relation.units: + if unit.app == self._charm.app: + # This is a peer unit + continue + + endpoint = relation.data[unit].get("endpoint") + if endpoint: + deserialized_endpoint = json.loads(endpoint) + endpoints.append(deserialized_endpoint) + + return endpoints + + +class LokiPushApiConsumer(ConsumerBase): + """Loki Consumer class.""" + + on = LokiPushApiEvents() # pyright: ignore + + def __init__( + self, + charm: CharmBase, + relation_name: str = DEFAULT_RELATION_NAME, + alert_rules_path: str = DEFAULT_ALERT_RULES_RELATIVE_PATH, + recursive: bool = True, + skip_alert_topology_labeling: bool = False, + ): + """Construct a Loki charm client. + + The `LokiPushApiConsumer` object provides configurations to a Loki client charm, such as + the Loki API endpoint to push logs. It is intended for workloads that can speak + loki_push_api (https://grafana.com/docs/loki/latest/api/#push-log-entries-to-loki), such + as grafana-agent. + (If you need to forward workload stdout logs, then use LogForwarder; if you need to forward + log files, then use LogProxyConsumer.) + + `LokiPushApiConsumer` can be instantiated as follows: + + self._loki_consumer = LokiPushApiConsumer(self) + + Args: + charm: a `CharmBase` object that manages this `LokiPushApiConsumer` object. + Typically, this is `self` in the instantiating class. + relation_name: the string name of the relation interface to look up. + If `charm` has exactly one relation with this interface, the relation's + name is returned. If none or multiple relations with the provided interface + are found, this method will raise either a NoRelationWithInterfaceFoundError or + MultipleRelationsWithInterfaceFoundError exception, respectively. + alert_rules_path: a string indicating a path where alert rules can be found + recursive: Whether to scan for rule files recursively. + skip_alert_topology_labeling: whether to skip the alert topology labeling. + + Raises: + RelationNotFoundError: If there is no relation in the charm's metadata.yaml + with the same name as provided via `relation_name` argument. + RelationInterfaceMismatchError: The relation with the same name as provided + via `relation_name` argument does not have the `loki_push_api` relation + interface. + RelationRoleMismatchError: If the relation with the same name as provided + via `relation_name` argument does not have the `RelationRole.provides` + role. + + Emits: + loki_push_api_endpoint_joined: This event is emitted when the relation between the + Charmed Operator that instantiates `LokiPushApiProvider` (Loki charm for instance) + and the Charmed Operator that instantiates `LokiPushApiConsumer` is established. + loki_push_api_endpoint_departed: This event is emitted when the relation between the + Charmed Operator that implements `LokiPushApiProvider` (Loki charm for instance) + and the Charmed Operator that implements `LokiPushApiConsumer` is removed. + loki_push_api_alert_rules_error: This event is emitted when an invalid alert rules + file is encountered or if `alert_rules_path` is empty. + """ + _validate_relation_by_interface_and_direction( + charm, relation_name, RELATION_INTERFACE_NAME, RelationRole.requires + ) + super().__init__( + charm, relation_name, alert_rules_path, recursive, skip_alert_topology_labeling + ) + events = self._charm.on[relation_name] + self.framework.observe(self._charm.on.upgrade_charm, self._on_lifecycle_event) + self.framework.observe(events.relation_joined, self._on_logging_relation_joined) + self.framework.observe(events.relation_changed, self._on_logging_relation_changed) + self.framework.observe(events.relation_departed, self._on_logging_relation_departed) + + def _on_lifecycle_event(self, _: HookEvent): + """Update require relation data on charm upgrades and other lifecycle events. + + Args: + event: a `CharmEvent` in response to which the consumer + charm must update its relation data. + """ + # Upgrade event or other charm-level event + self._reinitialize_alert_rules() + self.on.loki_push_api_endpoint_joined.emit() + + def _on_logging_relation_joined(self, event: RelationJoinedEvent): + """Handle changes in related consumers. + + Update relation data and emit events when a relation is established. + + Args: + event: a `CharmEvent` in response to which the consumer + charm must update its relation data. + + Emits: + loki_push_api_endpoint_joined: Once the relation is established, this event is emitted. + loki_push_api_alert_rules_error: This event is emitted when an invalid alert rules + file is encountered or if `alert_rules_path` is empty. + """ + # Alert rules will not change over the lifecycle of a charm, and do not need to be + # constantly set on every relation_changed event. Leave them here. + self._handle_alert_rules(event.relation) + self.on.loki_push_api_endpoint_joined.emit() + + def _on_logging_relation_changed(self, event: RelationEvent): + """Handle changes in related consumers. + + Anytime there are changes in the relation between Loki + and its consumers charms. + + Args: + event: a `CharmEvent` in response to which the consumer + charm must update its relation data. + + Emits: + loki_push_api_endpoint_joined: Once the relation is established, this event is emitted. + loki_push_api_alert_rules_error: This event is emitted when an invalid alert rules + file is encountered or if `alert_rules_path` is empty. + """ + if self._charm.unit.is_leader(): + ev = json.loads(event.relation.data[event.app].get("event", "{}")) + + if ev: + valid = bool(ev.get("valid", True)) + errors = ev.get("errors", "") + + if valid and not errors: + self.on.alert_rule_status_changed.emit(valid=valid) + else: + self.on.alert_rule_status_changed.emit(valid=valid, errors=errors) + + self.on.loki_push_api_endpoint_joined.emit() + + def _reinitialize_alert_rules(self): + """Reloads alert rules and updates all relations.""" + for relation in self._charm.model.relations[self._relation_name]: + self._handle_alert_rules(relation) + + def _process_logging_relation_changed(self, relation: Relation): + self._handle_alert_rules(relation) + self.on.loki_push_api_endpoint_joined.emit() + + def _on_logging_relation_departed(self, _: RelationEvent): + """Handle departures in related providers. + + Anytime there are departures in relations between the consumer charm and Loki + the consumer charm is informed, through a `LokiPushApiEndpointDeparted` event. + The consumer charm can then choose to update its configuration. + """ + # Provide default to avoid throwing, as in some complicated scenarios with + # upgrades and hook failures we might not have data in the storage + self.on.loki_push_api_endpoint_departed.emit() + + +class ContainerNotFoundError(Exception): + """Raised if the specified container does not exist.""" + + def __init__(self): + msg = "The specified container does not exist." + self.message = msg + + super().__init__(self.message) + + +class PromtailDigestError(EventBase): + """Event emitted when there is an error with Promtail initialization.""" + + def __init__(self, handle, message): + super().__init__(handle) + self.message = message + + def snapshot(self): + """Save message information.""" + return {"message": self.message} + + def restore(self, snapshot): + """Restore message information.""" + self.message = snapshot["message"] + + +class LogProxyEndpointDeparted(EventBase): + """Event emitted when a Log Proxy has departed.""" + + +class LogProxyEndpointJoined(EventBase): + """Event emitted when a Log Proxy joins.""" + + +class LogProxyEvents(ObjectEvents): + """Event descriptor for events raised by `LogProxyConsumer`.""" + + promtail_digest_error = EventSource(PromtailDigestError) + log_proxy_endpoint_departed = EventSource(LogProxyEndpointDeparted) + log_proxy_endpoint_joined = EventSource(LogProxyEndpointJoined) + + +class LogProxyConsumer(ConsumerBase): + """LogProxyConsumer class. + + > Note: This object is deprecated. Consider migrating to LogForwarder with the release of Juju + > 3.6 LTS. + + The `LogProxyConsumer` object provides a method for attaching `promtail` to + a workload in order to generate structured logging data from applications + which traditionally log to syslog or do not have native Loki integration. + The `LogProxyConsumer` can be instantiated as follows: + + self._log_proxy = LogProxyConsumer( + self, + logs_scheme={ + "workload-a": { + "log-files": ["/tmp/worload-a-1.log", "/tmp/worload-a-2.log"], + "syslog-port": 1514, + }, + "workload-b": {"log-files": ["/tmp/worload-b.log"], "syslog-port": 1515}, + }, + relation_name="log-proxy", + ) + + Args: + charm: a `CharmBase` object that manages this `LokiPushApiConsumer` object. + Typically, this is `self` in the instantiating class. + logs_scheme: a dict which maps containers and a list of log files and syslog port. + relation_name: the string name of the relation interface to look up. + If `charm` has exactly one relation with this interface, the relation's + name is returned. If none or multiple relations with the provided interface + are found, this method will raise either a NoRelationWithInterfaceFoundError or + MultipleRelationsWithInterfaceFoundError exception, respectively. + containers_syslog_port: a dict which maps (and enable) containers and syslog port. + alert_rules_path: an optional path for the location of alert rules + files. Defaults to "./src/loki_alert_rules", + resolved from the directory hosting the charm entry file. + The alert rules are automatically updated on charm upgrade. + recursive: Whether to scan for rule files recursively. + promtail_resource_name: An optional promtail resource name from metadata + if it has been modified and attached + insecure_skip_verify: skip SSL verification. + + Raises: + RelationNotFoundError: If there is no relation in the charm's metadata.yaml + with the same name as provided via `relation_name` argument. + RelationInterfaceMismatchError: The relation with the same name as provided + via `relation_name` argument does not have the `loki_push_api` relation + interface. + RelationRoleMismatchError: If the relation with the same name as provided + via `relation_name` argument does not have the `RelationRole.provides` + role. + """ + + on = LogProxyEvents() # pyright: ignore + + def __init__( + self, + charm, + *, + logs_scheme=None, + relation_name: str = DEFAULT_LOG_PROXY_RELATION_NAME, + alert_rules_path: str = DEFAULT_ALERT_RULES_RELATIVE_PATH, + recursive: bool = False, + promtail_resource_name: Optional[str] = None, + insecure_skip_verify: bool = False, + ): + super().__init__(charm, relation_name, alert_rules_path, recursive) + self._charm = charm + self._logs_scheme = logs_scheme or {} + self._relation_name = relation_name + self.topology = JujuTopology.from_charm(charm) + self._promtail_resource_name = promtail_resource_name or "promtail-bin" + self.insecure_skip_verify = insecure_skip_verify + self._promtails_ports = self._generate_promtails_ports(logs_scheme) + + # architecture used for promtail binary + arch = platform.processor() + if arch in ["x86_64", "amd64"]: + self._arch = "amd64" + elif arch in ["aarch64", "arm64", "armv8b", "armv8l"]: + self._arch = "arm64" + else: + self._arch = arch + + events = self._charm.on[relation_name] + self.framework.observe(events.relation_created, self._on_relation_created) + self.framework.observe(events.relation_changed, self._on_relation_changed) + self.framework.observe(events.relation_departed, self._on_relation_departed) + self._observe_pebble_ready() + + def _observe_pebble_ready(self): + for container in self._containers.keys(): + snake_case_container_name = container.replace("-", "_") + self.framework.observe( + getattr(self._charm.on, f"{snake_case_container_name}_pebble_ready"), + self._on_pebble_ready, + ) + + def _on_pebble_ready(self, event: WorkloadEvent): + """Event handler for `pebble_ready`.""" + if self.model.relations[self._relation_name]: + self._setup_promtail(event.workload) + + def _on_relation_created(self, _: RelationCreatedEvent) -> None: + """Event handler for `relation_created`.""" + for container in self._containers.values(): + if container.can_connect(): + self._setup_promtail(container) + + def _on_relation_changed(self, event: RelationEvent) -> None: + """Event handler for `relation_changed`. + + Args: + event: The event object `RelationChangedEvent`. + """ + self._handle_alert_rules(event.relation) + + if self._charm.unit.is_leader(): + ev = json.loads(event.relation.data[event.app].get("event", "{}")) + + if ev: + valid = bool(ev.get("valid", True)) + errors = ev.get("errors", "") + + if valid and not errors: + self.on.alert_rule_status_changed.emit(valid=valid) + else: + self.on.alert_rule_status_changed.emit(valid=valid, errors=errors) + + for container in self._containers.values(): + if not container.can_connect(): + continue + if self.model.relations[self._relation_name]: + if "promtail" not in container.get_plan().services: + self._setup_promtail(container) + continue + + new_config = self._promtail_config(container.name) + if new_config != self._current_config(container): + container.push( + WORKLOAD_CONFIG_PATH, yaml.safe_dump(new_config), make_dirs=True + ) + + # Loki may send endpoints late. Don't necessarily start, there may be + # no clients + if new_config["clients"]: + container.restart(WORKLOAD_SERVICE_NAME) + self.on.log_proxy_endpoint_joined.emit() + else: + self.on.promtail_digest_error.emit("No promtail client endpoints available!") + + def _on_relation_departed(self, _: RelationEvent) -> None: + """Event handler for `relation_departed`. + + Args: + event: The event object `RelationDepartedEvent`. + """ + for container in self._containers.values(): + if not container.can_connect(): + continue + if not self._charm.model.relations[self._relation_name]: + container.stop(WORKLOAD_SERVICE_NAME) + continue + + new_config = self._promtail_config(container.name) + if new_config != self._current_config(container): + container.push(WORKLOAD_CONFIG_PATH, yaml.safe_dump(new_config), make_dirs=True) + + if new_config["clients"]: + container.restart(WORKLOAD_SERVICE_NAME) + else: + container.stop(WORKLOAD_SERVICE_NAME) + self.on.log_proxy_endpoint_departed.emit() + + def _add_pebble_layer(self, workload_binary_path: str, container: Container) -> None: + """Adds Pebble layer that manages Promtail service in Workload container. + + Args: + workload_binary_path: string providing path to promtail binary in workload container. + container: container into which the layer is to be added. + """ + pebble_layer = Layer( + { + "summary": "promtail layer", + "description": "pebble config layer for promtail", + "services": { + WORKLOAD_SERVICE_NAME: { + "override": "replace", + "summary": WORKLOAD_SERVICE_NAME, + "command": f"{workload_binary_path} {self._cli_args}", + "startup": "disabled", + } + }, + } + ) + container.add_layer(container.name, pebble_layer, combine=True) + + def _create_directories(self, container: Container) -> None: + """Creates the directories for Promtail binary and config file.""" + container.make_dir(path=WORKLOAD_BINARY_DIR, make_parents=True) + container.make_dir(path=WORKLOAD_CONFIG_DIR, make_parents=True) + + def _obtain_promtail(self, promtail_info: dict, container: Container) -> None: + """Obtain promtail binary from an attached resource or download it. + + Args: + promtail_info: dictionary containing information about promtail binary + that must be used. The dictionary must have three keys + - "filename": filename of promtail binary + - "zipsha": sha256 sum of zip file of promtail binary + - "binsha": sha256 sum of unpacked promtail binary + container: container into which promtail is to be obtained. + """ + workload_binary_path = os.path.join(WORKLOAD_BINARY_DIR, promtail_info["filename"]) + if self._promtail_attached_as_resource: + self._push_promtail_if_attached(container, workload_binary_path) + return + + if self._promtail_must_be_downloaded(promtail_info): + self._download_and_push_promtail_to_workload(container, promtail_info) + else: + binary_path = os.path.join(BINARY_DIR, promtail_info["filename"]) + self._push_binary_to_workload(container, binary_path, workload_binary_path) + + def _push_binary_to_workload( + self, container: Container, binary_path: str, workload_binary_path: str + ) -> None: + """Push promtail binary into workload container. + + Args: + binary_path: path in charm container from which promtail binary is read. + workload_binary_path: path in workload container to which promtail binary is pushed. + container: container into which promtail is to be uploaded. + """ + with open(binary_path, "rb") as f: + container.push(workload_binary_path, f, permissions=0o755, make_dirs=True) + logger.debug("The promtail binary file has been pushed to the workload container.") + + @property + def _promtail_attached_as_resource(self) -> bool: + """Checks whether Promtail binary is attached to the charm or not. + + Returns: + a boolean representing whether Promtail binary is attached as a resource or not. + """ + try: + self._charm.model.resources.fetch(self._promtail_resource_name) + return True + except ModelError: + return False + except NameError as e: + if "invalid resource name" in str(e): + return False + raise + + def _push_promtail_if_attached(self, container: Container, workload_binary_path: str) -> bool: + """Checks whether Promtail binary is attached to the charm or not. + + Args: + workload_binary_path: string specifying expected path of promtail + in workload container + container: container into which promtail is to be pushed. + + Returns: + a boolean representing whether Promtail binary is attached or not. + """ + logger.info("Promtail binary file has been obtained from an attached resource.") + resource_path = self._charm.model.resources.fetch(self._promtail_resource_name) + self._push_binary_to_workload(container, resource_path, workload_binary_path) + return True + + def _promtail_must_be_downloaded(self, promtail_info: dict) -> bool: + """Checks whether promtail binary must be downloaded or not. + + Args: + promtail_info: dictionary containing information about promtail binary + that must be used. The dictionary must have three keys + - "filename": filename of promtail binary + - "zipsha": sha256 sum of zip file of promtail binary + - "binsha": sha256 sum of unpacked promtail binary + + Returns: + a boolean representing whether Promtail binary must be downloaded or not. + """ + binary_path = os.path.join(BINARY_DIR, promtail_info["filename"]) + if not self._is_promtail_binary_in_charm(binary_path): + return True + + if not self._sha256sums_matches(binary_path, promtail_info["binsha"]): + return True + + logger.debug("Promtail binary file is already in the the charm container.") + return False + + def _sha256sums_matches(self, file_path: str, sha256sum: str) -> bool: + """Checks whether a file's sha256sum matches or not with a specific sha256sum. + + Args: + file_path: A string representing the files' patch. + sha256sum: The sha256sum against which we want to verify. + + Returns: + a boolean representing whether a file's sha256sum matches or not with + a specific sha256sum. + """ + try: + with open(file_path, "rb") as f: + file_bytes = f.read() + result = sha256(file_bytes).hexdigest() + + if result != sha256sum: + msg = "File sha256sum mismatch, expected:'{}' but got '{}'".format( + sha256sum, result + ) + logger.debug(msg) + return False + + return True + except (APIError, FileNotFoundError): + msg = "File: '{}' could not be opened".format(file_path) + logger.error(msg) + return False + + def _is_promtail_binary_in_charm(self, binary_path: str) -> bool: + """Check if Promtail binary is already stored in charm container. + + Args: + binary_path: string path of promtail binary to check + + Returns: + a boolean representing whether Promtail is present or not. + """ + return True if Path(binary_path).is_file() else False + + def _download_and_push_promtail_to_workload( + self, container: Container, promtail_info: dict + ) -> None: + """Downloads a Promtail zip file and pushes the binary to the workload. + + Args: + promtail_info: dictionary containing information about promtail binary + that must be used. The dictionary must have three keys + - "filename": filename of promtail binary + - "zipsha": sha256 sum of zip file of promtail binary + - "binsha": sha256 sum of unpacked promtail binary + container: container into which promtail is to be uploaded. + """ + # Check for Juju proxy variables and fall back to standard ones if not set + # If no Juju proxy variable was set, we set proxies to None to let the ProxyHandler get + # the proxy env variables from the environment + proxies = { + # The ProxyHandler uses only the protocol names as keys + # https://docs.python.org/3/library/urllib.request.html#urllib.request.ProxyHandler + "https": os.environ.get("JUJU_CHARM_HTTPS_PROXY", ""), + "http": os.environ.get("JUJU_CHARM_HTTP_PROXY", ""), + # The ProxyHandler uses `no` for the no_proxy key + # https://github.com/python/cpython/blob/3.12/Lib/urllib/request.py#L2553 + "no": os.environ.get("JUJU_CHARM_NO_PROXY", ""), + } + proxies = {k: v for k, v in proxies.items() if v != ""} or None + + proxy_handler = request.ProxyHandler(proxies) + opener = request.build_opener(proxy_handler) + + with opener.open(promtail_info["url"]) as r: + file_bytes = r.read() + file_path = os.path.join(BINARY_DIR, promtail_info["filename"] + ".gz") + with open(file_path, "wb") as f: + f.write(file_bytes) + logger.info( + "Promtail binary zip file has been downloaded and stored in: %s", + file_path, + ) + + decompressed_file = GzipFile(fileobj=BytesIO(file_bytes)) + binary_path = os.path.join(BINARY_DIR, promtail_info["filename"]) + with open(binary_path, "wb") as outfile: + outfile.write(decompressed_file.read()) + logger.debug("Promtail binary file has been downloaded.") + + workload_binary_path = os.path.join(WORKLOAD_BINARY_DIR, promtail_info["filename"]) + self._push_binary_to_workload(container, binary_path, workload_binary_path) + + @property + def _cli_args(self) -> str: + """Return the cli arguments to pass to promtail. + + Returns: + The arguments as a string + """ + return "-config.file={}".format(WORKLOAD_CONFIG_PATH) + + def _current_config(self, container) -> dict: + """Property that returns the current Promtail configuration. + + Returns: + A dict containing Promtail configuration. + """ + if not container.can_connect(): + logger.debug("Could not connect to promtail container!") + return {} + try: + raw_current = container.pull(WORKLOAD_CONFIG_PATH).read() + return yaml.safe_load(raw_current) + except (ProtocolError, PathError) as e: + logger.warning( + "Could not check the current promtail configuration due to " + "a failure in retrieving the file: %s", + e, + ) + return {} + + def _promtail_config(self, container_name: str) -> dict: + """Generates the config file for Promtail. + + Reference: https://grafana.com/docs/loki/latest/send-data/promtail/configuration + """ + config = {"clients": self._clients_list()} + if self.insecure_skip_verify: + for client in config["clients"]: + client["tls_config"] = {"insecure_skip_verify": True} + + config.update(self._server_config(container_name)) + config.update(self._positions) + config.update(self._scrape_configs(container_name)) + return config + + def _clients_list(self) -> list: + """Generates a list of clients for use in the promtail config. + + Returns: + A list of endpoints + """ + return self.loki_endpoints + + def _server_config(self, container_name: str) -> dict: + """Generates the server section of the Promtail config file. + + Returns: + A dict representing the `server` section. + """ + return { + "server": { + "http_listen_port": self._promtails_ports[container_name]["http_listen_port"], + "grpc_listen_port": self._promtails_ports[container_name]["grpc_listen_port"], + } + } + + @property + def _positions(self) -> dict: + """Generates the positions section of the Promtail config file. + + Returns: + A dict representing the `positions` section. + """ + return {"positions": {"filename": WORKLOAD_POSITIONS_PATH}} + + def _scrape_configs(self, container_name: str) -> dict: + """Generates the scrape_configs section of the Promtail config file. + + Returns: + A dict representing the `scrape_configs` section. + """ + job_name = f"juju_{self.topology.identifier}" + + # The new JujuTopology doesn't include unit, but LogProxyConsumer should have it + common_labels = { + f"juju_{k}": v + for k, v in self.topology.as_dict(remapped_keys={"charm_name": "charm"}).items() + } + common_labels["container"] = container_name + scrape_configs = [] + + # Files config + labels = common_labels.copy() + labels.update( + { + "job": job_name, + "__path__": "", + } + ) + config = {"targets": ["localhost"], "labels": labels} + scrape_config = { + "job_name": "system", + "static_configs": self._generate_static_configs(config, container_name), + } + scrape_configs.append(scrape_config) + + # Syslog config + syslog_port = self._logs_scheme.get(container_name, {}).get("syslog-port") + if syslog_port: + relabel_mappings = [ + "severity", + "facility", + "hostname", + "app_name", + "proc_id", + "msg_id", + ] + syslog_labels = common_labels.copy() + syslog_labels.update({"job": f"{job_name}_syslog"}) + syslog_config = { + "job_name": "syslog", + "syslog": { + "listen_address": f"127.0.0.1:{syslog_port}", + "label_structured_data": True, + "labels": syslog_labels, + }, + "relabel_configs": [ + {"source_labels": [f"__syslog_message_{val}"], "target_label": val} + for val in relabel_mappings + ] + + [{"action": "labelmap", "regex": "__syslog_message_sd_(.+)"}], + } + scrape_configs.append(syslog_config) # type: ignore + + return {"scrape_configs": scrape_configs} + + def _generate_static_configs(self, config: dict, container_name: str) -> list: + """Generates static_configs section. + + Returns: + - a list of dictionaries representing static_configs section + """ + static_configs = [] + + for _file in self._logs_scheme.get(container_name, {}).get("log-files", []): + conf = deepcopy(config) + conf["labels"]["__path__"] = _file + static_configs.append(conf) + + return static_configs + + def _setup_promtail(self, container: Container) -> None: + # Use the first + relations = self._charm.model.relations[self._relation_name] + if len(relations) > 1: + logger.debug( + "Multiple log_proxy relations. Getting Promtail from application {}".format( + relations[0].app.name + ) + ) + relation = relations[0] + promtail_binaries = json.loads( + relation.data[relation.app].get("promtail_binary_zip_url", "{}") + ) + if not promtail_binaries: + return + + self._create_directories(container) + self._ensure_promtail_binary(promtail_binaries, container) + + container.push( + WORKLOAD_CONFIG_PATH, + yaml.safe_dump(self._promtail_config(container.name)), + make_dirs=True, + ) + + workload_binary_path = os.path.join( + WORKLOAD_BINARY_DIR, promtail_binaries[self._arch]["filename"] + ) + self._add_pebble_layer(workload_binary_path, container) + + if self._current_config(container).get("clients"): + try: + container.restart(WORKLOAD_SERVICE_NAME) + except ChangeError as e: + self.on.promtail_digest_error.emit(str(e)) + else: + self.on.log_proxy_endpoint_joined.emit() + else: + self.on.promtail_digest_error.emit("No promtail client endpoints available!") + + def _ensure_promtail_binary(self, promtail_binaries: dict, container: Container): + if self._is_promtail_installed(promtail_binaries[self._arch], container): + return + + try: + self._obtain_promtail(promtail_binaries[self._arch], container) + except URLError as e: + msg = f"Promtail binary couldn't be downloaded - {str(e)}" + logger.warning(msg) + self.on.promtail_digest_error.emit(msg) + + def _is_promtail_installed(self, promtail_info: dict, container: Container) -> bool: + """Determine if promtail has already been installed to the container. + + Args: + promtail_info: dictionary containing information about promtail binary + that must be used. The dictionary must at least contain a key + "filename" giving the name of promtail binary + container: container in which to check whether promtail is installed. + """ + workload_binary_path = f"{WORKLOAD_BINARY_DIR}/{promtail_info['filename']}" + try: + container.list_files(workload_binary_path) + except (APIError, FileNotFoundError): + return False + return True + + def _generate_promtails_ports(self, logs_scheme) -> dict: + return { + container: { + "http_listen_port": HTTP_LISTEN_PORT_START + 2 * i, + "grpc_listen_port": GRPC_LISTEN_PORT_START + 2 * i, + } + for i, container in enumerate(logs_scheme.keys()) + } + + def syslog_port(self, container_name: str) -> str: + """Gets the port on which promtail is listening for syslog in this container. + + Returns: + A str representing the port + """ + return str(self._logs_scheme.get(container_name, {}).get("syslog-port")) + + def rsyslog_config(self, container_name: str) -> str: + """Generates a config line for use with rsyslog. + + Returns: + The rsyslog config line as a string + """ + return 'action(type="omfwd" protocol="tcp" target="127.0.0.1" port="{}" Template="RSYSLOG_SyslogProtocol23Format" TCP_Framing="octet-counted")'.format( + self._logs_scheme.get(container_name, {}).get("syslog-port") + ) + + @property + def _containers(self) -> Dict[str, Container]: + return {cont: self._charm.unit.get_container(cont) for cont in self._logs_scheme.keys()} + + +class _PebbleLogClient: + @staticmethod + def check_juju_version() -> bool: + """Make sure the Juju version supports Log Forwarding.""" + juju_version = JujuVersion.from_environ() + if not juju_version > JujuVersion(version=str("3.3")): + msg = f"Juju version {juju_version} does not support Pebble log forwarding. Juju >= 3.4 is needed." + logger.warning(msg) + return False + return True + + @staticmethod + def _build_log_target( + unit_name: str, loki_endpoint: str, topology: JujuTopology, enable: bool + ) -> Dict: + """Build a log target for the log forwarding Pebble layer. + + Log target's syntax for enabling/disabling forwarding is explained here: + https://github.com/canonical/pebble?tab=readme-ov-file#log-forwarding + """ + services_value = ["all"] if enable else ["-all"] + + log_target = { + "override": "replace", + "services": services_value, + "type": "loki", + "location": loki_endpoint, + } + if enable: + log_target.update( + { + "labels": { + "product": "Juju", + "charm": topology._charm_name, + "juju_model": topology._model, + "juju_model_uuid": topology._model_uuid, + "juju_application": topology._application, + "juju_unit": topology._unit, + }, + } + ) + + return {unit_name: log_target} + + @staticmethod + def _build_log_targets( + loki_endpoints: Optional[Dict[str, str]], topology: JujuTopology, enable: bool + ): + """Build all the targets for the log forwarding Pebble layer.""" + targets = {} + if not loki_endpoints: + return targets + + for unit_name, endpoint in loki_endpoints.items(): + targets.update( + _PebbleLogClient._build_log_target( + unit_name=unit_name, + loki_endpoint=endpoint, + topology=topology, + enable=enable, + ) + ) + return targets + + @staticmethod + def disable_inactive_endpoints( + container: Container, active_endpoints: Dict[str, str], topology: JujuTopology + ): + """Disable forwarding for inactive endpoints by checking against the Pebble plan.""" + pebble_layer = container.get_plan().to_dict().get("log-targets", None) + if not pebble_layer: + return + + for unit_name, target in pebble_layer.items(): + # If the layer is a disabled log forwarding endpoint, skip it + if "-all" in target["services"]: # pyright: ignore + continue + + if unit_name not in active_endpoints: + layer = Layer( + { # pyright: ignore + "log-targets": _PebbleLogClient._build_log_targets( + loki_endpoints={unit_name: "(removed)"}, + topology=topology, + enable=False, + ) + } + ) + container.add_layer(f"{container.name}-log-forwarding", layer=layer, combine=True) + + @staticmethod + def enable_endpoints( + container: Container, active_endpoints: Dict[str, str], topology: JujuTopology + ): + """Enable forwarding for the specified Loki endpoints.""" + layer = Layer( + { # pyright: ignore + "log-targets": _PebbleLogClient._build_log_targets( + loki_endpoints=active_endpoints, + topology=topology, + enable=True, + ) + } + ) + container.add_layer(f"{container.name}-log-forwarding", layer, combine=True) + + +class LogForwarder(ConsumerBase): + """Forward the standard outputs of all workloads operated by a charm to one or multiple Loki endpoints. + + This class implements Pebble log forwarding. Juju >= 3.4 is needed. + """ + + def __init__( + self, + charm: CharmBase, + *, + relation_name: str = DEFAULT_RELATION_NAME, + alert_rules_path: str = DEFAULT_ALERT_RULES_RELATIVE_PATH, + recursive: bool = True, + skip_alert_topology_labeling: bool = False, + ): + _PebbleLogClient.check_juju_version() + super().__init__( + charm, relation_name, alert_rules_path, recursive, skip_alert_topology_labeling + ) + self._charm = charm + self._relation_name = relation_name + + on = self._charm.on[self._relation_name] + self.framework.observe(on.relation_joined, self._update_logging) + self.framework.observe(on.relation_changed, self._update_logging) + self.framework.observe(on.relation_departed, self._update_logging) + self.framework.observe(on.relation_broken, self._update_logging) + + for container_name in self._charm.meta.containers.keys(): + snake_case_container_name = container_name.replace("-", "_") + self.framework.observe( + getattr(self._charm.on, f"{snake_case_container_name}_pebble_ready"), + self._on_pebble_ready, + ) + + def _on_pebble_ready(self, event: PebbleReadyEvent): + if not (loki_endpoints := self._retrieve_endpoints_from_relation()): + logger.warning("No Loki endpoints available") + return + + self._update_endpoints(event.workload, loki_endpoints) + + def _update_logging(self, event: RelationEvent): + """Update the log forwarding to match the active Loki endpoints.""" + if not (loki_endpoints := self._retrieve_endpoints_from_relation()): + logger.warning("No Loki endpoints available") + return + + for container in self._charm.unit.containers.values(): + if container.can_connect(): + self._update_endpoints(container, loki_endpoints) + # else: `_update_endpoints` will be called on pebble-ready anyway. + + self._handle_alert_rules(event.relation) + + def _retrieve_endpoints_from_relation(self) -> dict: + loki_endpoints = {} + + # Get the endpoints from relation data + for relation in self._charm.model.relations[self._relation_name]: + loki_endpoints.update(self._fetch_endpoints(relation)) + + return loki_endpoints + + def _update_endpoints(self, container: Container, loki_endpoints: dict): + _PebbleLogClient.disable_inactive_endpoints( + container=container, + active_endpoints=loki_endpoints, + topology=self.topology, + ) + _PebbleLogClient.enable_endpoints( + container=container, active_endpoints=loki_endpoints, topology=self.topology + ) + + def is_ready(self, relation: Optional[Relation] = None): + """Check if the relation is active and healthy.""" + if not relation: + relations = self._charm.model.relations[self._relation_name] + if not relations: + return False + return all(self.is_ready(relation) for relation in relations) + + try: + if self._extract_urls(relation): + return True + return False + except (KeyError, json.JSONDecodeError): + return False + + def _extract_urls(self, relation: Relation) -> Dict[str, str]: + """Default getter function to extract Loki endpoints from a relation. + + Returns: + A dictionary of remote units and the respective Loki endpoint. + { + "loki/0": "http://loki:3100/loki/api/v1/push", + "another-loki/0": "http://another-loki:3100/loki/api/v1/push", + } + """ + endpoints: Dict = {} + + for unit in relation.units: + endpoint = relation.data[unit]["endpoint"] + deserialized_endpoint = json.loads(endpoint) + url = deserialized_endpoint["url"] + endpoints[unit.name] = url + + return endpoints + + def _fetch_endpoints(self, relation: Relation) -> Dict[str, str]: + """Fetch Loki Push API endpoints from relation data using the endpoints getter.""" + endpoints: Dict = {} + + if not self.is_ready(relation): + logger.warning(f"The relation '{relation.name}' is not ready yet.") + return endpoints + + # if the code gets here, the function won't raise anymore because it's + # also called in is_ready() + endpoints = self._extract_urls(relation) + + return endpoints + + +class CosTool: + """Uses cos-tool to inject label matchers into alert rule expressions and validate rules.""" + + _path = None + _disabled = False + + def __init__(self, charm): + self._charm = charm + + @property + def path(self): + """Lazy lookup of the path of cos-tool.""" + if self._disabled: + return None + if not self._path: + self._path = self._get_tool_path() + if not self._path: + logger.debug("Skipping injection of juju topology as label matchers") + self._disabled = True + return self._path + + def apply_label_matchers(self, rules) -> dict: + """Will apply label matchers to the expression of all alerts in all supplied groups.""" + if not self.path: + return rules + for group in rules["groups"]: + rules_in_group = group.get("rules", []) + for rule in rules_in_group: + topology = {} + # if the user for some reason has provided juju_unit, we'll need to honor it + # in most cases, however, this will be empty + for label in [ + "juju_model", + "juju_model_uuid", + "juju_application", + "juju_charm", + "juju_unit", + ]: + if label in rule["labels"]: + topology[label] = rule["labels"][label] + + rule["expr"] = self.inject_label_matchers(rule["expr"], topology) + return rules + + def validate_alert_rules(self, rules: dict) -> Tuple[bool, str]: + """Will validate correctness of alert rules, returning a boolean and any errors.""" + if not self.path: + logger.debug("`cos-tool` unavailable. Not validating alert correctness.") + return True, "" + + with tempfile.TemporaryDirectory() as tmpdir: + rule_path = Path(tmpdir + "/validate_rule.yaml") + + # Smash "our" rules format into what upstream actually uses, which is more like: + # + # groups: + # - name: foo + # rules: + # - alert: SomeAlert + # expr: up + # - alert: OtherAlert + # expr: up + transformed_rules = {"groups": []} # type: ignore + for rule in rules["groups"]: + transformed_rules["groups"].append(rule) + + rule_path.write_text(yaml.dump(transformed_rules)) + args = [str(self.path), "--format", "logql", "validate", str(rule_path)] + # noinspection PyBroadException + try: + self._exec(args) + return True, "" + except subprocess.CalledProcessError as e: + logger.debug("Validating the rules failed: %s", e.output) + return False, ", ".join([line for line in e.output if "error validating" in line]) + + def inject_label_matchers(self, expression, topology) -> str: + """Add label matchers to an expression.""" + if not topology: + return expression + if not self.path: + logger.debug("`cos-tool` unavailable. Leaving expression unchanged: %s", expression) + return expression + args = [str(self.path), "--format", "logql", "transform"] + args.extend( + ["--label-matcher={}={}".format(key, value) for key, value in topology.items()] + ) + + args.extend(["{}".format(expression)]) + # noinspection PyBroadException + try: + return self._exec(args) + except subprocess.CalledProcessError as e: + logger.debug('Applying the expression failed: "%s", falling back to the original', e) + print('Applying the expression failed: "{}", falling back to the original'.format(e)) + return expression + + def _get_tool_path(self) -> Optional[Path]: + arch = platform.processor() + arch = "amd64" if arch == "x86_64" else arch + res = "cos-tool-{}".format(arch) + try: + path = Path(res).resolve() + path.chmod(0o777) + return path + except NotImplementedError: + logger.debug("System lacks support for chmod") + except FileNotFoundError: + logger.debug('Could not locate cos-tool at: "{}"'.format(res)) + return None + + def _exec(self, cmd) -> str: + result = subprocess.run(cmd, check=True, stdout=subprocess.PIPE) + output = result.stdout.decode("utf-8").strip() + return output + + +def charm_logging_config( + endpoint_requirer: LokiPushApiConsumer, cert_path: Optional[Union[Path, str]] +) -> Tuple[Optional[List[str]], Optional[str]]: + """Utility function to determine the charm_logging config you will likely want. + + If no endpoint is provided: + disable charm logging. + If https endpoint is provided but cert_path is not found on disk: + disable charm logging. + If https endpoint is provided and cert_path is None: + ERROR + Else: + proceed with charm logging (with or without tls, as appropriate) + + Args: + endpoint_requirer: an instance of LokiPushApiConsumer. + cert_path: a path where a cert is stored. + + Returns: + A tuple with (optionally) the values of the endpoints and the certificate path. + + Raises: + LokiPushApiError: if some endpoint are http and others https. + """ + endpoints = [ep["url"] for ep in endpoint_requirer.loki_endpoints] + if not endpoints: + return None, None + + https = tuple(endpoint.startswith("https://") for endpoint in endpoints) + + if all(https): # all endpoints are https + if cert_path is None: + raise LokiPushApiError("Cannot send logs to https endpoints without a certificate.") + if not Path(cert_path).exists(): + # if endpoints is https BUT we don't have a server_cert yet: + # disable charm logging until we do to prevent tls errors + return None, None + return endpoints, str(cert_path) + + if all(not x for x in https): # all endpoints are http + return endpoints, None + + # if there's a disagreement, that's very weird: + raise LokiPushApiError("Some endpoints are http, some others are https. That's not good.") diff --git a/examples/fastapi/charm/lib/charms/prometheus_k8s/v0/prometheus_scrape.py b/examples/fastapi/charm/lib/charms/prometheus_k8s/v0/prometheus_scrape.py index e3d35c6..ca554fb 100644 --- a/examples/fastapi/charm/lib/charms/prometheus_k8s/v0/prometheus_scrape.py +++ b/examples/fastapi/charm/lib/charms/prometheus_k8s/v0/prometheus_scrape.py @@ -362,7 +362,7 @@ def _on_scrape_targets_changed(self, event): # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 47 +LIBPATCH = 48 PYDEPS = ["cosl"] @@ -2364,12 +2364,9 @@ def _get_tool_path(self) -> Optional[Path]: arch = "amd64" if arch == "x86_64" else arch res = "cos-tool-{}".format(arch) try: - path = Path(res).resolve() - path.chmod(0o777) + path = Path(res).resolve(strict=True) return path - except NotImplementedError: - logger.debug("System lacks support for chmod") - except FileNotFoundError: + except (FileNotFoundError, OSError): logger.debug('Could not locate cos-tool at: "{}"'.format(res)) return None diff --git a/examples/fastapi/charm/lib/charms/redis_k8s/v0/redis.py b/examples/fastapi/charm/lib/charms/redis_k8s/v0/redis.py index a731507..e28b14c 100644 --- a/examples/fastapi/charm/lib/charms/redis_k8s/v0/redis.py +++ b/examples/fastapi/charm/lib/charms/redis_k8s/v0/redis.py @@ -39,7 +39,7 @@ # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version. -LIBPATCH = 6 +LIBPATCH = 7 logger = logging.getLogger(__name__) @@ -78,6 +78,18 @@ def _on_relation_broken(self, event): # Trigger an event that our charm can react to. self.charm.on.redis_relation_updated.emit() + @property + def app_data(self) -> Optional[Dict[str, str]]: + """Retrieve the app data. + + Returns: + Dict: dict containing the app data. + """ + relation = self.model.get_relation(self.relation_name) + if not relation: + return None + return relation.data[relation.app] + @property def relation_data(self) -> Optional[Dict[str, str]]: """Retrieve the relation data. @@ -98,10 +110,16 @@ def url(self) -> Optional[str]: Returns: str: the Redis URL. """ - relation_data = self.relation_data - if not relation_data: + if not (relation_data := self.relation_data): return None + redis_host = relation_data.get("hostname") + + if app_data := self.app_data: + try: + redis_host = self.app_data.get("leader-host", redis_host) + except KeyError: + pass redis_port = relation_data.get("port") return f"redis://{redis_host}:{redis_port}" diff --git a/examples/fastapi/charm/lib/charms/tempo_coordinator_k8s/v0/tracing.py b/examples/fastapi/charm/lib/charms/tempo_coordinator_k8s/v0/tracing.py new file mode 100644 index 0000000..27144fa --- /dev/null +++ b/examples/fastapi/charm/lib/charms/tempo_coordinator_k8s/v0/tracing.py @@ -0,0 +1,987 @@ +# Copyright 2024 Canonical Ltd. +# See LICENSE file for licensing details. +"""## Overview. + +This document explains how to integrate with the Tempo charm for the purpose of pushing traces to a +tracing endpoint provided by Tempo. It also explains how alternative implementations of the Tempo charm +may maintain the same interface and be backward compatible with all currently integrated charms. + +## Requirer Library Usage + +Charms seeking to push traces to Tempo, must do so using the `TracingEndpointRequirer` +object from this charm library. For the simplest use cases, using the `TracingEndpointRequirer` +object only requires instantiating it, typically in the constructor of your charm. The +`TracingEndpointRequirer` constructor requires the name of the relation over which a tracing endpoint + is exposed by the Tempo charm, and a list of protocols it intends to send traces with. + This relation must use the `tracing` interface. + The `TracingEndpointRequirer` object may be instantiated as follows + + from charms.tempo_coordinator_k8s.v0.tracing import TracingEndpointRequirer + + def __init__(self, *args): + super().__init__(*args) + # ... + self.tracing = TracingEndpointRequirer(self, + protocols=['otlp_grpc', 'otlp_http', 'jaeger_http_thrift'] + ) + # ... + +Note that the first argument (`self`) to `TracingEndpointRequirer` is always a reference to the +parent charm. + +Alternatively to providing the list of requested protocols at init time, the charm can do it at +any point in time by calling the +`TracingEndpointRequirer.request_protocols(*protocol:str, relation:Optional[Relation])` method. +Using this method also allows you to use per-relation protocols. + +Units of requirer charms obtain the tempo endpoint to which they will push their traces by calling +`TracingEndpointRequirer.get_endpoint(protocol: str)`, where `protocol` is, for example: +- `otlp_grpc` +- `otlp_http` +- `zipkin` +- `tempo` + +If the `protocol` is not in the list of protocols that the charm requested at endpoint set-up time, +the library will raise an error. + +We recommend that you scale up your tracing provider and relate it to an ingress so that your tracing requests +go through the ingress and get load balanced across all units. Otherwise, if the provider's leader goes down, your tracing goes down. + +## Provider Library Usage + +The `TracingEndpointProvider` object may be used by charms to manage relations with their +trace sources. For this purposes a Tempo-like charm needs to do two things + +1. Instantiate the `TracingEndpointProvider` object by providing it a +reference to the parent (Tempo) charm and optionally the name of the relation that the Tempo charm +uses to interact with its trace sources. This relation must conform to the `tracing` interface +and it is strongly recommended that this relation be named `tracing` which is its +default value. + +For example a Tempo charm may instantiate the `TracingEndpointProvider` in its constructor as +follows + + from charms.tempo_coordinator_k8s.v0.tracing import TracingEndpointProvider + + def __init__(self, *args): + super().__init__(*args) + # ... + self.tracing = TracingEndpointProvider(self) + # ... + + + +""" # noqa: W505 +import enum +import json +import logging +from pathlib import Path +from typing import ( + TYPE_CHECKING, + Any, + Dict, + List, + Literal, + MutableMapping, + Optional, + Sequence, + Tuple, + Union, + cast, +) + +import pydantic +from ops.charm import ( + CharmBase, + CharmEvents, + RelationBrokenEvent, + RelationEvent, + RelationRole, +) +from ops.framework import EventSource, Object +from ops.model import ModelError, Relation +from pydantic import BaseModel, Field + +# The unique Charmhub library identifier, never change it +LIBID = "d2f02b1f8d1244b5989fd55bc3a28943" + +# Increment this major API version when introducing breaking changes +LIBAPI = 0 + +# Increment this PATCH version before using `charmcraft publish-lib` or reset +# to 0 if you are raising the major API version +LIBPATCH = 5 + +PYDEPS = ["pydantic"] + +logger = logging.getLogger(__name__) + +DEFAULT_RELATION_NAME = "tracing" +RELATION_INTERFACE_NAME = "tracing" + +# Supported list rationale https://github.com/canonical/tempo-coordinator-k8s-operator/issues/8 +ReceiverProtocol = Literal[ + "zipkin", + "otlp_grpc", + "otlp_http", + "jaeger_grpc", + "jaeger_thrift_http", +] + +RawReceiver = Tuple[ReceiverProtocol, str] +"""Helper type. A raw receiver is defined as a tuple consisting of the protocol name, and the (external, if available), +(secured, if available) resolvable server url. +""" + +BUILTIN_JUJU_KEYS = {"ingress-address", "private-address", "egress-subnets"} + + +class TransportProtocolType(str, enum.Enum): + """Receiver Type.""" + + http = "http" + grpc = "grpc" + + +receiver_protocol_to_transport_protocol: Dict[ReceiverProtocol, TransportProtocolType] = { + "zipkin": TransportProtocolType.http, + "otlp_grpc": TransportProtocolType.grpc, + "otlp_http": TransportProtocolType.http, + "jaeger_thrift_http": TransportProtocolType.http, + "jaeger_grpc": TransportProtocolType.grpc, +} +"""A mapping between telemetry protocols and their corresponding transport protocol. +""" + + +class TracingError(Exception): + """Base class for custom errors raised by this library.""" + + +class NotReadyError(TracingError): + """Raised by the provider wrapper if a requirer hasn't published the required data (yet).""" + + +class ProtocolNotRequestedError(TracingError): + """Raised if the user attempts to obtain an endpoint for a protocol it did not request.""" + + +class DataValidationError(TracingError): + """Raised when data validation fails on IPU relation data.""" + + +class AmbiguousRelationUsageError(TracingError): + """Raised when one wrongly assumes that there can only be one relation on an endpoint.""" + + +if int(pydantic.version.VERSION.split(".")[0]) < 2: + + class DatabagModel(BaseModel): # type: ignore + """Base databag model.""" + + class Config: + """Pydantic config.""" + + # ignore any extra fields in the databag + extra = "ignore" + """Ignore any extra fields in the databag.""" + allow_population_by_field_name = True + """Allow instantiating this class by field name (instead of forcing alias).""" + + _NEST_UNDER = None + + @classmethod + def load(cls, databag: MutableMapping): + """Load this model from a Juju databag.""" + if cls._NEST_UNDER: + return cls.parse_obj(json.loads(databag[cls._NEST_UNDER])) + + try: + data = { + k: json.loads(v) + for k, v in databag.items() + # Don't attempt to parse model-external values + if k in {f.alias for f in cls.__fields__.values()} + } + except json.JSONDecodeError as e: + msg = f"invalid databag contents: expecting json. {databag}" + logger.error(msg) + raise DataValidationError(msg) from e + + try: + return cls.parse_raw(json.dumps(data)) # type: ignore + except pydantic.ValidationError as e: + msg = f"failed to validate databag: {databag}" + logger.debug(msg, exc_info=True) + raise DataValidationError(msg) from e + + def dump(self, databag: Optional[MutableMapping] = None, clear: bool = True): + """Write the contents of this model to Juju databag. + + :param databag: the databag to write the data to. + :param clear: ensure the databag is cleared before writing it. + """ + if clear and databag: + databag.clear() + + if databag is None: + databag = {} + + if self._NEST_UNDER: + databag[self._NEST_UNDER] = self.json(by_alias=True) + return databag + + dct = self.dict() + for key, field in self.__fields__.items(): # type: ignore + value = dct[key] + databag[field.alias or key] = json.dumps(value) + + return databag + +else: + from pydantic import ConfigDict + + class DatabagModel(BaseModel): + """Base databag model.""" + + model_config = ConfigDict( + # ignore any extra fields in the databag + extra="ignore", + # Allow instantiating this class by field name (instead of forcing alias). + populate_by_name=True, + # Custom config key: whether to nest the whole datastructure (as json) + # under a field or spread it out at the toplevel. + _NEST_UNDER=None, # type: ignore + ) + """Pydantic config.""" + + @classmethod + def load(cls, databag: MutableMapping): + """Load this model from a Juju databag.""" + nest_under = cls.model_config.get("_NEST_UNDER") # type: ignore + if nest_under: + return cls.model_validate(json.loads(databag[nest_under])) # type: ignore + + try: + data = { + k: json.loads(v) + for k, v in databag.items() + # Don't attempt to parse model-external values + if k in {(f.alias or n) for n, f in cls.__fields__.items()} + } + except json.JSONDecodeError as e: + msg = f"invalid databag contents: expecting json. {databag}" + logger.error(msg) + raise DataValidationError(msg) from e + + try: + return cls.model_validate_json(json.dumps(data)) # type: ignore + except pydantic.ValidationError as e: + msg = f"failed to validate databag: {databag}" + logger.debug(msg, exc_info=True) + raise DataValidationError(msg) from e + + def dump(self, databag: Optional[MutableMapping] = None, clear: bool = True): + """Write the contents of this model to Juju databag. + + :param databag: the databag to write the data to. + :param clear: ensure the databag is cleared before writing it. + """ + if clear and databag: + databag.clear() + + if databag is None: + databag = {} + nest_under = self.model_config.get("_NEST_UNDER") + if nest_under: + databag[nest_under] = self.model_dump_json( # type: ignore + by_alias=True, + # skip keys whose values are default + exclude_defaults=True, + ) + return databag + + dct = self.model_dump() # type: ignore + for key, field in self.model_fields.items(): # type: ignore + value = dct[key] + if value == field.default: + continue + databag[field.alias or key] = json.dumps(value) + + return databag + + +# todo use models from charm-relation-interfaces +if int(pydantic.version.VERSION.split(".")[0]) < 2: + + class ProtocolType(BaseModel): # type: ignore + """Protocol Type.""" + + class Config: + """Pydantic config.""" + + use_enum_values = True + """Allow serializing enum values.""" + + name: str = Field( + ..., + description="Receiver protocol name. What protocols are supported (and what they are called) " + "may differ per provider.", + examples=["otlp_grpc", "otlp_http", "tempo_http"], + ) + + type: TransportProtocolType = Field( + ..., + description="The transport protocol used by this receiver.", + examples=["http", "grpc"], + ) + +else: + + class ProtocolType(BaseModel): + """Protocol Type.""" + + model_config = ConfigDict( # type: ignore + # Allow serializing enum values. + use_enum_values=True + ) + """Pydantic config.""" + + name: str = Field( + ..., + description="Receiver protocol name. What protocols are supported (and what they are called) " + "may differ per provider.", + examples=["otlp_grpc", "otlp_http", "tempo_http"], + ) + + type: TransportProtocolType = Field( + ..., + description="The transport protocol used by this receiver.", + examples=["http", "grpc"], + ) + + +class Receiver(BaseModel): + """Specification of an active receiver.""" + + protocol: ProtocolType = Field(..., description="Receiver protocol name and type.") + url: str = Field( + ..., + description="""URL at which the receiver is reachable. If there's an ingress, it would be the external URL. + Otherwise, it would be the service's fqdn or internal IP. + If the protocol type is grpc, the url will not contain a scheme.""", + examples=[ + "http://traefik_address:2331", + "https://traefik_address:2331", + "http://tempo_public_ip:2331", + "https://tempo_public_ip:2331", + "tempo_public_ip:2331", + ], + ) + + +class TracingProviderAppData(DatabagModel): # noqa: D101 + """Application databag model for the tracing provider.""" + + receivers: List[Receiver] = Field( + ..., + description="List of all receivers enabled on the tracing provider.", + ) + + +class TracingRequirerAppData(DatabagModel): # noqa: D101 + """Application databag model for the tracing requirer.""" + + receivers: List[ReceiverProtocol] + """Requested receivers.""" + + +class _AutoSnapshotEvent(RelationEvent): + __args__: Tuple[str, ...] = () + __optional_kwargs__: Dict[str, Any] = {} + + @classmethod + def __attrs__(cls): + return cls.__args__ + tuple(cls.__optional_kwargs__.keys()) + + def __init__(self, handle, relation, *args, **kwargs): + super().__init__(handle, relation) + + if not len(self.__args__) == len(args): + raise TypeError("expected {} args, got {}".format(len(self.__args__), len(args))) + + for attr, obj in zip(self.__args__, args): + setattr(self, attr, obj) + for attr, default in self.__optional_kwargs__.items(): + obj = kwargs.get(attr, default) + setattr(self, attr, obj) + + def snapshot(self) -> dict: + dct = super().snapshot() + for attr in self.__attrs__(): + obj = getattr(self, attr) + try: + dct[attr] = obj + except ValueError as e: + raise ValueError( + "cannot automagically serialize {}: " + "override this method and do it " + "manually.".format(obj) + ) from e + + return dct + + def restore(self, snapshot: dict) -> None: + super().restore(snapshot) + for attr, obj in snapshot.items(): + setattr(self, attr, obj) + + +class RelationNotFoundError(Exception): + """Raised if no relation with the given name is found.""" + + def __init__(self, relation_name: str): + self.relation_name = relation_name + self.message = "No relation named '{}' found".format(relation_name) + super().__init__(self.message) + + +class RelationInterfaceMismatchError(Exception): + """Raised if the relation with the given name has an unexpected interface.""" + + def __init__( + self, + relation_name: str, + expected_relation_interface: str, + actual_relation_interface: str, + ): + self.relation_name = relation_name + self.expected_relation_interface = expected_relation_interface + self.actual_relation_interface = actual_relation_interface + self.message = ( + "The '{}' relation has '{}' as interface rather than the expected '{}'".format( + relation_name, actual_relation_interface, expected_relation_interface + ) + ) + + super().__init__(self.message) + + +class RelationRoleMismatchError(Exception): + """Raised if the relation with the given name has a different role than expected.""" + + def __init__( + self, + relation_name: str, + expected_relation_role: RelationRole, + actual_relation_role: RelationRole, + ): + self.relation_name = relation_name + self.expected_relation_interface = expected_relation_role + self.actual_relation_role = actual_relation_role + self.message = "The '{}' relation has role '{}' rather than the expected '{}'".format( + relation_name, repr(actual_relation_role), repr(expected_relation_role) + ) + + super().__init__(self.message) + + +def _validate_relation_by_interface_and_direction( + charm: CharmBase, + relation_name: str, + expected_relation_interface: str, + expected_relation_role: RelationRole, +): + """Validate a relation. + + Verifies that the `relation_name` provided: (1) exists in metadata.yaml, + (2) declares as interface the interface name passed as `relation_interface` + and (3) has the right "direction", i.e., it is a relation that `charm` + provides or requires. + + Args: + charm: a `CharmBase` object to scan for the matching relation. + relation_name: the name of the relation to be verified. + expected_relation_interface: the interface name to be matched by the + relation named `relation_name`. + expected_relation_role: whether the `relation_name` must be either + provided or required by `charm`. + + Raises: + RelationNotFoundError: If there is no relation in the charm's metadata.yaml + with the same name as provided via `relation_name` argument. + RelationInterfaceMismatchError: The relation with the same name as provided + via `relation_name` argument does not have the same relation interface + as specified via the `expected_relation_interface` argument. + RelationRoleMismatchError: If the relation with the same name as provided + via `relation_name` argument does not have the same role as specified + via the `expected_relation_role` argument. + """ + if relation_name not in charm.meta.relations: + raise RelationNotFoundError(relation_name) + + relation = charm.meta.relations[relation_name] + + # fixme: why do we need to cast here? + actual_relation_interface = cast(str, relation.interface_name) + + if actual_relation_interface != expected_relation_interface: + raise RelationInterfaceMismatchError( + relation_name, expected_relation_interface, actual_relation_interface + ) + + if expected_relation_role is RelationRole.provides: + if relation_name not in charm.meta.provides: + raise RelationRoleMismatchError( + relation_name, RelationRole.provides, RelationRole.requires + ) + elif expected_relation_role is RelationRole.requires: + if relation_name not in charm.meta.requires: + raise RelationRoleMismatchError( + relation_name, RelationRole.requires, RelationRole.provides + ) + else: + raise TypeError("Unexpected RelationDirection: {}".format(expected_relation_role)) + + +class RequestEvent(RelationEvent): + """Event emitted when a remote requests a tracing endpoint.""" + + @property + def requested_receivers(self) -> List[ReceiverProtocol]: + """List of receiver protocols that have been requested.""" + relation = self.relation + app = relation.app + if not app: + raise NotReadyError("relation.app is None") + + return TracingRequirerAppData.load(relation.data[app]).receivers + + +class BrokenEvent(RelationBrokenEvent): + """Event emitted when a relation on tracing is broken.""" + + +class TracingEndpointProviderEvents(CharmEvents): + """TracingEndpointProvider events.""" + + request = EventSource(RequestEvent) + broken = EventSource(BrokenEvent) + + +class TracingEndpointProvider(Object): + """Class representing a trace receiver service.""" + + on = TracingEndpointProviderEvents() # type: ignore + + def __init__( + self, + charm: CharmBase, + external_url: Optional[str] = None, + relation_name: str = DEFAULT_RELATION_NAME, + ): + """Initialize. + + Args: + charm: a `CharmBase` instance that manages this instance of the Tempo service. + external_url: external address of the node hosting the tempo server, + if an ingress is present. + relation_name: an optional string name of the relation between `charm` + and the Tempo charmed service. The default is "tracing". + + Raises: + RelationNotFoundError: If there is no relation in the charm's metadata.yaml + with the same name as provided via `relation_name` argument. + RelationInterfaceMismatchError: The relation with the same name as provided + via `relation_name` argument does not have the `tracing` relation + interface. + RelationRoleMismatchError: If the relation with the same name as provided + via `relation_name` argument does not have the `RelationRole.requires` + role. + """ + _validate_relation_by_interface_and_direction( + charm, relation_name, RELATION_INTERFACE_NAME, RelationRole.provides + ) + + super().__init__(charm, relation_name + "tracing-provider") + self._charm = charm + self._external_url = external_url + self._relation_name = relation_name + self.framework.observe( + self._charm.on[relation_name].relation_joined, self._on_relation_event + ) + self.framework.observe( + self._charm.on[relation_name].relation_created, self._on_relation_event + ) + self.framework.observe( + self._charm.on[relation_name].relation_changed, self._on_relation_event + ) + self.framework.observe( + self._charm.on[relation_name].relation_broken, self._on_relation_broken_event + ) + + def _on_relation_broken_event(self, e: RelationBrokenEvent): + """Handle relation broken events.""" + self.on.broken.emit(e.relation) + + def _on_relation_event(self, e: RelationEvent): + """Handle relation created/joined/changed events.""" + if self.is_requirer_ready(e.relation): + self.on.request.emit(e.relation) + + def is_requirer_ready(self, relation: Relation): + """Attempt to determine if requirer has already populated app data.""" + try: + self._get_requested_protocols(relation) + except NotReadyError: + return False + return True + + @staticmethod + def _get_requested_protocols(relation: Relation): + app = relation.app + if not app: + raise NotReadyError("relation.app is None") + + try: + databag = TracingRequirerAppData.load(relation.data[app]) + except (json.JSONDecodeError, pydantic.ValidationError, DataValidationError): + logger.info(f"relation {relation} is not ready to talk tracing") + raise NotReadyError() + return databag.receivers + + def requested_protocols(self): + """All receiver protocols that have been requested by our related apps.""" + requested_protocols = set() + for relation in self.relations: + try: + protocols = self._get_requested_protocols(relation) + except NotReadyError: + continue + requested_protocols.update(protocols) + return requested_protocols + + @property + def relations(self) -> List[Relation]: + """All relations active on this endpoint.""" + return self._charm.model.relations[self._relation_name] + + def publish_receivers(self, receivers: Sequence[RawReceiver]): + """Let all requirers know that these receivers are active and listening.""" + if not self._charm.unit.is_leader(): + raise RuntimeError("only leader can do this") + + for relation in self.relations: + try: + TracingProviderAppData( + receivers=[ + Receiver( + url=url, + protocol=ProtocolType( + name=protocol, + type=receiver_protocol_to_transport_protocol[protocol], + ), + ) + for protocol, url in receivers + ], + ).dump(relation.data[self._charm.app]) + + except ModelError as e: + # args are bytes + msg = e.args[0] + if isinstance(msg, bytes): + if msg.startswith( + b"ERROR cannot read relation application settings: permission denied" + ): + logger.error( + f"encountered error {e} while attempting to update_relation_data." + f"The relation must be gone." + ) + continue + raise + + +class EndpointRemovedEvent(RelationBrokenEvent): + """Event representing a change in one of the receiver endpoints.""" + + +class EndpointChangedEvent(_AutoSnapshotEvent): + """Event representing a change in one of the receiver endpoints.""" + + __args__ = ("_receivers",) + + if TYPE_CHECKING: + _receivers = [] # type: List[dict] + + @property + def receivers(self) -> List[Receiver]: + """Cast receivers back from dict.""" + return [Receiver(**i) for i in self._receivers] + + +class TracingEndpointRequirerEvents(CharmEvents): + """TracingEndpointRequirer events.""" + + endpoint_changed = EventSource(EndpointChangedEvent) + endpoint_removed = EventSource(EndpointRemovedEvent) + + +class TracingEndpointRequirer(Object): + """A tracing endpoint for Tempo.""" + + on = TracingEndpointRequirerEvents() # type: ignore + + def __init__( + self, + charm: CharmBase, + relation_name: str = DEFAULT_RELATION_NAME, + protocols: Optional[List[ReceiverProtocol]] = None, + ): + """Construct a tracing requirer for a Tempo charm. + + If your application supports pushing traces to a distributed tracing backend, the + `TracingEndpointRequirer` object enables your charm to easily access endpoint information + exchanged over a `tracing` relation interface. + + Args: + charm: a `CharmBase` object that manages this + `TracingEndpointRequirer` object. Typically, this is `self` in the instantiating + class. + relation_name: an optional string name of the relation between `charm` + and the Tempo charmed service. The default is "tracing". It is strongly + advised not to change the default, so that people deploying your charm will have a + consistent experience with all other charms that provide tracing endpoints. + protocols: optional list of protocols that the charm intends to send traces with. + The provider will enable receivers for these and only these protocols, + so be sure to enable all protocols the charm or its workload are going to need. + + Raises: + RelationNotFoundError: If there is no relation in the charm's metadata.yaml + with the same name as provided via `relation_name` argument. + RelationInterfaceMismatchError: The relation with the same name as provided + via `relation_name` argument does not have the `tracing` relation + interface. + RelationRoleMismatchError: If the relation with the same name as provided + via `relation_name` argument does not have the `RelationRole.provides` + role. + """ + _validate_relation_by_interface_and_direction( + charm, relation_name, RELATION_INTERFACE_NAME, RelationRole.requires + ) + + super().__init__(charm, relation_name) + + self._is_single_endpoint = charm.meta.relations[relation_name].limit == 1 + + self._charm = charm + self._relation_name = relation_name + + events = self._charm.on[self._relation_name] + self.framework.observe(events.relation_changed, self._on_tracing_relation_changed) + self.framework.observe(events.relation_broken, self._on_tracing_relation_broken) + + if protocols: + self.request_protocols(protocols) + + def request_protocols( + self, protocols: Sequence[ReceiverProtocol], relation: Optional[Relation] = None + ): + """Publish the list of protocols which the provider should activate.""" + # todo: should we check if _is_single_endpoint and len(self.relations) > 1 and raise, here? + relations = [relation] if relation else self.relations + + if not protocols: + # empty sequence + raise ValueError( + "You need to pass a nonempty sequence of protocols to `request_protocols`." + ) + + try: + if self._charm.unit.is_leader(): + for relation in relations: + TracingRequirerAppData( + receivers=list(protocols), + ).dump(relation.data[self._charm.app]) + + except ModelError as e: + # args are bytes + msg = e.args[0] + if isinstance(msg, bytes): + if msg.startswith( + b"ERROR cannot read relation application settings: permission denied" + ): + logger.error( + f"encountered error {e} while attempting to request_protocols." + f"The relation must be gone." + ) + return + raise + + @property + def relations(self) -> List[Relation]: + """The tracing relations associated with this endpoint.""" + return self._charm.model.relations[self._relation_name] + + @property + def _relation(self) -> Optional[Relation]: + """If this wraps a single endpoint, the relation bound to it, if any.""" + if not self._is_single_endpoint: + objname = type(self).__name__ + raise AmbiguousRelationUsageError( + f"This {objname} wraps a {self._relation_name} endpoint that has " + "limit != 1. We can't determine what relation, of the possibly many, you are " + f"talking about. Please pass a relation instance while calling {objname}, " + "or set limit=1 in the charm metadata." + ) + relations = self.relations + return relations[0] if relations else None + + def is_ready(self, relation: Optional[Relation] = None): + """Is this endpoint ready?""" + relation = relation or self._relation + if not relation: + logger.debug(f"no relation on {self._relation_name !r}: tracing not ready") + return False + if relation.data is None: + logger.error(f"relation data is None for {relation}") + return False + if not relation.app: + logger.error(f"{relation} event received but there is no relation.app") + return False + try: + databag = dict(relation.data[relation.app]) + TracingProviderAppData.load(databag) + + except (json.JSONDecodeError, pydantic.ValidationError, DataValidationError): + logger.info(f"failed validating relation data for {relation}") + return False + return True + + def _on_tracing_relation_changed(self, event): + """Notify the providers that there is new endpoint information available.""" + relation = event.relation + if not self.is_ready(relation): + self.on.endpoint_removed.emit(relation) # type: ignore + return + + data = TracingProviderAppData.load(relation.data[relation.app]) + self.on.endpoint_changed.emit(relation, [i.dict() for i in data.receivers]) # type: ignore + + def _on_tracing_relation_broken(self, event: RelationBrokenEvent): + """Notify the providers that the endpoint is broken.""" + relation = event.relation + self.on.endpoint_removed.emit(relation) # type: ignore + + def get_all_endpoints( + self, relation: Optional[Relation] = None + ) -> Optional[TracingProviderAppData]: + """Unmarshalled relation data.""" + relation = relation or self._relation + if not self.is_ready(relation): + return + return TracingProviderAppData.load(relation.data[relation.app]) # type: ignore + + def _get_endpoint( + self, relation: Optional[Relation], protocol: ReceiverProtocol + ) -> Optional[str]: + app_data = self.get_all_endpoints(relation) + if not app_data: + return None + receivers: List[Receiver] = list( + filter(lambda i: i.protocol.name == protocol, app_data.receivers) + ) + if not receivers: + # it can happen if the charm requests tracing protocols, but the relay (such as grafana-agent) isn't yet + # connected to the tracing backend. In this case, it's not an error the charm author can do anything about + logger.warning(f"no receiver found with protocol={protocol!r}.") + return + if len(receivers) > 1: + # if we have more than 1 receiver that matches, it shouldn't matter which receiver we'll be using. + logger.warning( + f"too many receivers with protocol={protocol!r}; using first one. Found: {receivers}" + ) + + receiver = receivers[0] + return receiver.url + + def get_endpoint( + self, protocol: ReceiverProtocol, relation: Optional[Relation] = None + ) -> Optional[str]: + """Receiver endpoint for the given protocol. + + It could happen that this function gets called before the provider publishes the endpoints. + In such a scenario, if a non-leader unit calls this function, a permission denied exception will be raised due to + restricted access. To prevent this, this function needs to be guarded by the `is_ready` check. + + Raises: + ProtocolNotRequestedError: + If the charm unit is the leader unit and attempts to obtain an endpoint for a protocol it did not request. + """ + endpoint = self._get_endpoint(relation or self._relation, protocol=protocol) + if not endpoint: + requested_protocols = set() + relations = [relation] if relation else self.relations + for relation in relations: + try: + databag = TracingRequirerAppData.load(relation.data[self._charm.app]) + except DataValidationError: + continue + + requested_protocols.update(databag.receivers) + + if protocol not in requested_protocols: + raise ProtocolNotRequestedError(protocol, relation) + + return None + return endpoint + + +def charm_tracing_config( + endpoint_requirer: TracingEndpointRequirer, cert_path: Optional[Union[Path, str]] +) -> Tuple[Optional[str], Optional[str]]: + """Return the charm_tracing config you likely want. + + If no endpoint is provided: + disable charm tracing. + If https endpoint is provided but cert_path is not found on disk: + disable charm tracing. + If https endpoint is provided and cert_path is None: + ERROR + Else: + proceed with charm tracing (with or without tls, as appropriate) + + Usage: + >>> from lib.charms.tempo_coordinator_k8s.v0.charm_tracing import trace_charm + >>> from lib.charms.tempo_coordinator_k8s.v0.tracing import charm_tracing_config + >>> @trace_charm(tracing_endpoint="my_endpoint", cert_path="cert_path") + >>> class MyCharm(...): + >>> _cert_path = "/path/to/cert/on/charm/container.crt" + >>> def __init__(self, ...): + >>> self.tracing = TracingEndpointRequirer(...) + >>> self.my_endpoint, self.cert_path = charm_tracing_config( + ... self.tracing, self._cert_path) + """ + if not endpoint_requirer.is_ready(): + return None, None + + endpoint = endpoint_requirer.get_endpoint("otlp_http") + if not endpoint: + return None, None + + is_https = endpoint.startswith("https://") + + if is_https: + if cert_path is None or not Path(cert_path).exists(): + # disable charm tracing until we obtain a cert to prevent tls errors + logger.error( + "Tracing endpoint is https, but no server_cert has been passed." + "Please point @trace_charm to a `server_cert` attr. " + "This might also mean that the tracing provider is related to a " + "certificates provider, but this application is not (yet). " + "In that case, you might just have to wait a bit for the certificates " + "integration to settle. " + ) + return None, None + return endpoint, str(cert_path) + else: + return endpoint, None diff --git a/examples/fastapi/charm/lib/charms/traefik_k8s/v2/ingress.py b/examples/fastapi/charm/lib/charms/traefik_k8s/v2/ingress.py index 582a31f..bb7ac5e 100644 --- a/examples/fastapi/charm/lib/charms/traefik_k8s/v2/ingress.py +++ b/examples/fastapi/charm/lib/charms/traefik_k8s/v2/ingress.py @@ -1,4 +1,4 @@ -# Copyright 2025 Canonical Ltd. +# Copyright 2024 Canonical Ltd. # See LICENSE file for licensing details. r"""# Interface Library for ingress. @@ -56,13 +56,14 @@ def _on_ingress_revoked(self, event: IngressPerAppRevokedEvent): import socket import typing from dataclasses import dataclass +from functools import partial from typing import Any, Callable, Dict, List, MutableMapping, Optional, Sequence, Tuple, Union import pydantic from ops.charm import CharmBase, RelationBrokenEvent, RelationEvent from ops.framework import EventSource, Object, ObjectEvents, StoredState from ops.model import ModelError, Relation, Unit -from pydantic import AnyHttpUrl, BaseModel, Field, validator +from pydantic import AnyHttpUrl, BaseModel, Field # The unique Charmhub library identifier, never change it LIBID = "e6de2a5cd5b34422a204668f3b8f90d2" @@ -72,7 +73,7 @@ def _on_ingress_revoked(self, event: IngressPerAppRevokedEvent): # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 13 +LIBPATCH = 14 PYDEPS = ["pydantic"] @@ -84,6 +85,9 @@ def _on_ingress_revoked(self, event: IngressPerAppRevokedEvent): PYDANTIC_IS_V1 = int(pydantic.version.VERSION.split(".")[0]) < 2 if PYDANTIC_IS_V1: + from pydantic import validator + + input_validator = partial(validator, pre=True) class DatabagModel(BaseModel): # type: ignore """Base databag model.""" @@ -143,7 +147,9 @@ def dump(self, databag: Optional[MutableMapping] = None, clear: bool = True): return databag else: - from pydantic import ConfigDict + from pydantic import ConfigDict, field_validator + + input_validator = partial(field_validator, mode="before") class DatabagModel(BaseModel): """Base databag model.""" @@ -171,7 +177,7 @@ def load(cls, databag: MutableMapping): k: json.loads(v) for k, v in databag.items() # Don't attempt to parse model-external values - if k in {(f.alias or n) for n, f in cls.__fields__.items()} # type: ignore + if k in {(f.alias or n) for n, f in cls.model_fields.items()} # type: ignore } except json.JSONDecodeError as e: msg = f"invalid databag contents: expecting json. {databag}" @@ -252,14 +258,14 @@ class IngressRequirerAppData(DatabagModel): default="http", description="What scheme to use in the generated ingress url" ) - @validator("scheme", pre=True) + @input_validator("scheme") def validate_scheme(cls, scheme): # noqa: N805 # pydantic wants 'cls' as first arg """Validate scheme arg.""" if scheme not in {"http", "https", "h2c"}: raise ValueError("invalid scheme: should be one of `http|https|h2c`") return scheme - @validator("port", pre=True) + @input_validator("port") def validate_port(cls, port): # noqa: N805 # pydantic wants 'cls' as first arg """Validate port.""" assert isinstance(port, int), type(port) @@ -277,13 +283,13 @@ class IngressRequirerUnitData(DatabagModel): "IP can only be None if the IP information can't be retrieved from juju.", ) - @validator("host", pre=True) + @input_validator("host") def validate_host(cls, host): # noqa: N805 # pydantic wants 'cls' as first arg """Validate host.""" assert isinstance(host, str), type(host) return host - @validator("ip", pre=True) + @input_validator("ip") def validate_ip(cls, ip): # noqa: N805 # pydantic wants 'cls' as first arg """Validate ip.""" if ip is None: @@ -462,7 +468,10 @@ def _handle_relation(self, event): event.relation, data.app.name, data.app.model, - [unit.dict() for unit in data.units], + [ + unit.dict() if PYDANTIC_IS_V1 else unit.model_dump(mode="json") + for unit in data.units + ], data.app.strip_prefix or False, data.app.redirect_https or False, ) diff --git a/examples/flask/lib/charms/data_platform_libs/v0/data_interfaces.py b/examples/flask/lib/charms/data_platform_libs/v0/data_interfaces.py index b331bdc..3bc2dd8 100644 --- a/examples/flask/lib/charms/data_platform_libs/v0/data_interfaces.py +++ b/examples/flask/lib/charms/data_platform_libs/v0/data_interfaces.py @@ -331,10 +331,14 @@ def _on_topic_requested(self, event: TopicRequestedEvent): # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 36 +LIBPATCH = 40 PYDEPS = ["ops>=2.0.0"] +# Starting from what LIBPATCH number to apply legacy solutions +# v0.17 was the last version without secrets +LEGACY_SUPPORT_FROM = 17 + logger = logging.getLogger(__name__) Diff = namedtuple("Diff", "added changed deleted") @@ -351,36 +355,16 @@ def _on_topic_requested(self, event: TopicRequestedEvent): GROUP_MAPPING_FIELD = "secret_group_mapping" GROUP_SEPARATOR = "@" +MODEL_ERRORS = { + "not_leader": "this unit is not the leader", + "no_label_and_uri": "ERROR either URI or label should be used for getting an owned secret but not both", + "owner_no_refresh": "ERROR secret owner cannot use --refresh", +} -class SecretGroup(str): - """Secret groups specific type.""" - - -class SecretGroupsAggregate(str): - """Secret groups with option to extend with additional constants.""" - - def __init__(self): - self.USER = SecretGroup("user") - self.TLS = SecretGroup("tls") - self.EXTRA = SecretGroup("extra") - - def __setattr__(self, name, value): - """Setting internal constants.""" - if name in self.__dict__: - raise RuntimeError("Can't set constant!") - else: - super().__setattr__(name, SecretGroup(value)) - - def groups(self) -> list: - """Return the list of stored SecretGroups.""" - return list(self.__dict__.values()) - - def get_group(self, group: str) -> Optional[SecretGroup]: - """If the input str translates to a group name, return that.""" - return SecretGroup(group) if group in self.groups() else None - -SECRET_GROUPS = SecretGroupsAggregate() +############################################################################## +# Exceptions +############################################################################## class DataInterfacesError(Exception): @@ -407,6 +391,19 @@ class IllegalOperationError(DataInterfacesError): """To be used when an operation is not allowed to be performed.""" +class PrematureDataAccessError(DataInterfacesError): + """To be raised when the Relation Data may be accessed (written) before protocol init complete.""" + + +############################################################################## +# Global helpers / utilities +############################################################################## + +############################################################################## +# Databag handling and comparison methods +############################################################################## + + def get_encoded_dict( relation: Relation, member: Union[Unit, Application], field: str ) -> Optional[Dict[str, str]]: @@ -482,6 +479,11 @@ def diff(event: RelationChangedEvent, bucket: Optional[Union[Unit, Application]] return Diff(added, changed, deleted) +############################################################################## +# Module decorators +############################################################################## + + def leader_only(f): """Decorator to ensure that only leader can perform given operation.""" @@ -536,6 +538,36 @@ def wrapper(self, *args, **kwargs): return wrapper +def legacy_apply_from_version(version: int) -> Callable: + """Decorator to decide whether to apply a legacy function or not. + + Based on LEGACY_SUPPORT_FROM module variable value, the importer charm may only want + to apply legacy solutions starting from a specific LIBPATCH. + + NOTE: All 'legacy' functions have to be defined and called in a way that they return `None`. + This results in cleaner and more secure execution flows in case the function may be disabled. + This requirement implicitly means that legacy functions change the internal state strictly, + don't return information. + """ + + def decorator(f: Callable[..., None]): + """Signature is ensuring None return value.""" + f.legacy_version = version + + def wrapper(self, *args, **kwargs) -> None: + if version >= LEGACY_SUPPORT_FROM: + return f(self, *args, **kwargs) + + return wrapper + + return decorator + + +############################################################################## +# Helper classes +############################################################################## + + class Scope(Enum): """Peer relations scope.""" @@ -543,9 +575,35 @@ class Scope(Enum): UNIT = "unit" -################################################################################ -# Secrets internal caching -################################################################################ +class SecretGroup(str): + """Secret groups specific type.""" + + +class SecretGroupsAggregate(str): + """Secret groups with option to extend with additional constants.""" + + def __init__(self): + self.USER = SecretGroup("user") + self.TLS = SecretGroup("tls") + self.EXTRA = SecretGroup("extra") + + def __setattr__(self, name, value): + """Setting internal constants.""" + if name in self.__dict__: + raise RuntimeError("Can't set constant!") + else: + super().__setattr__(name, SecretGroup(value)) + + def groups(self) -> list: + """Return the list of stored SecretGroups.""" + return list(self.__dict__.values()) + + def get_group(self, group: str) -> Optional[SecretGroup]: + """If the input str translates to a group name, return that.""" + return SecretGroup(group) if group in self.groups() else None + + +SECRET_GROUPS = SecretGroupsAggregate() class CachedSecret: @@ -554,6 +612,8 @@ class CachedSecret: The data structure is precisely re-using/simulating as in the actual Secret Storage """ + KNOWN_MODEL_ERRORS = [MODEL_ERRORS["no_label_and_uri"], MODEL_ERRORS["owner_no_refresh"]] + def __init__( self, model: Model, @@ -571,6 +631,95 @@ def __init__( self.legacy_labels = legacy_labels self.current_label = None + @property + def meta(self) -> Optional[Secret]: + """Getting cached secret meta-information.""" + if not self._secret_meta: + if not (self._secret_uri or self.label): + return + + try: + self._secret_meta = self._model.get_secret(label=self.label) + except SecretNotFoundError: + # Falling back to seeking for potential legacy labels + self._legacy_compat_find_secret_by_old_label() + + # If still not found, to be checked by URI, to be labelled with the proposed label + if not self._secret_meta and self._secret_uri: + self._secret_meta = self._model.get_secret(id=self._secret_uri, label=self.label) + return self._secret_meta + + ########################################################################## + # Backwards compatibility / Upgrades + ########################################################################## + # These functions are used to keep backwards compatibility on rolling upgrades + # Policy: + # All data is kept intact until the first write operation. (This allows a minimal + # grace period during which rollbacks are fully safe. For more info see the spec.) + # All data involves: + # - databag contents + # - secrets content + # - secret labels (!!!) + # Legacy functions must return None, and leave an equally consistent state whether + # they are executed or skipped (as a high enough versioned execution environment may + # not require so) + + # Compatibility + + @legacy_apply_from_version(34) + def _legacy_compat_find_secret_by_old_label(self) -> None: + """Compatibility function, allowing to find a secret by a legacy label. + + This functionality is typically needed when secret labels changed over an upgrade. + Until the first write operation, we need to maintain data as it was, including keeping + the old secret label. In order to keep track of the old label currently used to access + the secret, and additional 'current_label' field is being defined. + """ + for label in self.legacy_labels: + try: + self._secret_meta = self._model.get_secret(label=label) + except SecretNotFoundError: + pass + else: + if label != self.label: + self.current_label = label + return + + # Migrations + + @legacy_apply_from_version(34) + def _legacy_migration_to_new_label_if_needed(self) -> None: + """Helper function to re-create the secret with a different label. + + Juju does not provide a way to change secret labels. + Thus whenever moving from secrets version that involves secret label changes, + we "re-create" the existing secret, and attach the new label to the new + secret, to be used from then on. + + Note: we replace the old secret with a new one "in place", as we can't + easily switch the containing SecretCache structure to point to a new secret. + Instead we are changing the 'self' (CachedSecret) object to point to the + new instance. + """ + if not self.current_label or not (self.meta and self._secret_meta): + return + + # Create a new secret with the new label + content = self._secret_meta.get_content() + self._secret_uri = None + + # It will be nice to have the possibility to check if we are the owners of the secret... + try: + self._secret_meta = self.add_secret(content, label=self.label) + except ModelError as err: + if MODEL_ERRORS["not_leader"] not in str(err): + raise + self.current_label = None + + ########################################################################## + # Public functions + ########################################################################## + def add_secret( self, content: Dict[str, str], @@ -593,28 +742,6 @@ def add_secret( self._secret_meta = secret return self._secret_meta - @property - def meta(self) -> Optional[Secret]: - """Getting cached secret meta-information.""" - if not self._secret_meta: - if not (self._secret_uri or self.label): - return - - for label in [self.label] + self.legacy_labels: - try: - self._secret_meta = self._model.get_secret(label=label) - except SecretNotFoundError: - pass - else: - if label != self.label: - self.current_label = label - break - - # If still not found, to be checked by URI, to be labelled with the proposed label - if not self._secret_meta and self._secret_uri: - self._secret_meta = self._model.get_secret(id=self._secret_uri, label=self.label) - return self._secret_meta - def get_content(self) -> Dict[str, str]: """Getting cached secret content.""" if not self._secret_content: @@ -624,42 +751,25 @@ def get_content(self) -> Dict[str, str]: except (ValueError, ModelError) as err: # https://bugs.launchpad.net/juju/+bug/2042596 # Only triggered when 'refresh' is set - known_model_errors = [ - "ERROR either URI or label should be used for getting an owned secret but not both", - "ERROR secret owner cannot use --refresh", - ] if isinstance(err, ModelError) and not any( - msg in str(err) for msg in known_model_errors + msg in str(err) for msg in self.KNOWN_MODEL_ERRORS ): raise # Due to: ValueError: Secret owner cannot use refresh=True self._secret_content = self.meta.get_content() return self._secret_content - def _move_to_new_label_if_needed(self): - """Helper function to re-create the secret with a different label.""" - if not self.current_label or not (self.meta and self._secret_meta): - return - - # Create a new secret with the new label - content = self._secret_meta.get_content() - self._secret_uri = None - - # I wish we could just check if we are the owners of the secret... - try: - self._secret_meta = self.add_secret(content, label=self.label) - except ModelError as err: - if "this unit is not the leader" not in str(err): - raise - self.current_label = None - def set_content(self, content: Dict[str, str]) -> None: """Setting cached secret content.""" if not self.meta: return + # DPE-4182: do not create new revision if the content stay the same + if content == self.get_content(): + return + if content: - self._move_to_new_label_if_needed() + self._legacy_migration_to_new_label_if_needed() self.meta.set_content(content) self._secret_content = content else: @@ -922,6 +1032,23 @@ def _delete_relation_data(self, relation: Relation, fields: List[str]) -> None: """Delete data available (directily or indirectly -- i.e. secrets) from the relation for owner/this_app.""" raise NotImplementedError + # Optional overrides + + def _legacy_apply_on_fetch(self) -> None: + """This function should provide a list of compatibility functions to be applied when fetching (legacy) data.""" + pass + + def _legacy_apply_on_update(self, fields: List[str]) -> None: + """This function should provide a list of compatibility functions to be applied when writing data. + + Since data may be at a legacy version, migration may be mandatory. + """ + pass + + def _legacy_apply_on_delete(self, fields: List[str]) -> None: + """This function should provide a list of compatibility functions to be applied when deleting (legacy) data.""" + pass + # Internal helper methods @staticmethod @@ -1174,6 +1301,16 @@ def get_relation(self, relation_name, relation_id) -> Relation: return relation + def get_secret_uri(self, relation: Relation, group: SecretGroup) -> Optional[str]: + """Get the secret URI for the corresponding group.""" + secret_field = self._generate_secret_field_name(group) + return relation.data[self.component].get(secret_field) + + def set_secret_uri(self, relation: Relation, group: SecretGroup, secret_uri: str) -> None: + """Set the secret URI for the corresponding group.""" + secret_field = self._generate_secret_field_name(group) + relation.data[self.component][secret_field] = secret_uri + def fetch_relation_data( self, relation_ids: Optional[List[int]] = None, @@ -1190,6 +1327,8 @@ def fetch_relation_data( a dict of the values stored in the relation data bag for all relation instances (indexed by the relation ID). """ + self._legacy_apply_on_fetch() + if not relation_name: relation_name = self.relation_name @@ -1228,6 +1367,8 @@ def fetch_my_relation_data( NOTE: Since only the leader can read the relation's 'this_app'-side Application databag, the functionality is limited to leaders """ + self._legacy_apply_on_fetch() + if not relation_name: relation_name = self.relation_name @@ -1259,6 +1400,8 @@ def fetch_my_relation_field( @leader_only def update_relation_data(self, relation_id: int, data: dict) -> None: """Update the data within the relation.""" + self._legacy_apply_on_update(list(data.keys())) + relation_name = self.relation_name relation = self.get_relation(relation_name, relation_id) return self._update_relation_data(relation, data) @@ -1266,6 +1409,8 @@ def update_relation_data(self, relation_id: int, data: dict) -> None: @leader_only def delete_relation_data(self, relation_id: int, fields: List[str]) -> None: """Remove field from the relation.""" + self._legacy_apply_on_delete(fields) + relation_name = self.relation_name relation = self.get_relation(relation_name, relation_id) return self._delete_relation_data(relation, fields) @@ -1312,6 +1457,8 @@ def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: class ProviderData(Data): """Base provides-side of the data products relation.""" + RESOURCE_FIELD = "database" + def __init__( self, model: Model, @@ -1332,8 +1479,7 @@ def _add_relation_secret( uri_to_databag=True, ) -> bool: """Add a new Juju Secret that will be registered in the relation databag.""" - secret_field = self._generate_secret_field_name(group_mapping) - if uri_to_databag and relation.data[self.component].get(secret_field): + if uri_to_databag and self.get_secret_uri(relation, group_mapping): logging.error("Secret for relation %s already exists, not adding again", relation.id) return False @@ -1344,7 +1490,7 @@ def _add_relation_secret( # According to lint we may not have a Secret ID if uri_to_databag and secret.meta and secret.meta.id: - relation.data[self.component][secret_field] = secret.meta.id + self.set_secret_uri(relation, group_mapping, secret.meta.id) # Return the content that was added return True @@ -1445,8 +1591,7 @@ def _get_relation_secret( if not relation: return - secret_field = self._generate_secret_field_name(group_mapping) - if secret_uri := relation.data[self.local_app].get(secret_field): + if secret_uri := self.get_secret_uri(relation, group_mapping): return self.secrets.get(label, secret_uri) def _fetch_specific_relation_data( @@ -1479,6 +1624,15 @@ def _fetch_my_specific_relation_data( def _update_relation_data(self, relation: Relation, data: Dict[str, str]) -> None: """Set values for fields not caring whether it's a secret or not.""" req_secret_fields = [] + + keys = set(data.keys()) + if self.fetch_relation_field(relation.id, self.RESOURCE_FIELD) is None and ( + keys - {"endpoints", "read-only-endpoints", "replset"} + ): + raise PrematureDataAccessError( + "Premature access to relation data, update is forbidden before the connection is initialized." + ) + if relation.app: req_secret_fields = get_encoded_list(relation, relation.app, REQ_SECRET_FIELDS) @@ -1599,11 +1753,10 @@ def _register_secrets_to_relation(self, relation: Relation, params_name_list: Li for group in SECRET_GROUPS.groups(): secret_field = self._generate_secret_field_name(group) - if secret_field in params_name_list: - if secret_uri := relation.data[relation.app].get(secret_field): - self._register_secret_to_relation( - relation.name, relation.id, secret_uri, group - ) + if secret_field in params_name_list and ( + secret_uri := self.get_secret_uri(relation, group) + ): + self._register_secret_to_relation(relation.name, relation.id, secret_uri, group) def _is_resource_created_for_relation(self, relation: Relation) -> bool: if not relation.app: @@ -1614,6 +1767,17 @@ def _is_resource_created_for_relation(self, relation: Relation) -> bool: ) return bool(data.get("username")) and bool(data.get("password")) + # Public functions + + def get_secret_uri(self, relation: Relation, group: SecretGroup) -> Optional[str]: + """Getting relation secret URI for the corresponding Secret Group.""" + secret_field = self._generate_secret_field_name(group) + return relation.data[relation.app].get(secret_field) + + def set_secret_uri(self, relation: Relation, group: SecretGroup, uri: str) -> None: + """Setting relation secret URI is not possible for a Requirer.""" + raise NotImplementedError("Requirer can not change the relation secret URI.") + def is_resource_created(self, relation_id: Optional[int] = None) -> bool: """Check if the resource has been created. @@ -1764,7 +1928,6 @@ def __init__( secret_field_name: Optional[str] = None, deleted_label: Optional[str] = None, ): - """Manager of base client relations.""" RequirerData.__init__( self, model, @@ -1775,6 +1938,11 @@ def __init__( self.secret_field_name = secret_field_name if secret_field_name else self.SECRET_FIELD_NAME self.deleted_label = deleted_label self._secret_label_map = {} + + # Legacy information holders + self._legacy_labels = [] + self._legacy_secret_uri = None + # Secrets that are being dynamically added within the scope of this event handler run self._new_secrets = [] self._additional_secret_group_mapping = additional_secret_group_mapping @@ -1849,10 +2017,12 @@ def set_secret( value: The string value of the secret group_mapping: The name of the "secret group", in case the field is to be added to an existing secret """ + self._legacy_apply_on_update([field]) + full_field = self._field_to_internal_name(field, group_mapping) if self.secrets_enabled and full_field not in self.current_secret_fields: self._new_secrets.append(full_field) - if self._no_group_with_databag(field, full_field): + if self.valid_field_pattern(field, full_field): self.update_relation_data(relation_id, {full_field: value}) # Unlike for set_secret(), there's no harm using this operation with static secrets @@ -1865,6 +2035,8 @@ def get_secret( group_mapping: Optional[SecretGroup] = None, ) -> Optional[str]: """Public interface method to fetch secrets only.""" + self._legacy_apply_on_fetch() + full_field = self._field_to_internal_name(field, group_mapping) if ( self.secrets_enabled @@ -1872,7 +2044,7 @@ def get_secret( and field not in self.current_secret_fields ): return - if self._no_group_with_databag(field, full_field): + if self.valid_field_pattern(field, full_field): return self.fetch_my_relation_field(relation_id, full_field) @dynamic_secrets_only @@ -1883,14 +2055,19 @@ def delete_secret( group_mapping: Optional[SecretGroup] = None, ) -> Optional[str]: """Public interface method to delete secrets only.""" + self._legacy_apply_on_delete([field]) + full_field = self._field_to_internal_name(field, group_mapping) if self.secrets_enabled and full_field not in self.current_secret_fields: logger.warning(f"Secret {field} from group {group_mapping} was not found") return - if self._no_group_with_databag(field, full_field): + + if self.valid_field_pattern(field, full_field): self.delete_relation_data(relation_id, [full_field]) + ########################################################################## # Helpers + ########################################################################## @staticmethod def _field_to_internal_name(field: str, group: Optional[SecretGroup]) -> str: @@ -1932,10 +2109,69 @@ def _content_for_secret_group( if k in self.secret_fields } - # Backwards compatibility + def valid_field_pattern(self, field: str, full_field: str) -> bool: + """Check that no secret group is attempted to be used together without secrets being enabled. + + Secrets groups are impossible to use with versions that are not yet supporting secrets. + """ + if not self.secrets_enabled and full_field != field: + logger.error( + f"Can't access {full_field}: no secrets available (i.e. no secret groups either)." + ) + return False + return True + + ########################################################################## + # Backwards compatibility / Upgrades + ########################################################################## + # These functions are used to keep backwards compatibility on upgrades + # Policy: + # All data is kept intact until the first write operation. (This allows a minimal + # grace period during which rollbacks are fully safe. For more info see spec.) + # All data involves: + # - databag + # - secrets content + # - secret labels (!!!) + # Legacy functions must return None, and leave an equally consistent state whether + # they are executed or skipped (as a high enough versioned execution environment may + # not require so) + + # Full legacy stack for each operation + + def _legacy_apply_on_fetch(self) -> None: + """All legacy functions to be applied on fetch.""" + relation = self._model.relations[self.relation_name][0] + self._legacy_compat_generate_prev_labels() + self._legacy_compat_secret_uri_from_databag(relation) + + def _legacy_apply_on_update(self, fields) -> None: + """All legacy functions to be applied on update.""" + relation = self._model.relations[self.relation_name][0] + self._legacy_compat_generate_prev_labels() + self._legacy_compat_secret_uri_from_databag(relation) + self._legacy_migration_remove_secret_from_databag(relation, fields) + self._legacy_migration_remove_secret_field_name_from_databag(relation) + + def _legacy_apply_on_delete(self, fields) -> None: + """All legacy functions to be applied on delete.""" + relation = self._model.relations[self.relation_name][0] + self._legacy_compat_generate_prev_labels() + self._legacy_compat_secret_uri_from_databag(relation) + self._legacy_compat_check_deleted_label(relation, fields) + + # Compatibility + + @legacy_apply_from_version(18) + def _legacy_compat_check_deleted_label(self, relation, fields) -> None: + """Helper function for legacy behavior. + + As long as https://bugs.launchpad.net/juju/+bug/2028094 wasn't fixed, + we did not delete fields but rather kept them in the secret with a string value + expressing invalidity. This function is maintainnig that behavior when needed. + """ + if not self.deleted_label: + return - def _check_deleted_label(self, relation, fields) -> None: - """Helper function for legacy behavior.""" current_data = self.fetch_my_relation_data([relation.id], fields) if current_data is not None: # Check if the secret we wanna delete actually exists @@ -1948,7 +2184,43 @@ def _check_deleted_label(self, relation, fields) -> None: ", ".join(non_existent), ) - def _remove_secret_from_databag(self, relation, fields: List[str]) -> None: + @legacy_apply_from_version(18) + def _legacy_compat_secret_uri_from_databag(self, relation) -> None: + """Fetching the secret URI from the databag, in case stored there.""" + self._legacy_secret_uri = relation.data[self.component].get( + self._generate_secret_field_name(), None + ) + + @legacy_apply_from_version(34) + def _legacy_compat_generate_prev_labels(self) -> None: + """Generator for legacy secret label names, for backwards compatibility. + + Secret label is part of the data that MUST be maintained across rolling upgrades. + In case there may be a change on a secret label, the old label must be recognized + after upgrades, and left intact until the first write operation -- when we roll over + to the new label. + + This function keeps "memory" of previously used secret labels. + NOTE: Return value takes decorator into account -- all 'legacy' functions may return `None` + + v0.34 (rev69): Fixing issue https://github.com/canonical/data-platform-libs/issues/155 + meant moving from '.' (i.e. 'mysql.app', 'mysql.unit') + to labels '..' (like 'peer.mysql.app') + """ + if self._legacy_labels: + return + + result = [] + members = [self._model.app.name] + if self.scope: + members.append(self.scope.value) + result.append(f"{'.'.join(members)}") + self._legacy_labels = result + + # Migration + + @legacy_apply_from_version(18) + def _legacy_migration_remove_secret_from_databag(self, relation, fields: List[str]) -> None: """For Rolling Upgrades -- when moving from databag to secrets usage. Practically what happens here is to remove stuff from the databag that is @@ -1962,10 +2234,16 @@ def _remove_secret_from_databag(self, relation, fields: List[str]) -> None: if self._fetch_relation_data_without_secrets(self.component, relation, [field]): self._delete_relation_data_without_secrets(self.component, relation, [field]) - def _remove_secret_field_name_from_databag(self, relation) -> None: + @legacy_apply_from_version(18) + def _legacy_migration_remove_secret_field_name_from_databag(self, relation) -> None: """Making sure that the old databag URI is gone. This action should not be executed more than once. + + There was a phase (before moving secrets usage to libs) when charms saved the peer + secret URI to the databag, and used this URI from then on to retrieve their secret. + When upgrading to charm versions using this library, we need to add a label to the + secret and access it via label from than on, and remove the old traces from the databag. """ # Nothing to do if 'internal-secret' is not in the databag if not (relation.data[self.component].get(self._generate_secret_field_name())): @@ -1981,25 +2259,9 @@ def _remove_secret_field_name_from_databag(self, relation) -> None: # Databag reference to the secret URI can be removed, now that it's labelled relation.data[self.component].pop(self._generate_secret_field_name(), None) - def _previous_labels(self) -> List[str]: - """Generator for legacy secret label names, for backwards compatibility.""" - result = [] - members = [self._model.app.name] - if self.scope: - members.append(self.scope.value) - result.append(f"{'.'.join(members)}") - return result - - def _no_group_with_databag(self, field: str, full_field: str) -> bool: - """Check that no secret group is attempted to be used together with databag.""" - if not self.secrets_enabled and full_field != field: - logger.error( - f"Can't access {full_field}: no secrets available (i.e. no secret groups either)." - ) - return False - return True - + ########################################################################## # Event handlers + ########################################################################## def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: """Event emitted when the relation has changed.""" @@ -2009,7 +2271,9 @@ def _on_secret_changed_event(self, event: SecretChangedEvent) -> None: """Event emitted when the secret has changed.""" pass + ########################################################################## # Overrides of Relation Data handling functions + ########################################################################## def _generate_secret_label( self, relation_name: str, relation_id: int, group_mapping: SecretGroup @@ -2046,13 +2310,14 @@ def _get_relation_secret( return label = self._generate_secret_label(relation_name, relation_id, group_mapping) - secret_uri = relation.data[self.component].get(self._generate_secret_field_name(), None) # URI or legacy label is only to applied when moving single legacy secret to a (new) label if group_mapping == SECRET_GROUPS.EXTRA: # Fetching the secret with fallback to URI (in case label is not yet known) # Label would we "stuck" on the secret in case it is found - return self.secrets.get(label, secret_uri, legacy_labels=self._previous_labels()) + return self.secrets.get( + label, self._legacy_secret_uri, legacy_labels=self._legacy_labels + ) return self.secrets.get(label) def _get_group_secret_contents( @@ -2082,7 +2347,6 @@ def _fetch_my_specific_relation_data( @either_static_or_dynamic_secrets def _update_relation_data(self, relation: Relation, data: Dict[str, str]) -> None: """Update data available (directily or indirectly -- i.e. secrets) from the relation for owner/this_app.""" - self._remove_secret_from_databag(relation, list(data.keys())) _, normal_fields = self._process_secret_fields( relation, self.secret_fields, @@ -2091,7 +2355,6 @@ def _update_relation_data(self, relation: Relation, data: Dict[str, str]) -> Non data=data, uri_to_databag=False, ) - self._remove_secret_field_name_from_databag(relation) normal_content = {k: v for k, v in data.items() if k in normal_fields} self._update_relation_data_without_secrets(self.component, relation, normal_content) @@ -2100,8 +2363,6 @@ def _update_relation_data(self, relation: Relation, data: Dict[str, str]) -> Non def _delete_relation_data(self, relation: Relation, fields: List[str]) -> None: """Delete data available (directily or indirectly -- i.e. secrets) from the relation for owner/this_app.""" if self.secret_fields and self.deleted_label: - # Legacy, backwards compatibility - self._check_deleted_label(relation, fields) _, normal_fields = self._process_secret_fields( relation, @@ -2137,7 +2398,9 @@ def fetch_relation_field( "fetch_my_relation_data() and fetch_my_relation_field()" ) + ########################################################################## # Public functions -- inherited + ########################################################################## fetch_my_relation_data = Data.fetch_my_relation_data fetch_my_relation_field = Data.fetch_my_relation_field @@ -2602,6 +2865,14 @@ def set_version(self, relation_id: int, version: str) -> None: """ self.update_relation_data(relation_id, {"version": version}) + def set_subordinated(self, relation_id: int) -> None: + """Raises the subordinated flag in the application relation databag. + + Args: + relation_id: the identifier for a particular relation. + """ + self.update_relation_data(relation_id, {"subordinated": "true"}) + class DatabaseProviderEventHandlers(EventHandlers): """Provider-side of the database relation handlers.""" @@ -2838,6 +3109,21 @@ def _on_relation_created_event(self, event: RelationCreatedEvent) -> None: def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: """Event emitted when the database relation has changed.""" + is_subordinate = False + remote_unit_data = None + for key in event.relation.data.keys(): + if isinstance(key, Unit) and not key.name.startswith(self.charm.app.name): + remote_unit_data = event.relation.data[key] + elif isinstance(key, Application) and key.name != self.charm.app.name: + is_subordinate = event.relation.data[key].get("subordinated") == "true" + + if is_subordinate: + if not remote_unit_data: + return + + if remote_unit_data.get("state") != "ready": + return + # Check which data has changed to emit customs events. diff = self._diff(event) @@ -3019,6 +3305,8 @@ class KafkaRequiresEvents(CharmEvents): class KafkaProviderData(ProviderData): """Provider-side of the Kafka relation.""" + RESOURCE_FIELD = "topic" + def __init__(self, model: Model, relation_name: str) -> None: super().__init__(model, relation_name) @@ -3268,6 +3556,8 @@ class OpenSearchRequiresEvents(CharmEvents): class OpenSearchProvidesData(ProviderData): """Provider-side of the OpenSearch relation.""" + RESOURCE_FIELD = "index" + def __init__(self, model: Model, relation_name: str) -> None: super().__init__(model, relation_name) diff --git a/examples/flask/lib/charms/loki_k8s/v0/loki_push_api.py b/examples/flask/lib/charms/loki_k8s/v0/loki_push_api.py index d5217f3..16e1294 100644 --- a/examples/flask/lib/charms/loki_k8s/v0/loki_push_api.py +++ b/examples/flask/lib/charms/loki_k8s/v0/loki_push_api.py @@ -16,9 +16,10 @@ applications such as pebble, or charmed operators of workloads such as grafana-agent or promtail, that can communicate with loki directly. -- `LogProxyConsumer`: This object can be used by any Charmed Operator which needs to -send telemetry, such as logs, to Loki through a Log Proxy by implementing the consumer side of the -`loki_push_api` relation interface. +- `LogProxyConsumer`: DEPRECATED. +This object can be used by any Charmed Operator which needs to send telemetry, such as logs, to +Loki through a Log Proxy by implementing the consumer side of the `loki_push_api` relation +interface. Filtering logs in Loki is largely performed on the basis of labels. In the Juju ecosystem, Juju topology labels are used to uniquely identify the workload which generates telemetry like logs. @@ -38,13 +39,14 @@ - `charm`: A reference to the parent (Loki) charm. - `relation_name`: The name of the relation that the charm uses to interact - with its clients, which implement `LokiPushApiConsumer` or `LogProxyConsumer`. + with its clients, which implement `LokiPushApiConsumer` or `LogProxyConsumer` + (note that LogProxyConsumer is deprecated). If provided, this relation name must match a provided relation in metadata.yaml with the `loki_push_api` interface. The default relation name is "logging" for `LokiPushApiConsumer` and "log-proxy" for - `LogProxyConsumer`. + `LogProxyConsumer` (note that LogProxyConsumer is deprecated). For example, a provider's `metadata.yaml` file may look as follows: @@ -219,6 +221,9 @@ def __init__(self, *args): ## LogProxyConsumer Library Usage +> Note: This object is deprecated. Consider migrating to LogForwarder (see v1/loki_push_api) with +> the release of Juju 3.6 LTS. + Let's say that we have a workload charm that produces logs, and we need to send those logs to a workload implementing the `loki_push_api` interface, such as `Loki` or `Grafana Agent`. @@ -480,7 +485,7 @@ def _alert_rules_error(self, event): # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 29 +LIBPATCH = 30 PYDEPS = ["cosl"] @@ -1539,7 +1544,8 @@ def __init__( the Loki API endpoint to push logs. It is intended for workloads that can speak loki_push_api (https://grafana.com/docs/loki/latest/api/#push-log-entries-to-loki), such as grafana-agent. - (If you only need to forward a few workload log files, then use LogProxyConsumer.) + (If you need to forward workload stdout logs, then use v1/loki_push_api.LogForwarder; if + you need to forward log files, then use LogProxyConsumer.) `LokiPushApiConsumer` can be instantiated as follows: @@ -1728,6 +1734,9 @@ class LogProxyEvents(ObjectEvents): class LogProxyConsumer(ConsumerBase): """LogProxyConsumer class. + > Note: This object is deprecated. Consider migrating to v1/loki_push_api.LogForwarder with the + > release of Juju 3.6 LTS. + The `LogProxyConsumer` object provides a method for attaching `promtail` to a workload in order to generate structured logging data from applications which traditionally log to syslog or do not have native Loki integration. diff --git a/examples/flask/lib/charms/loki_k8s/v1/loki_push_api.py b/examples/flask/lib/charms/loki_k8s/v1/loki_push_api.py index 7f8372c..d75cb7e 100644 --- a/examples/flask/lib/charms/loki_k8s/v1/loki_push_api.py +++ b/examples/flask/lib/charms/loki_k8s/v1/loki_push_api.py @@ -480,6 +480,25 @@ def _alert_rules_error(self, event): Units of consumer charm send their alert rules over app relation data using the `alert_rules` key. + +## Charm logging +The `charms.loki_k8s.v0.charm_logging` library can be used in conjunction with this one to configure python's +logging module to forward all logs to Loki via the loki-push-api interface. + +```python +from lib.charms.loki_k8s.v0.charm_logging import log_charm +from lib.charms.loki_k8s.v1.loki_push_api import charm_logging_config, LokiPushApiConsumer + +@log_charm(logging_endpoint="my_endpoints", server_cert="cert_path") +class MyCharm(...): + _cert_path = "/path/to/cert/on/charm/container.crt" + def __init__(self, ...): + self.logging = LokiPushApiConsumer(...) + self.my_endpoints, self.cert_path = charm_logging_config( + self.logging, self._cert_path) +``` + +Do this, and all charm logs will be forwarded to Loki as soon as a relation is formed. """ import json @@ -527,7 +546,7 @@ def _alert_rules_error(self, event): # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 12 +LIBPATCH = 13 PYDEPS = ["cosl"] @@ -577,7 +596,11 @@ def _alert_rules_error(self, event): GRPC_LISTEN_PORT_START = 9095 # odd start port -class RelationNotFoundError(ValueError): +class LokiPushApiError(Exception): + """Base class for errors raised by this module.""" + + +class RelationNotFoundError(LokiPushApiError): """Raised if there is no relation with the given name.""" def __init__(self, relation_name: str): @@ -587,7 +610,7 @@ def __init__(self, relation_name: str): super().__init__(self.message) -class RelationInterfaceMismatchError(Exception): +class RelationInterfaceMismatchError(LokiPushApiError): """Raised if the relation with the given name has a different interface.""" def __init__( @@ -607,7 +630,7 @@ def __init__( super().__init__(self.message) -class RelationRoleMismatchError(Exception): +class RelationRoleMismatchError(LokiPushApiError): """Raised if the relation with the given name has a different direction.""" def __init__( @@ -2555,7 +2578,7 @@ def _on_pebble_ready(self, event: PebbleReadyEvent): self._update_endpoints(event.workload, loki_endpoints) - def _update_logging(self, _): + def _update_logging(self, event: RelationEvent): """Update the log forwarding to match the active Loki endpoints.""" if not (loki_endpoints := self._retrieve_endpoints_from_relation()): logger.warning("No Loki endpoints available") @@ -2566,6 +2589,8 @@ def _update_logging(self, _): self._update_endpoints(container, loki_endpoints) # else: `_update_endpoints` will be called on pebble-ready anyway. + self._handle_alert_rules(event.relation) + def _retrieve_endpoints_from_relation(self) -> dict: loki_endpoints = {} @@ -2750,3 +2775,49 @@ def _exec(self, cmd) -> str: result = subprocess.run(cmd, check=True, stdout=subprocess.PIPE) output = result.stdout.decode("utf-8").strip() return output + + +def charm_logging_config( + endpoint_requirer: LokiPushApiConsumer, cert_path: Optional[Union[Path, str]] +) -> Tuple[Optional[List[str]], Optional[str]]: + """Utility function to determine the charm_logging config you will likely want. + + If no endpoint is provided: + disable charm logging. + If https endpoint is provided but cert_path is not found on disk: + disable charm logging. + If https endpoint is provided and cert_path is None: + ERROR + Else: + proceed with charm logging (with or without tls, as appropriate) + + Args: + endpoint_requirer: an instance of LokiPushApiConsumer. + cert_path: a path where a cert is stored. + + Returns: + A tuple with (optionally) the values of the endpoints and the certificate path. + + Raises: + LokiPushApiError: if some endpoint are http and others https. + """ + endpoints = [ep["url"] for ep in endpoint_requirer.loki_endpoints] + if not endpoints: + return None, None + + https = tuple(endpoint.startswith("https://") for endpoint in endpoints) + + if all(https): # all endpoints are https + if cert_path is None: + raise LokiPushApiError("Cannot send logs to https endpoints without a certificate.") + if not Path(cert_path).exists(): + # if endpoints is https BUT we don't have a server_cert yet: + # disable charm logging until we do to prevent tls errors + return None, None + return endpoints, str(cert_path) + + if all(not x for x in https): # all endpoints are http + return endpoints, None + + # if there's a disagreement, that's very weird: + raise LokiPushApiError("Some endpoints are http, some others are https. That's not good.") diff --git a/examples/flask/lib/charms/prometheus_k8s/v0/prometheus_scrape.py b/examples/flask/lib/charms/prometheus_k8s/v0/prometheus_scrape.py index e3d35c6..ca554fb 100644 --- a/examples/flask/lib/charms/prometheus_k8s/v0/prometheus_scrape.py +++ b/examples/flask/lib/charms/prometheus_k8s/v0/prometheus_scrape.py @@ -362,7 +362,7 @@ def _on_scrape_targets_changed(self, event): # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 47 +LIBPATCH = 48 PYDEPS = ["cosl"] @@ -2364,12 +2364,9 @@ def _get_tool_path(self) -> Optional[Path]: arch = "amd64" if arch == "x86_64" else arch res = "cos-tool-{}".format(arch) try: - path = Path(res).resolve() - path.chmod(0o777) + path = Path(res).resolve(strict=True) return path - except NotImplementedError: - logger.debug("System lacks support for chmod") - except FileNotFoundError: + except (FileNotFoundError, OSError): logger.debug('Could not locate cos-tool at: "{}"'.format(res)) return None diff --git a/examples/flask/lib/charms/redis_k8s/v0/redis.py b/examples/flask/lib/charms/redis_k8s/v0/redis.py index a731507..e28b14c 100644 --- a/examples/flask/lib/charms/redis_k8s/v0/redis.py +++ b/examples/flask/lib/charms/redis_k8s/v0/redis.py @@ -39,7 +39,7 @@ # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version. -LIBPATCH = 6 +LIBPATCH = 7 logger = logging.getLogger(__name__) @@ -78,6 +78,18 @@ def _on_relation_broken(self, event): # Trigger an event that our charm can react to. self.charm.on.redis_relation_updated.emit() + @property + def app_data(self) -> Optional[Dict[str, str]]: + """Retrieve the app data. + + Returns: + Dict: dict containing the app data. + """ + relation = self.model.get_relation(self.relation_name) + if not relation: + return None + return relation.data[relation.app] + @property def relation_data(self) -> Optional[Dict[str, str]]: """Retrieve the relation data. @@ -98,10 +110,16 @@ def url(self) -> Optional[str]: Returns: str: the Redis URL. """ - relation_data = self.relation_data - if not relation_data: + if not (relation_data := self.relation_data): return None + redis_host = relation_data.get("hostname") + + if app_data := self.app_data: + try: + redis_host = self.app_data.get("leader-host", redis_host) + except KeyError: + pass redis_port = relation_data.get("port") return f"redis://{redis_host}:{redis_port}" diff --git a/examples/flask/lib/charms/saml_integrator/v0/saml.py b/examples/flask/lib/charms/saml_integrator/v0/saml.py index 722b1c6..8fd1610 100644 --- a/examples/flask/lib/charms/saml_integrator/v0/saml.py +++ b/examples/flask/lib/charms/saml_integrator/v0/saml.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# Copyright 2025 Canonical Ltd. +# Copyright 2024 Canonical Ltd. # Licensed under the Apache2.0. See LICENSE file in charm source for details. """Library to manage the relation data for the SAML Integrator charm. @@ -68,7 +68,7 @@ class method `from_relation_data`. # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 9 +LIBPATCH = 10 # pylint: disable=wrong-import-position import re @@ -92,7 +92,7 @@ class SamlEndpoint(BaseModel): """ name: str = Field(..., min_length=1) - url: AnyHttpUrl + url: typing.Optional[AnyHttpUrl] binding: str = Field(..., min_length=1) response_url: typing.Optional[AnyHttpUrl] @@ -108,7 +108,8 @@ def to_relation_data(self) -> typing.Dict[str, str]: # Transform name into snakecase lowercase_name = re.sub(r"(? "SamlEndpoi prefix = f"{lowercase_name}_{http_method}_" return cls( name=name, - url=parse_obj_as(AnyHttpUrl, relation_data[f"{prefix}url"]), + url=( + parse_obj_as(AnyHttpUrl, relation_data[f"{prefix}url"]) + if relation_data[f"{prefix}url"] + else None + ), binding=relation_data[f"{prefix}binding"], response_url=( parse_obj_as(AnyHttpUrl, relation_data[f"{prefix}response_url"]) @@ -158,7 +163,7 @@ class SamlRelationData(BaseModel): """ entity_id: str = Field(..., min_length=1) - metadata_url: AnyHttpUrl + metadata_url: typing.Optional[AnyHttpUrl] certificates: typing.Tuple[str, ...] endpoints: typing.Tuple[SamlEndpoint, ...] @@ -170,9 +175,10 @@ def to_relation_data(self) -> typing.Dict[str, str]: """ result = { "entity_id": self.entity_id, - "metadata_url": str(self.metadata_url), "x509certs": ",".join(self.certificates), } + if self.metadata_url: + result["metadata_url"] = str(self.metadata_url) for endpoint in self.endpoints: result.update(endpoint.to_relation_data()) return result @@ -201,8 +207,10 @@ def from_relation_data(cls, relation_data: ops.RelationDataContent) -> "SamlRela endpoints.sort(key=lambda ep: ep.name) return cls( entity_id=relation_data.get("entity_id"), # type: ignore - metadata_url=parse_obj_as( - AnyHttpUrl, relation_data.get("metadata_url") + metadata_url=( + parse_obj_as(AnyHttpUrl, relation_data.get("metadata_url")) + if relation_data.get("metadata_url") + else None ), # type: ignore certificates=tuple(relation_data.get("x509certs").split(",")), # type: ignore endpoints=tuple(endpoints), @@ -232,7 +240,7 @@ def entity_id(self) -> str: return self.saml_relation_data.entity_id @property - def metadata_url(self) -> str: + def metadata_url(self) -> typing.Optional[str]: """Fetch the SAML metadata URL from the relation.""" return str(self.saml_relation_data.metadata_url) diff --git a/examples/flask/lib/charms/tempo_coordinator_k8s/v0/tracing.py b/examples/flask/lib/charms/tempo_coordinator_k8s/v0/tracing.py new file mode 100644 index 0000000..27144fa --- /dev/null +++ b/examples/flask/lib/charms/tempo_coordinator_k8s/v0/tracing.py @@ -0,0 +1,987 @@ +# Copyright 2024 Canonical Ltd. +# See LICENSE file for licensing details. +"""## Overview. + +This document explains how to integrate with the Tempo charm for the purpose of pushing traces to a +tracing endpoint provided by Tempo. It also explains how alternative implementations of the Tempo charm +may maintain the same interface and be backward compatible with all currently integrated charms. + +## Requirer Library Usage + +Charms seeking to push traces to Tempo, must do so using the `TracingEndpointRequirer` +object from this charm library. For the simplest use cases, using the `TracingEndpointRequirer` +object only requires instantiating it, typically in the constructor of your charm. The +`TracingEndpointRequirer` constructor requires the name of the relation over which a tracing endpoint + is exposed by the Tempo charm, and a list of protocols it intends to send traces with. + This relation must use the `tracing` interface. + The `TracingEndpointRequirer` object may be instantiated as follows + + from charms.tempo_coordinator_k8s.v0.tracing import TracingEndpointRequirer + + def __init__(self, *args): + super().__init__(*args) + # ... + self.tracing = TracingEndpointRequirer(self, + protocols=['otlp_grpc', 'otlp_http', 'jaeger_http_thrift'] + ) + # ... + +Note that the first argument (`self`) to `TracingEndpointRequirer` is always a reference to the +parent charm. + +Alternatively to providing the list of requested protocols at init time, the charm can do it at +any point in time by calling the +`TracingEndpointRequirer.request_protocols(*protocol:str, relation:Optional[Relation])` method. +Using this method also allows you to use per-relation protocols. + +Units of requirer charms obtain the tempo endpoint to which they will push their traces by calling +`TracingEndpointRequirer.get_endpoint(protocol: str)`, where `protocol` is, for example: +- `otlp_grpc` +- `otlp_http` +- `zipkin` +- `tempo` + +If the `protocol` is not in the list of protocols that the charm requested at endpoint set-up time, +the library will raise an error. + +We recommend that you scale up your tracing provider and relate it to an ingress so that your tracing requests +go through the ingress and get load balanced across all units. Otherwise, if the provider's leader goes down, your tracing goes down. + +## Provider Library Usage + +The `TracingEndpointProvider` object may be used by charms to manage relations with their +trace sources. For this purposes a Tempo-like charm needs to do two things + +1. Instantiate the `TracingEndpointProvider` object by providing it a +reference to the parent (Tempo) charm and optionally the name of the relation that the Tempo charm +uses to interact with its trace sources. This relation must conform to the `tracing` interface +and it is strongly recommended that this relation be named `tracing` which is its +default value. + +For example a Tempo charm may instantiate the `TracingEndpointProvider` in its constructor as +follows + + from charms.tempo_coordinator_k8s.v0.tracing import TracingEndpointProvider + + def __init__(self, *args): + super().__init__(*args) + # ... + self.tracing = TracingEndpointProvider(self) + # ... + + + +""" # noqa: W505 +import enum +import json +import logging +from pathlib import Path +from typing import ( + TYPE_CHECKING, + Any, + Dict, + List, + Literal, + MutableMapping, + Optional, + Sequence, + Tuple, + Union, + cast, +) + +import pydantic +from ops.charm import ( + CharmBase, + CharmEvents, + RelationBrokenEvent, + RelationEvent, + RelationRole, +) +from ops.framework import EventSource, Object +from ops.model import ModelError, Relation +from pydantic import BaseModel, Field + +# The unique Charmhub library identifier, never change it +LIBID = "d2f02b1f8d1244b5989fd55bc3a28943" + +# Increment this major API version when introducing breaking changes +LIBAPI = 0 + +# Increment this PATCH version before using `charmcraft publish-lib` or reset +# to 0 if you are raising the major API version +LIBPATCH = 5 + +PYDEPS = ["pydantic"] + +logger = logging.getLogger(__name__) + +DEFAULT_RELATION_NAME = "tracing" +RELATION_INTERFACE_NAME = "tracing" + +# Supported list rationale https://github.com/canonical/tempo-coordinator-k8s-operator/issues/8 +ReceiverProtocol = Literal[ + "zipkin", + "otlp_grpc", + "otlp_http", + "jaeger_grpc", + "jaeger_thrift_http", +] + +RawReceiver = Tuple[ReceiverProtocol, str] +"""Helper type. A raw receiver is defined as a tuple consisting of the protocol name, and the (external, if available), +(secured, if available) resolvable server url. +""" + +BUILTIN_JUJU_KEYS = {"ingress-address", "private-address", "egress-subnets"} + + +class TransportProtocolType(str, enum.Enum): + """Receiver Type.""" + + http = "http" + grpc = "grpc" + + +receiver_protocol_to_transport_protocol: Dict[ReceiverProtocol, TransportProtocolType] = { + "zipkin": TransportProtocolType.http, + "otlp_grpc": TransportProtocolType.grpc, + "otlp_http": TransportProtocolType.http, + "jaeger_thrift_http": TransportProtocolType.http, + "jaeger_grpc": TransportProtocolType.grpc, +} +"""A mapping between telemetry protocols and their corresponding transport protocol. +""" + + +class TracingError(Exception): + """Base class for custom errors raised by this library.""" + + +class NotReadyError(TracingError): + """Raised by the provider wrapper if a requirer hasn't published the required data (yet).""" + + +class ProtocolNotRequestedError(TracingError): + """Raised if the user attempts to obtain an endpoint for a protocol it did not request.""" + + +class DataValidationError(TracingError): + """Raised when data validation fails on IPU relation data.""" + + +class AmbiguousRelationUsageError(TracingError): + """Raised when one wrongly assumes that there can only be one relation on an endpoint.""" + + +if int(pydantic.version.VERSION.split(".")[0]) < 2: + + class DatabagModel(BaseModel): # type: ignore + """Base databag model.""" + + class Config: + """Pydantic config.""" + + # ignore any extra fields in the databag + extra = "ignore" + """Ignore any extra fields in the databag.""" + allow_population_by_field_name = True + """Allow instantiating this class by field name (instead of forcing alias).""" + + _NEST_UNDER = None + + @classmethod + def load(cls, databag: MutableMapping): + """Load this model from a Juju databag.""" + if cls._NEST_UNDER: + return cls.parse_obj(json.loads(databag[cls._NEST_UNDER])) + + try: + data = { + k: json.loads(v) + for k, v in databag.items() + # Don't attempt to parse model-external values + if k in {f.alias for f in cls.__fields__.values()} + } + except json.JSONDecodeError as e: + msg = f"invalid databag contents: expecting json. {databag}" + logger.error(msg) + raise DataValidationError(msg) from e + + try: + return cls.parse_raw(json.dumps(data)) # type: ignore + except pydantic.ValidationError as e: + msg = f"failed to validate databag: {databag}" + logger.debug(msg, exc_info=True) + raise DataValidationError(msg) from e + + def dump(self, databag: Optional[MutableMapping] = None, clear: bool = True): + """Write the contents of this model to Juju databag. + + :param databag: the databag to write the data to. + :param clear: ensure the databag is cleared before writing it. + """ + if clear and databag: + databag.clear() + + if databag is None: + databag = {} + + if self._NEST_UNDER: + databag[self._NEST_UNDER] = self.json(by_alias=True) + return databag + + dct = self.dict() + for key, field in self.__fields__.items(): # type: ignore + value = dct[key] + databag[field.alias or key] = json.dumps(value) + + return databag + +else: + from pydantic import ConfigDict + + class DatabagModel(BaseModel): + """Base databag model.""" + + model_config = ConfigDict( + # ignore any extra fields in the databag + extra="ignore", + # Allow instantiating this class by field name (instead of forcing alias). + populate_by_name=True, + # Custom config key: whether to nest the whole datastructure (as json) + # under a field or spread it out at the toplevel. + _NEST_UNDER=None, # type: ignore + ) + """Pydantic config.""" + + @classmethod + def load(cls, databag: MutableMapping): + """Load this model from a Juju databag.""" + nest_under = cls.model_config.get("_NEST_UNDER") # type: ignore + if nest_under: + return cls.model_validate(json.loads(databag[nest_under])) # type: ignore + + try: + data = { + k: json.loads(v) + for k, v in databag.items() + # Don't attempt to parse model-external values + if k in {(f.alias or n) for n, f in cls.__fields__.items()} + } + except json.JSONDecodeError as e: + msg = f"invalid databag contents: expecting json. {databag}" + logger.error(msg) + raise DataValidationError(msg) from e + + try: + return cls.model_validate_json(json.dumps(data)) # type: ignore + except pydantic.ValidationError as e: + msg = f"failed to validate databag: {databag}" + logger.debug(msg, exc_info=True) + raise DataValidationError(msg) from e + + def dump(self, databag: Optional[MutableMapping] = None, clear: bool = True): + """Write the contents of this model to Juju databag. + + :param databag: the databag to write the data to. + :param clear: ensure the databag is cleared before writing it. + """ + if clear and databag: + databag.clear() + + if databag is None: + databag = {} + nest_under = self.model_config.get("_NEST_UNDER") + if nest_under: + databag[nest_under] = self.model_dump_json( # type: ignore + by_alias=True, + # skip keys whose values are default + exclude_defaults=True, + ) + return databag + + dct = self.model_dump() # type: ignore + for key, field in self.model_fields.items(): # type: ignore + value = dct[key] + if value == field.default: + continue + databag[field.alias or key] = json.dumps(value) + + return databag + + +# todo use models from charm-relation-interfaces +if int(pydantic.version.VERSION.split(".")[0]) < 2: + + class ProtocolType(BaseModel): # type: ignore + """Protocol Type.""" + + class Config: + """Pydantic config.""" + + use_enum_values = True + """Allow serializing enum values.""" + + name: str = Field( + ..., + description="Receiver protocol name. What protocols are supported (and what they are called) " + "may differ per provider.", + examples=["otlp_grpc", "otlp_http", "tempo_http"], + ) + + type: TransportProtocolType = Field( + ..., + description="The transport protocol used by this receiver.", + examples=["http", "grpc"], + ) + +else: + + class ProtocolType(BaseModel): + """Protocol Type.""" + + model_config = ConfigDict( # type: ignore + # Allow serializing enum values. + use_enum_values=True + ) + """Pydantic config.""" + + name: str = Field( + ..., + description="Receiver protocol name. What protocols are supported (and what they are called) " + "may differ per provider.", + examples=["otlp_grpc", "otlp_http", "tempo_http"], + ) + + type: TransportProtocolType = Field( + ..., + description="The transport protocol used by this receiver.", + examples=["http", "grpc"], + ) + + +class Receiver(BaseModel): + """Specification of an active receiver.""" + + protocol: ProtocolType = Field(..., description="Receiver protocol name and type.") + url: str = Field( + ..., + description="""URL at which the receiver is reachable. If there's an ingress, it would be the external URL. + Otherwise, it would be the service's fqdn or internal IP. + If the protocol type is grpc, the url will not contain a scheme.""", + examples=[ + "http://traefik_address:2331", + "https://traefik_address:2331", + "http://tempo_public_ip:2331", + "https://tempo_public_ip:2331", + "tempo_public_ip:2331", + ], + ) + + +class TracingProviderAppData(DatabagModel): # noqa: D101 + """Application databag model for the tracing provider.""" + + receivers: List[Receiver] = Field( + ..., + description="List of all receivers enabled on the tracing provider.", + ) + + +class TracingRequirerAppData(DatabagModel): # noqa: D101 + """Application databag model for the tracing requirer.""" + + receivers: List[ReceiverProtocol] + """Requested receivers.""" + + +class _AutoSnapshotEvent(RelationEvent): + __args__: Tuple[str, ...] = () + __optional_kwargs__: Dict[str, Any] = {} + + @classmethod + def __attrs__(cls): + return cls.__args__ + tuple(cls.__optional_kwargs__.keys()) + + def __init__(self, handle, relation, *args, **kwargs): + super().__init__(handle, relation) + + if not len(self.__args__) == len(args): + raise TypeError("expected {} args, got {}".format(len(self.__args__), len(args))) + + for attr, obj in zip(self.__args__, args): + setattr(self, attr, obj) + for attr, default in self.__optional_kwargs__.items(): + obj = kwargs.get(attr, default) + setattr(self, attr, obj) + + def snapshot(self) -> dict: + dct = super().snapshot() + for attr in self.__attrs__(): + obj = getattr(self, attr) + try: + dct[attr] = obj + except ValueError as e: + raise ValueError( + "cannot automagically serialize {}: " + "override this method and do it " + "manually.".format(obj) + ) from e + + return dct + + def restore(self, snapshot: dict) -> None: + super().restore(snapshot) + for attr, obj in snapshot.items(): + setattr(self, attr, obj) + + +class RelationNotFoundError(Exception): + """Raised if no relation with the given name is found.""" + + def __init__(self, relation_name: str): + self.relation_name = relation_name + self.message = "No relation named '{}' found".format(relation_name) + super().__init__(self.message) + + +class RelationInterfaceMismatchError(Exception): + """Raised if the relation with the given name has an unexpected interface.""" + + def __init__( + self, + relation_name: str, + expected_relation_interface: str, + actual_relation_interface: str, + ): + self.relation_name = relation_name + self.expected_relation_interface = expected_relation_interface + self.actual_relation_interface = actual_relation_interface + self.message = ( + "The '{}' relation has '{}' as interface rather than the expected '{}'".format( + relation_name, actual_relation_interface, expected_relation_interface + ) + ) + + super().__init__(self.message) + + +class RelationRoleMismatchError(Exception): + """Raised if the relation with the given name has a different role than expected.""" + + def __init__( + self, + relation_name: str, + expected_relation_role: RelationRole, + actual_relation_role: RelationRole, + ): + self.relation_name = relation_name + self.expected_relation_interface = expected_relation_role + self.actual_relation_role = actual_relation_role + self.message = "The '{}' relation has role '{}' rather than the expected '{}'".format( + relation_name, repr(actual_relation_role), repr(expected_relation_role) + ) + + super().__init__(self.message) + + +def _validate_relation_by_interface_and_direction( + charm: CharmBase, + relation_name: str, + expected_relation_interface: str, + expected_relation_role: RelationRole, +): + """Validate a relation. + + Verifies that the `relation_name` provided: (1) exists in metadata.yaml, + (2) declares as interface the interface name passed as `relation_interface` + and (3) has the right "direction", i.e., it is a relation that `charm` + provides or requires. + + Args: + charm: a `CharmBase` object to scan for the matching relation. + relation_name: the name of the relation to be verified. + expected_relation_interface: the interface name to be matched by the + relation named `relation_name`. + expected_relation_role: whether the `relation_name` must be either + provided or required by `charm`. + + Raises: + RelationNotFoundError: If there is no relation in the charm's metadata.yaml + with the same name as provided via `relation_name` argument. + RelationInterfaceMismatchError: The relation with the same name as provided + via `relation_name` argument does not have the same relation interface + as specified via the `expected_relation_interface` argument. + RelationRoleMismatchError: If the relation with the same name as provided + via `relation_name` argument does not have the same role as specified + via the `expected_relation_role` argument. + """ + if relation_name not in charm.meta.relations: + raise RelationNotFoundError(relation_name) + + relation = charm.meta.relations[relation_name] + + # fixme: why do we need to cast here? + actual_relation_interface = cast(str, relation.interface_name) + + if actual_relation_interface != expected_relation_interface: + raise RelationInterfaceMismatchError( + relation_name, expected_relation_interface, actual_relation_interface + ) + + if expected_relation_role is RelationRole.provides: + if relation_name not in charm.meta.provides: + raise RelationRoleMismatchError( + relation_name, RelationRole.provides, RelationRole.requires + ) + elif expected_relation_role is RelationRole.requires: + if relation_name not in charm.meta.requires: + raise RelationRoleMismatchError( + relation_name, RelationRole.requires, RelationRole.provides + ) + else: + raise TypeError("Unexpected RelationDirection: {}".format(expected_relation_role)) + + +class RequestEvent(RelationEvent): + """Event emitted when a remote requests a tracing endpoint.""" + + @property + def requested_receivers(self) -> List[ReceiverProtocol]: + """List of receiver protocols that have been requested.""" + relation = self.relation + app = relation.app + if not app: + raise NotReadyError("relation.app is None") + + return TracingRequirerAppData.load(relation.data[app]).receivers + + +class BrokenEvent(RelationBrokenEvent): + """Event emitted when a relation on tracing is broken.""" + + +class TracingEndpointProviderEvents(CharmEvents): + """TracingEndpointProvider events.""" + + request = EventSource(RequestEvent) + broken = EventSource(BrokenEvent) + + +class TracingEndpointProvider(Object): + """Class representing a trace receiver service.""" + + on = TracingEndpointProviderEvents() # type: ignore + + def __init__( + self, + charm: CharmBase, + external_url: Optional[str] = None, + relation_name: str = DEFAULT_RELATION_NAME, + ): + """Initialize. + + Args: + charm: a `CharmBase` instance that manages this instance of the Tempo service. + external_url: external address of the node hosting the tempo server, + if an ingress is present. + relation_name: an optional string name of the relation between `charm` + and the Tempo charmed service. The default is "tracing". + + Raises: + RelationNotFoundError: If there is no relation in the charm's metadata.yaml + with the same name as provided via `relation_name` argument. + RelationInterfaceMismatchError: The relation with the same name as provided + via `relation_name` argument does not have the `tracing` relation + interface. + RelationRoleMismatchError: If the relation with the same name as provided + via `relation_name` argument does not have the `RelationRole.requires` + role. + """ + _validate_relation_by_interface_and_direction( + charm, relation_name, RELATION_INTERFACE_NAME, RelationRole.provides + ) + + super().__init__(charm, relation_name + "tracing-provider") + self._charm = charm + self._external_url = external_url + self._relation_name = relation_name + self.framework.observe( + self._charm.on[relation_name].relation_joined, self._on_relation_event + ) + self.framework.observe( + self._charm.on[relation_name].relation_created, self._on_relation_event + ) + self.framework.observe( + self._charm.on[relation_name].relation_changed, self._on_relation_event + ) + self.framework.observe( + self._charm.on[relation_name].relation_broken, self._on_relation_broken_event + ) + + def _on_relation_broken_event(self, e: RelationBrokenEvent): + """Handle relation broken events.""" + self.on.broken.emit(e.relation) + + def _on_relation_event(self, e: RelationEvent): + """Handle relation created/joined/changed events.""" + if self.is_requirer_ready(e.relation): + self.on.request.emit(e.relation) + + def is_requirer_ready(self, relation: Relation): + """Attempt to determine if requirer has already populated app data.""" + try: + self._get_requested_protocols(relation) + except NotReadyError: + return False + return True + + @staticmethod + def _get_requested_protocols(relation: Relation): + app = relation.app + if not app: + raise NotReadyError("relation.app is None") + + try: + databag = TracingRequirerAppData.load(relation.data[app]) + except (json.JSONDecodeError, pydantic.ValidationError, DataValidationError): + logger.info(f"relation {relation} is not ready to talk tracing") + raise NotReadyError() + return databag.receivers + + def requested_protocols(self): + """All receiver protocols that have been requested by our related apps.""" + requested_protocols = set() + for relation in self.relations: + try: + protocols = self._get_requested_protocols(relation) + except NotReadyError: + continue + requested_protocols.update(protocols) + return requested_protocols + + @property + def relations(self) -> List[Relation]: + """All relations active on this endpoint.""" + return self._charm.model.relations[self._relation_name] + + def publish_receivers(self, receivers: Sequence[RawReceiver]): + """Let all requirers know that these receivers are active and listening.""" + if not self._charm.unit.is_leader(): + raise RuntimeError("only leader can do this") + + for relation in self.relations: + try: + TracingProviderAppData( + receivers=[ + Receiver( + url=url, + protocol=ProtocolType( + name=protocol, + type=receiver_protocol_to_transport_protocol[protocol], + ), + ) + for protocol, url in receivers + ], + ).dump(relation.data[self._charm.app]) + + except ModelError as e: + # args are bytes + msg = e.args[0] + if isinstance(msg, bytes): + if msg.startswith( + b"ERROR cannot read relation application settings: permission denied" + ): + logger.error( + f"encountered error {e} while attempting to update_relation_data." + f"The relation must be gone." + ) + continue + raise + + +class EndpointRemovedEvent(RelationBrokenEvent): + """Event representing a change in one of the receiver endpoints.""" + + +class EndpointChangedEvent(_AutoSnapshotEvent): + """Event representing a change in one of the receiver endpoints.""" + + __args__ = ("_receivers",) + + if TYPE_CHECKING: + _receivers = [] # type: List[dict] + + @property + def receivers(self) -> List[Receiver]: + """Cast receivers back from dict.""" + return [Receiver(**i) for i in self._receivers] + + +class TracingEndpointRequirerEvents(CharmEvents): + """TracingEndpointRequirer events.""" + + endpoint_changed = EventSource(EndpointChangedEvent) + endpoint_removed = EventSource(EndpointRemovedEvent) + + +class TracingEndpointRequirer(Object): + """A tracing endpoint for Tempo.""" + + on = TracingEndpointRequirerEvents() # type: ignore + + def __init__( + self, + charm: CharmBase, + relation_name: str = DEFAULT_RELATION_NAME, + protocols: Optional[List[ReceiverProtocol]] = None, + ): + """Construct a tracing requirer for a Tempo charm. + + If your application supports pushing traces to a distributed tracing backend, the + `TracingEndpointRequirer` object enables your charm to easily access endpoint information + exchanged over a `tracing` relation interface. + + Args: + charm: a `CharmBase` object that manages this + `TracingEndpointRequirer` object. Typically, this is `self` in the instantiating + class. + relation_name: an optional string name of the relation between `charm` + and the Tempo charmed service. The default is "tracing". It is strongly + advised not to change the default, so that people deploying your charm will have a + consistent experience with all other charms that provide tracing endpoints. + protocols: optional list of protocols that the charm intends to send traces with. + The provider will enable receivers for these and only these protocols, + so be sure to enable all protocols the charm or its workload are going to need. + + Raises: + RelationNotFoundError: If there is no relation in the charm's metadata.yaml + with the same name as provided via `relation_name` argument. + RelationInterfaceMismatchError: The relation with the same name as provided + via `relation_name` argument does not have the `tracing` relation + interface. + RelationRoleMismatchError: If the relation with the same name as provided + via `relation_name` argument does not have the `RelationRole.provides` + role. + """ + _validate_relation_by_interface_and_direction( + charm, relation_name, RELATION_INTERFACE_NAME, RelationRole.requires + ) + + super().__init__(charm, relation_name) + + self._is_single_endpoint = charm.meta.relations[relation_name].limit == 1 + + self._charm = charm + self._relation_name = relation_name + + events = self._charm.on[self._relation_name] + self.framework.observe(events.relation_changed, self._on_tracing_relation_changed) + self.framework.observe(events.relation_broken, self._on_tracing_relation_broken) + + if protocols: + self.request_protocols(protocols) + + def request_protocols( + self, protocols: Sequence[ReceiverProtocol], relation: Optional[Relation] = None + ): + """Publish the list of protocols which the provider should activate.""" + # todo: should we check if _is_single_endpoint and len(self.relations) > 1 and raise, here? + relations = [relation] if relation else self.relations + + if not protocols: + # empty sequence + raise ValueError( + "You need to pass a nonempty sequence of protocols to `request_protocols`." + ) + + try: + if self._charm.unit.is_leader(): + for relation in relations: + TracingRequirerAppData( + receivers=list(protocols), + ).dump(relation.data[self._charm.app]) + + except ModelError as e: + # args are bytes + msg = e.args[0] + if isinstance(msg, bytes): + if msg.startswith( + b"ERROR cannot read relation application settings: permission denied" + ): + logger.error( + f"encountered error {e} while attempting to request_protocols." + f"The relation must be gone." + ) + return + raise + + @property + def relations(self) -> List[Relation]: + """The tracing relations associated with this endpoint.""" + return self._charm.model.relations[self._relation_name] + + @property + def _relation(self) -> Optional[Relation]: + """If this wraps a single endpoint, the relation bound to it, if any.""" + if not self._is_single_endpoint: + objname = type(self).__name__ + raise AmbiguousRelationUsageError( + f"This {objname} wraps a {self._relation_name} endpoint that has " + "limit != 1. We can't determine what relation, of the possibly many, you are " + f"talking about. Please pass a relation instance while calling {objname}, " + "or set limit=1 in the charm metadata." + ) + relations = self.relations + return relations[0] if relations else None + + def is_ready(self, relation: Optional[Relation] = None): + """Is this endpoint ready?""" + relation = relation or self._relation + if not relation: + logger.debug(f"no relation on {self._relation_name !r}: tracing not ready") + return False + if relation.data is None: + logger.error(f"relation data is None for {relation}") + return False + if not relation.app: + logger.error(f"{relation} event received but there is no relation.app") + return False + try: + databag = dict(relation.data[relation.app]) + TracingProviderAppData.load(databag) + + except (json.JSONDecodeError, pydantic.ValidationError, DataValidationError): + logger.info(f"failed validating relation data for {relation}") + return False + return True + + def _on_tracing_relation_changed(self, event): + """Notify the providers that there is new endpoint information available.""" + relation = event.relation + if not self.is_ready(relation): + self.on.endpoint_removed.emit(relation) # type: ignore + return + + data = TracingProviderAppData.load(relation.data[relation.app]) + self.on.endpoint_changed.emit(relation, [i.dict() for i in data.receivers]) # type: ignore + + def _on_tracing_relation_broken(self, event: RelationBrokenEvent): + """Notify the providers that the endpoint is broken.""" + relation = event.relation + self.on.endpoint_removed.emit(relation) # type: ignore + + def get_all_endpoints( + self, relation: Optional[Relation] = None + ) -> Optional[TracingProviderAppData]: + """Unmarshalled relation data.""" + relation = relation or self._relation + if not self.is_ready(relation): + return + return TracingProviderAppData.load(relation.data[relation.app]) # type: ignore + + def _get_endpoint( + self, relation: Optional[Relation], protocol: ReceiverProtocol + ) -> Optional[str]: + app_data = self.get_all_endpoints(relation) + if not app_data: + return None + receivers: List[Receiver] = list( + filter(lambda i: i.protocol.name == protocol, app_data.receivers) + ) + if not receivers: + # it can happen if the charm requests tracing protocols, but the relay (such as grafana-agent) isn't yet + # connected to the tracing backend. In this case, it's not an error the charm author can do anything about + logger.warning(f"no receiver found with protocol={protocol!r}.") + return + if len(receivers) > 1: + # if we have more than 1 receiver that matches, it shouldn't matter which receiver we'll be using. + logger.warning( + f"too many receivers with protocol={protocol!r}; using first one. Found: {receivers}" + ) + + receiver = receivers[0] + return receiver.url + + def get_endpoint( + self, protocol: ReceiverProtocol, relation: Optional[Relation] = None + ) -> Optional[str]: + """Receiver endpoint for the given protocol. + + It could happen that this function gets called before the provider publishes the endpoints. + In such a scenario, if a non-leader unit calls this function, a permission denied exception will be raised due to + restricted access. To prevent this, this function needs to be guarded by the `is_ready` check. + + Raises: + ProtocolNotRequestedError: + If the charm unit is the leader unit and attempts to obtain an endpoint for a protocol it did not request. + """ + endpoint = self._get_endpoint(relation or self._relation, protocol=protocol) + if not endpoint: + requested_protocols = set() + relations = [relation] if relation else self.relations + for relation in relations: + try: + databag = TracingRequirerAppData.load(relation.data[self._charm.app]) + except DataValidationError: + continue + + requested_protocols.update(databag.receivers) + + if protocol not in requested_protocols: + raise ProtocolNotRequestedError(protocol, relation) + + return None + return endpoint + + +def charm_tracing_config( + endpoint_requirer: TracingEndpointRequirer, cert_path: Optional[Union[Path, str]] +) -> Tuple[Optional[str], Optional[str]]: + """Return the charm_tracing config you likely want. + + If no endpoint is provided: + disable charm tracing. + If https endpoint is provided but cert_path is not found on disk: + disable charm tracing. + If https endpoint is provided and cert_path is None: + ERROR + Else: + proceed with charm tracing (with or without tls, as appropriate) + + Usage: + >>> from lib.charms.tempo_coordinator_k8s.v0.charm_tracing import trace_charm + >>> from lib.charms.tempo_coordinator_k8s.v0.tracing import charm_tracing_config + >>> @trace_charm(tracing_endpoint="my_endpoint", cert_path="cert_path") + >>> class MyCharm(...): + >>> _cert_path = "/path/to/cert/on/charm/container.crt" + >>> def __init__(self, ...): + >>> self.tracing = TracingEndpointRequirer(...) + >>> self.my_endpoint, self.cert_path = charm_tracing_config( + ... self.tracing, self._cert_path) + """ + if not endpoint_requirer.is_ready(): + return None, None + + endpoint = endpoint_requirer.get_endpoint("otlp_http") + if not endpoint: + return None, None + + is_https = endpoint.startswith("https://") + + if is_https: + if cert_path is None or not Path(cert_path).exists(): + # disable charm tracing until we obtain a cert to prevent tls errors + logger.error( + "Tracing endpoint is https, but no server_cert has been passed." + "Please point @trace_charm to a `server_cert` attr. " + "This might also mean that the tracing provider is related to a " + "certificates provider, but this application is not (yet). " + "In that case, you might just have to wait a bit for the certificates " + "integration to settle. " + ) + return None, None + return endpoint, str(cert_path) + else: + return endpoint, None diff --git a/examples/flask/lib/charms/traefik_k8s/v2/ingress.py b/examples/flask/lib/charms/traefik_k8s/v2/ingress.py index 582a31f..bb7ac5e 100644 --- a/examples/flask/lib/charms/traefik_k8s/v2/ingress.py +++ b/examples/flask/lib/charms/traefik_k8s/v2/ingress.py @@ -1,4 +1,4 @@ -# Copyright 2025 Canonical Ltd. +# Copyright 2024 Canonical Ltd. # See LICENSE file for licensing details. r"""# Interface Library for ingress. @@ -56,13 +56,14 @@ def _on_ingress_revoked(self, event: IngressPerAppRevokedEvent): import socket import typing from dataclasses import dataclass +from functools import partial from typing import Any, Callable, Dict, List, MutableMapping, Optional, Sequence, Tuple, Union import pydantic from ops.charm import CharmBase, RelationBrokenEvent, RelationEvent from ops.framework import EventSource, Object, ObjectEvents, StoredState from ops.model import ModelError, Relation, Unit -from pydantic import AnyHttpUrl, BaseModel, Field, validator +from pydantic import AnyHttpUrl, BaseModel, Field # The unique Charmhub library identifier, never change it LIBID = "e6de2a5cd5b34422a204668f3b8f90d2" @@ -72,7 +73,7 @@ def _on_ingress_revoked(self, event: IngressPerAppRevokedEvent): # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 13 +LIBPATCH = 14 PYDEPS = ["pydantic"] @@ -84,6 +85,9 @@ def _on_ingress_revoked(self, event: IngressPerAppRevokedEvent): PYDANTIC_IS_V1 = int(pydantic.version.VERSION.split(".")[0]) < 2 if PYDANTIC_IS_V1: + from pydantic import validator + + input_validator = partial(validator, pre=True) class DatabagModel(BaseModel): # type: ignore """Base databag model.""" @@ -143,7 +147,9 @@ def dump(self, databag: Optional[MutableMapping] = None, clear: bool = True): return databag else: - from pydantic import ConfigDict + from pydantic import ConfigDict, field_validator + + input_validator = partial(field_validator, mode="before") class DatabagModel(BaseModel): """Base databag model.""" @@ -171,7 +177,7 @@ def load(cls, databag: MutableMapping): k: json.loads(v) for k, v in databag.items() # Don't attempt to parse model-external values - if k in {(f.alias or n) for n, f in cls.__fields__.items()} # type: ignore + if k in {(f.alias or n) for n, f in cls.model_fields.items()} # type: ignore } except json.JSONDecodeError as e: msg = f"invalid databag contents: expecting json. {databag}" @@ -252,14 +258,14 @@ class IngressRequirerAppData(DatabagModel): default="http", description="What scheme to use in the generated ingress url" ) - @validator("scheme", pre=True) + @input_validator("scheme") def validate_scheme(cls, scheme): # noqa: N805 # pydantic wants 'cls' as first arg """Validate scheme arg.""" if scheme not in {"http", "https", "h2c"}: raise ValueError("invalid scheme: should be one of `http|https|h2c`") return scheme - @validator("port", pre=True) + @input_validator("port") def validate_port(cls, port): # noqa: N805 # pydantic wants 'cls' as first arg """Validate port.""" assert isinstance(port, int), type(port) @@ -277,13 +283,13 @@ class IngressRequirerUnitData(DatabagModel): "IP can only be None if the IP information can't be retrieved from juju.", ) - @validator("host", pre=True) + @input_validator("host") def validate_host(cls, host): # noqa: N805 # pydantic wants 'cls' as first arg """Validate host.""" assert isinstance(host, str), type(host) return host - @validator("ip", pre=True) + @input_validator("ip") def validate_ip(cls, ip): # noqa: N805 # pydantic wants 'cls' as first arg """Validate ip.""" if ip is None: @@ -462,7 +468,10 @@ def _handle_relation(self, event): event.relation, data.app.name, data.app.model, - [unit.dict() for unit in data.units], + [ + unit.dict() if PYDANTIC_IS_V1 else unit.model_dump(mode="json") + for unit in data.units + ], data.app.strip_prefix or False, data.app.redirect_https or False, ) diff --git a/examples/go/charm/lib/charms/data_platform_libs/v0/data_interfaces.py b/examples/go/charm/lib/charms/data_platform_libs/v0/data_interfaces.py index b331bdc..3bc2dd8 100644 --- a/examples/go/charm/lib/charms/data_platform_libs/v0/data_interfaces.py +++ b/examples/go/charm/lib/charms/data_platform_libs/v0/data_interfaces.py @@ -331,10 +331,14 @@ def _on_topic_requested(self, event: TopicRequestedEvent): # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 36 +LIBPATCH = 40 PYDEPS = ["ops>=2.0.0"] +# Starting from what LIBPATCH number to apply legacy solutions +# v0.17 was the last version without secrets +LEGACY_SUPPORT_FROM = 17 + logger = logging.getLogger(__name__) Diff = namedtuple("Diff", "added changed deleted") @@ -351,36 +355,16 @@ def _on_topic_requested(self, event: TopicRequestedEvent): GROUP_MAPPING_FIELD = "secret_group_mapping" GROUP_SEPARATOR = "@" +MODEL_ERRORS = { + "not_leader": "this unit is not the leader", + "no_label_and_uri": "ERROR either URI or label should be used for getting an owned secret but not both", + "owner_no_refresh": "ERROR secret owner cannot use --refresh", +} -class SecretGroup(str): - """Secret groups specific type.""" - - -class SecretGroupsAggregate(str): - """Secret groups with option to extend with additional constants.""" - - def __init__(self): - self.USER = SecretGroup("user") - self.TLS = SecretGroup("tls") - self.EXTRA = SecretGroup("extra") - - def __setattr__(self, name, value): - """Setting internal constants.""" - if name in self.__dict__: - raise RuntimeError("Can't set constant!") - else: - super().__setattr__(name, SecretGroup(value)) - - def groups(self) -> list: - """Return the list of stored SecretGroups.""" - return list(self.__dict__.values()) - - def get_group(self, group: str) -> Optional[SecretGroup]: - """If the input str translates to a group name, return that.""" - return SecretGroup(group) if group in self.groups() else None - -SECRET_GROUPS = SecretGroupsAggregate() +############################################################################## +# Exceptions +############################################################################## class DataInterfacesError(Exception): @@ -407,6 +391,19 @@ class IllegalOperationError(DataInterfacesError): """To be used when an operation is not allowed to be performed.""" +class PrematureDataAccessError(DataInterfacesError): + """To be raised when the Relation Data may be accessed (written) before protocol init complete.""" + + +############################################################################## +# Global helpers / utilities +############################################################################## + +############################################################################## +# Databag handling and comparison methods +############################################################################## + + def get_encoded_dict( relation: Relation, member: Union[Unit, Application], field: str ) -> Optional[Dict[str, str]]: @@ -482,6 +479,11 @@ def diff(event: RelationChangedEvent, bucket: Optional[Union[Unit, Application]] return Diff(added, changed, deleted) +############################################################################## +# Module decorators +############################################################################## + + def leader_only(f): """Decorator to ensure that only leader can perform given operation.""" @@ -536,6 +538,36 @@ def wrapper(self, *args, **kwargs): return wrapper +def legacy_apply_from_version(version: int) -> Callable: + """Decorator to decide whether to apply a legacy function or not. + + Based on LEGACY_SUPPORT_FROM module variable value, the importer charm may only want + to apply legacy solutions starting from a specific LIBPATCH. + + NOTE: All 'legacy' functions have to be defined and called in a way that they return `None`. + This results in cleaner and more secure execution flows in case the function may be disabled. + This requirement implicitly means that legacy functions change the internal state strictly, + don't return information. + """ + + def decorator(f: Callable[..., None]): + """Signature is ensuring None return value.""" + f.legacy_version = version + + def wrapper(self, *args, **kwargs) -> None: + if version >= LEGACY_SUPPORT_FROM: + return f(self, *args, **kwargs) + + return wrapper + + return decorator + + +############################################################################## +# Helper classes +############################################################################## + + class Scope(Enum): """Peer relations scope.""" @@ -543,9 +575,35 @@ class Scope(Enum): UNIT = "unit" -################################################################################ -# Secrets internal caching -################################################################################ +class SecretGroup(str): + """Secret groups specific type.""" + + +class SecretGroupsAggregate(str): + """Secret groups with option to extend with additional constants.""" + + def __init__(self): + self.USER = SecretGroup("user") + self.TLS = SecretGroup("tls") + self.EXTRA = SecretGroup("extra") + + def __setattr__(self, name, value): + """Setting internal constants.""" + if name in self.__dict__: + raise RuntimeError("Can't set constant!") + else: + super().__setattr__(name, SecretGroup(value)) + + def groups(self) -> list: + """Return the list of stored SecretGroups.""" + return list(self.__dict__.values()) + + def get_group(self, group: str) -> Optional[SecretGroup]: + """If the input str translates to a group name, return that.""" + return SecretGroup(group) if group in self.groups() else None + + +SECRET_GROUPS = SecretGroupsAggregate() class CachedSecret: @@ -554,6 +612,8 @@ class CachedSecret: The data structure is precisely re-using/simulating as in the actual Secret Storage """ + KNOWN_MODEL_ERRORS = [MODEL_ERRORS["no_label_and_uri"], MODEL_ERRORS["owner_no_refresh"]] + def __init__( self, model: Model, @@ -571,6 +631,95 @@ def __init__( self.legacy_labels = legacy_labels self.current_label = None + @property + def meta(self) -> Optional[Secret]: + """Getting cached secret meta-information.""" + if not self._secret_meta: + if not (self._secret_uri or self.label): + return + + try: + self._secret_meta = self._model.get_secret(label=self.label) + except SecretNotFoundError: + # Falling back to seeking for potential legacy labels + self._legacy_compat_find_secret_by_old_label() + + # If still not found, to be checked by URI, to be labelled with the proposed label + if not self._secret_meta and self._secret_uri: + self._secret_meta = self._model.get_secret(id=self._secret_uri, label=self.label) + return self._secret_meta + + ########################################################################## + # Backwards compatibility / Upgrades + ########################################################################## + # These functions are used to keep backwards compatibility on rolling upgrades + # Policy: + # All data is kept intact until the first write operation. (This allows a minimal + # grace period during which rollbacks are fully safe. For more info see the spec.) + # All data involves: + # - databag contents + # - secrets content + # - secret labels (!!!) + # Legacy functions must return None, and leave an equally consistent state whether + # they are executed or skipped (as a high enough versioned execution environment may + # not require so) + + # Compatibility + + @legacy_apply_from_version(34) + def _legacy_compat_find_secret_by_old_label(self) -> None: + """Compatibility function, allowing to find a secret by a legacy label. + + This functionality is typically needed when secret labels changed over an upgrade. + Until the first write operation, we need to maintain data as it was, including keeping + the old secret label. In order to keep track of the old label currently used to access + the secret, and additional 'current_label' field is being defined. + """ + for label in self.legacy_labels: + try: + self._secret_meta = self._model.get_secret(label=label) + except SecretNotFoundError: + pass + else: + if label != self.label: + self.current_label = label + return + + # Migrations + + @legacy_apply_from_version(34) + def _legacy_migration_to_new_label_if_needed(self) -> None: + """Helper function to re-create the secret with a different label. + + Juju does not provide a way to change secret labels. + Thus whenever moving from secrets version that involves secret label changes, + we "re-create" the existing secret, and attach the new label to the new + secret, to be used from then on. + + Note: we replace the old secret with a new one "in place", as we can't + easily switch the containing SecretCache structure to point to a new secret. + Instead we are changing the 'self' (CachedSecret) object to point to the + new instance. + """ + if not self.current_label or not (self.meta and self._secret_meta): + return + + # Create a new secret with the new label + content = self._secret_meta.get_content() + self._secret_uri = None + + # It will be nice to have the possibility to check if we are the owners of the secret... + try: + self._secret_meta = self.add_secret(content, label=self.label) + except ModelError as err: + if MODEL_ERRORS["not_leader"] not in str(err): + raise + self.current_label = None + + ########################################################################## + # Public functions + ########################################################################## + def add_secret( self, content: Dict[str, str], @@ -593,28 +742,6 @@ def add_secret( self._secret_meta = secret return self._secret_meta - @property - def meta(self) -> Optional[Secret]: - """Getting cached secret meta-information.""" - if not self._secret_meta: - if not (self._secret_uri or self.label): - return - - for label in [self.label] + self.legacy_labels: - try: - self._secret_meta = self._model.get_secret(label=label) - except SecretNotFoundError: - pass - else: - if label != self.label: - self.current_label = label - break - - # If still not found, to be checked by URI, to be labelled with the proposed label - if not self._secret_meta and self._secret_uri: - self._secret_meta = self._model.get_secret(id=self._secret_uri, label=self.label) - return self._secret_meta - def get_content(self) -> Dict[str, str]: """Getting cached secret content.""" if not self._secret_content: @@ -624,42 +751,25 @@ def get_content(self) -> Dict[str, str]: except (ValueError, ModelError) as err: # https://bugs.launchpad.net/juju/+bug/2042596 # Only triggered when 'refresh' is set - known_model_errors = [ - "ERROR either URI or label should be used for getting an owned secret but not both", - "ERROR secret owner cannot use --refresh", - ] if isinstance(err, ModelError) and not any( - msg in str(err) for msg in known_model_errors + msg in str(err) for msg in self.KNOWN_MODEL_ERRORS ): raise # Due to: ValueError: Secret owner cannot use refresh=True self._secret_content = self.meta.get_content() return self._secret_content - def _move_to_new_label_if_needed(self): - """Helper function to re-create the secret with a different label.""" - if not self.current_label or not (self.meta and self._secret_meta): - return - - # Create a new secret with the new label - content = self._secret_meta.get_content() - self._secret_uri = None - - # I wish we could just check if we are the owners of the secret... - try: - self._secret_meta = self.add_secret(content, label=self.label) - except ModelError as err: - if "this unit is not the leader" not in str(err): - raise - self.current_label = None - def set_content(self, content: Dict[str, str]) -> None: """Setting cached secret content.""" if not self.meta: return + # DPE-4182: do not create new revision if the content stay the same + if content == self.get_content(): + return + if content: - self._move_to_new_label_if_needed() + self._legacy_migration_to_new_label_if_needed() self.meta.set_content(content) self._secret_content = content else: @@ -922,6 +1032,23 @@ def _delete_relation_data(self, relation: Relation, fields: List[str]) -> None: """Delete data available (directily or indirectly -- i.e. secrets) from the relation for owner/this_app.""" raise NotImplementedError + # Optional overrides + + def _legacy_apply_on_fetch(self) -> None: + """This function should provide a list of compatibility functions to be applied when fetching (legacy) data.""" + pass + + def _legacy_apply_on_update(self, fields: List[str]) -> None: + """This function should provide a list of compatibility functions to be applied when writing data. + + Since data may be at a legacy version, migration may be mandatory. + """ + pass + + def _legacy_apply_on_delete(self, fields: List[str]) -> None: + """This function should provide a list of compatibility functions to be applied when deleting (legacy) data.""" + pass + # Internal helper methods @staticmethod @@ -1174,6 +1301,16 @@ def get_relation(self, relation_name, relation_id) -> Relation: return relation + def get_secret_uri(self, relation: Relation, group: SecretGroup) -> Optional[str]: + """Get the secret URI for the corresponding group.""" + secret_field = self._generate_secret_field_name(group) + return relation.data[self.component].get(secret_field) + + def set_secret_uri(self, relation: Relation, group: SecretGroup, secret_uri: str) -> None: + """Set the secret URI for the corresponding group.""" + secret_field = self._generate_secret_field_name(group) + relation.data[self.component][secret_field] = secret_uri + def fetch_relation_data( self, relation_ids: Optional[List[int]] = None, @@ -1190,6 +1327,8 @@ def fetch_relation_data( a dict of the values stored in the relation data bag for all relation instances (indexed by the relation ID). """ + self._legacy_apply_on_fetch() + if not relation_name: relation_name = self.relation_name @@ -1228,6 +1367,8 @@ def fetch_my_relation_data( NOTE: Since only the leader can read the relation's 'this_app'-side Application databag, the functionality is limited to leaders """ + self._legacy_apply_on_fetch() + if not relation_name: relation_name = self.relation_name @@ -1259,6 +1400,8 @@ def fetch_my_relation_field( @leader_only def update_relation_data(self, relation_id: int, data: dict) -> None: """Update the data within the relation.""" + self._legacy_apply_on_update(list(data.keys())) + relation_name = self.relation_name relation = self.get_relation(relation_name, relation_id) return self._update_relation_data(relation, data) @@ -1266,6 +1409,8 @@ def update_relation_data(self, relation_id: int, data: dict) -> None: @leader_only def delete_relation_data(self, relation_id: int, fields: List[str]) -> None: """Remove field from the relation.""" + self._legacy_apply_on_delete(fields) + relation_name = self.relation_name relation = self.get_relation(relation_name, relation_id) return self._delete_relation_data(relation, fields) @@ -1312,6 +1457,8 @@ def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: class ProviderData(Data): """Base provides-side of the data products relation.""" + RESOURCE_FIELD = "database" + def __init__( self, model: Model, @@ -1332,8 +1479,7 @@ def _add_relation_secret( uri_to_databag=True, ) -> bool: """Add a new Juju Secret that will be registered in the relation databag.""" - secret_field = self._generate_secret_field_name(group_mapping) - if uri_to_databag and relation.data[self.component].get(secret_field): + if uri_to_databag and self.get_secret_uri(relation, group_mapping): logging.error("Secret for relation %s already exists, not adding again", relation.id) return False @@ -1344,7 +1490,7 @@ def _add_relation_secret( # According to lint we may not have a Secret ID if uri_to_databag and secret.meta and secret.meta.id: - relation.data[self.component][secret_field] = secret.meta.id + self.set_secret_uri(relation, group_mapping, secret.meta.id) # Return the content that was added return True @@ -1445,8 +1591,7 @@ def _get_relation_secret( if not relation: return - secret_field = self._generate_secret_field_name(group_mapping) - if secret_uri := relation.data[self.local_app].get(secret_field): + if secret_uri := self.get_secret_uri(relation, group_mapping): return self.secrets.get(label, secret_uri) def _fetch_specific_relation_data( @@ -1479,6 +1624,15 @@ def _fetch_my_specific_relation_data( def _update_relation_data(self, relation: Relation, data: Dict[str, str]) -> None: """Set values for fields not caring whether it's a secret or not.""" req_secret_fields = [] + + keys = set(data.keys()) + if self.fetch_relation_field(relation.id, self.RESOURCE_FIELD) is None and ( + keys - {"endpoints", "read-only-endpoints", "replset"} + ): + raise PrematureDataAccessError( + "Premature access to relation data, update is forbidden before the connection is initialized." + ) + if relation.app: req_secret_fields = get_encoded_list(relation, relation.app, REQ_SECRET_FIELDS) @@ -1599,11 +1753,10 @@ def _register_secrets_to_relation(self, relation: Relation, params_name_list: Li for group in SECRET_GROUPS.groups(): secret_field = self._generate_secret_field_name(group) - if secret_field in params_name_list: - if secret_uri := relation.data[relation.app].get(secret_field): - self._register_secret_to_relation( - relation.name, relation.id, secret_uri, group - ) + if secret_field in params_name_list and ( + secret_uri := self.get_secret_uri(relation, group) + ): + self._register_secret_to_relation(relation.name, relation.id, secret_uri, group) def _is_resource_created_for_relation(self, relation: Relation) -> bool: if not relation.app: @@ -1614,6 +1767,17 @@ def _is_resource_created_for_relation(self, relation: Relation) -> bool: ) return bool(data.get("username")) and bool(data.get("password")) + # Public functions + + def get_secret_uri(self, relation: Relation, group: SecretGroup) -> Optional[str]: + """Getting relation secret URI for the corresponding Secret Group.""" + secret_field = self._generate_secret_field_name(group) + return relation.data[relation.app].get(secret_field) + + def set_secret_uri(self, relation: Relation, group: SecretGroup, uri: str) -> None: + """Setting relation secret URI is not possible for a Requirer.""" + raise NotImplementedError("Requirer can not change the relation secret URI.") + def is_resource_created(self, relation_id: Optional[int] = None) -> bool: """Check if the resource has been created. @@ -1764,7 +1928,6 @@ def __init__( secret_field_name: Optional[str] = None, deleted_label: Optional[str] = None, ): - """Manager of base client relations.""" RequirerData.__init__( self, model, @@ -1775,6 +1938,11 @@ def __init__( self.secret_field_name = secret_field_name if secret_field_name else self.SECRET_FIELD_NAME self.deleted_label = deleted_label self._secret_label_map = {} + + # Legacy information holders + self._legacy_labels = [] + self._legacy_secret_uri = None + # Secrets that are being dynamically added within the scope of this event handler run self._new_secrets = [] self._additional_secret_group_mapping = additional_secret_group_mapping @@ -1849,10 +2017,12 @@ def set_secret( value: The string value of the secret group_mapping: The name of the "secret group", in case the field is to be added to an existing secret """ + self._legacy_apply_on_update([field]) + full_field = self._field_to_internal_name(field, group_mapping) if self.secrets_enabled and full_field not in self.current_secret_fields: self._new_secrets.append(full_field) - if self._no_group_with_databag(field, full_field): + if self.valid_field_pattern(field, full_field): self.update_relation_data(relation_id, {full_field: value}) # Unlike for set_secret(), there's no harm using this operation with static secrets @@ -1865,6 +2035,8 @@ def get_secret( group_mapping: Optional[SecretGroup] = None, ) -> Optional[str]: """Public interface method to fetch secrets only.""" + self._legacy_apply_on_fetch() + full_field = self._field_to_internal_name(field, group_mapping) if ( self.secrets_enabled @@ -1872,7 +2044,7 @@ def get_secret( and field not in self.current_secret_fields ): return - if self._no_group_with_databag(field, full_field): + if self.valid_field_pattern(field, full_field): return self.fetch_my_relation_field(relation_id, full_field) @dynamic_secrets_only @@ -1883,14 +2055,19 @@ def delete_secret( group_mapping: Optional[SecretGroup] = None, ) -> Optional[str]: """Public interface method to delete secrets only.""" + self._legacy_apply_on_delete([field]) + full_field = self._field_to_internal_name(field, group_mapping) if self.secrets_enabled and full_field not in self.current_secret_fields: logger.warning(f"Secret {field} from group {group_mapping} was not found") return - if self._no_group_with_databag(field, full_field): + + if self.valid_field_pattern(field, full_field): self.delete_relation_data(relation_id, [full_field]) + ########################################################################## # Helpers + ########################################################################## @staticmethod def _field_to_internal_name(field: str, group: Optional[SecretGroup]) -> str: @@ -1932,10 +2109,69 @@ def _content_for_secret_group( if k in self.secret_fields } - # Backwards compatibility + def valid_field_pattern(self, field: str, full_field: str) -> bool: + """Check that no secret group is attempted to be used together without secrets being enabled. + + Secrets groups are impossible to use with versions that are not yet supporting secrets. + """ + if not self.secrets_enabled and full_field != field: + logger.error( + f"Can't access {full_field}: no secrets available (i.e. no secret groups either)." + ) + return False + return True + + ########################################################################## + # Backwards compatibility / Upgrades + ########################################################################## + # These functions are used to keep backwards compatibility on upgrades + # Policy: + # All data is kept intact until the first write operation. (This allows a minimal + # grace period during which rollbacks are fully safe. For more info see spec.) + # All data involves: + # - databag + # - secrets content + # - secret labels (!!!) + # Legacy functions must return None, and leave an equally consistent state whether + # they are executed or skipped (as a high enough versioned execution environment may + # not require so) + + # Full legacy stack for each operation + + def _legacy_apply_on_fetch(self) -> None: + """All legacy functions to be applied on fetch.""" + relation = self._model.relations[self.relation_name][0] + self._legacy_compat_generate_prev_labels() + self._legacy_compat_secret_uri_from_databag(relation) + + def _legacy_apply_on_update(self, fields) -> None: + """All legacy functions to be applied on update.""" + relation = self._model.relations[self.relation_name][0] + self._legacy_compat_generate_prev_labels() + self._legacy_compat_secret_uri_from_databag(relation) + self._legacy_migration_remove_secret_from_databag(relation, fields) + self._legacy_migration_remove_secret_field_name_from_databag(relation) + + def _legacy_apply_on_delete(self, fields) -> None: + """All legacy functions to be applied on delete.""" + relation = self._model.relations[self.relation_name][0] + self._legacy_compat_generate_prev_labels() + self._legacy_compat_secret_uri_from_databag(relation) + self._legacy_compat_check_deleted_label(relation, fields) + + # Compatibility + + @legacy_apply_from_version(18) + def _legacy_compat_check_deleted_label(self, relation, fields) -> None: + """Helper function for legacy behavior. + + As long as https://bugs.launchpad.net/juju/+bug/2028094 wasn't fixed, + we did not delete fields but rather kept them in the secret with a string value + expressing invalidity. This function is maintainnig that behavior when needed. + """ + if not self.deleted_label: + return - def _check_deleted_label(self, relation, fields) -> None: - """Helper function for legacy behavior.""" current_data = self.fetch_my_relation_data([relation.id], fields) if current_data is not None: # Check if the secret we wanna delete actually exists @@ -1948,7 +2184,43 @@ def _check_deleted_label(self, relation, fields) -> None: ", ".join(non_existent), ) - def _remove_secret_from_databag(self, relation, fields: List[str]) -> None: + @legacy_apply_from_version(18) + def _legacy_compat_secret_uri_from_databag(self, relation) -> None: + """Fetching the secret URI from the databag, in case stored there.""" + self._legacy_secret_uri = relation.data[self.component].get( + self._generate_secret_field_name(), None + ) + + @legacy_apply_from_version(34) + def _legacy_compat_generate_prev_labels(self) -> None: + """Generator for legacy secret label names, for backwards compatibility. + + Secret label is part of the data that MUST be maintained across rolling upgrades. + In case there may be a change on a secret label, the old label must be recognized + after upgrades, and left intact until the first write operation -- when we roll over + to the new label. + + This function keeps "memory" of previously used secret labels. + NOTE: Return value takes decorator into account -- all 'legacy' functions may return `None` + + v0.34 (rev69): Fixing issue https://github.com/canonical/data-platform-libs/issues/155 + meant moving from '.' (i.e. 'mysql.app', 'mysql.unit') + to labels '..' (like 'peer.mysql.app') + """ + if self._legacy_labels: + return + + result = [] + members = [self._model.app.name] + if self.scope: + members.append(self.scope.value) + result.append(f"{'.'.join(members)}") + self._legacy_labels = result + + # Migration + + @legacy_apply_from_version(18) + def _legacy_migration_remove_secret_from_databag(self, relation, fields: List[str]) -> None: """For Rolling Upgrades -- when moving from databag to secrets usage. Practically what happens here is to remove stuff from the databag that is @@ -1962,10 +2234,16 @@ def _remove_secret_from_databag(self, relation, fields: List[str]) -> None: if self._fetch_relation_data_without_secrets(self.component, relation, [field]): self._delete_relation_data_without_secrets(self.component, relation, [field]) - def _remove_secret_field_name_from_databag(self, relation) -> None: + @legacy_apply_from_version(18) + def _legacy_migration_remove_secret_field_name_from_databag(self, relation) -> None: """Making sure that the old databag URI is gone. This action should not be executed more than once. + + There was a phase (before moving secrets usage to libs) when charms saved the peer + secret URI to the databag, and used this URI from then on to retrieve their secret. + When upgrading to charm versions using this library, we need to add a label to the + secret and access it via label from than on, and remove the old traces from the databag. """ # Nothing to do if 'internal-secret' is not in the databag if not (relation.data[self.component].get(self._generate_secret_field_name())): @@ -1981,25 +2259,9 @@ def _remove_secret_field_name_from_databag(self, relation) -> None: # Databag reference to the secret URI can be removed, now that it's labelled relation.data[self.component].pop(self._generate_secret_field_name(), None) - def _previous_labels(self) -> List[str]: - """Generator for legacy secret label names, for backwards compatibility.""" - result = [] - members = [self._model.app.name] - if self.scope: - members.append(self.scope.value) - result.append(f"{'.'.join(members)}") - return result - - def _no_group_with_databag(self, field: str, full_field: str) -> bool: - """Check that no secret group is attempted to be used together with databag.""" - if not self.secrets_enabled and full_field != field: - logger.error( - f"Can't access {full_field}: no secrets available (i.e. no secret groups either)." - ) - return False - return True - + ########################################################################## # Event handlers + ########################################################################## def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: """Event emitted when the relation has changed.""" @@ -2009,7 +2271,9 @@ def _on_secret_changed_event(self, event: SecretChangedEvent) -> None: """Event emitted when the secret has changed.""" pass + ########################################################################## # Overrides of Relation Data handling functions + ########################################################################## def _generate_secret_label( self, relation_name: str, relation_id: int, group_mapping: SecretGroup @@ -2046,13 +2310,14 @@ def _get_relation_secret( return label = self._generate_secret_label(relation_name, relation_id, group_mapping) - secret_uri = relation.data[self.component].get(self._generate_secret_field_name(), None) # URI or legacy label is only to applied when moving single legacy secret to a (new) label if group_mapping == SECRET_GROUPS.EXTRA: # Fetching the secret with fallback to URI (in case label is not yet known) # Label would we "stuck" on the secret in case it is found - return self.secrets.get(label, secret_uri, legacy_labels=self._previous_labels()) + return self.secrets.get( + label, self._legacy_secret_uri, legacy_labels=self._legacy_labels + ) return self.secrets.get(label) def _get_group_secret_contents( @@ -2082,7 +2347,6 @@ def _fetch_my_specific_relation_data( @either_static_or_dynamic_secrets def _update_relation_data(self, relation: Relation, data: Dict[str, str]) -> None: """Update data available (directily or indirectly -- i.e. secrets) from the relation for owner/this_app.""" - self._remove_secret_from_databag(relation, list(data.keys())) _, normal_fields = self._process_secret_fields( relation, self.secret_fields, @@ -2091,7 +2355,6 @@ def _update_relation_data(self, relation: Relation, data: Dict[str, str]) -> Non data=data, uri_to_databag=False, ) - self._remove_secret_field_name_from_databag(relation) normal_content = {k: v for k, v in data.items() if k in normal_fields} self._update_relation_data_without_secrets(self.component, relation, normal_content) @@ -2100,8 +2363,6 @@ def _update_relation_data(self, relation: Relation, data: Dict[str, str]) -> Non def _delete_relation_data(self, relation: Relation, fields: List[str]) -> None: """Delete data available (directily or indirectly -- i.e. secrets) from the relation for owner/this_app.""" if self.secret_fields and self.deleted_label: - # Legacy, backwards compatibility - self._check_deleted_label(relation, fields) _, normal_fields = self._process_secret_fields( relation, @@ -2137,7 +2398,9 @@ def fetch_relation_field( "fetch_my_relation_data() and fetch_my_relation_field()" ) + ########################################################################## # Public functions -- inherited + ########################################################################## fetch_my_relation_data = Data.fetch_my_relation_data fetch_my_relation_field = Data.fetch_my_relation_field @@ -2602,6 +2865,14 @@ def set_version(self, relation_id: int, version: str) -> None: """ self.update_relation_data(relation_id, {"version": version}) + def set_subordinated(self, relation_id: int) -> None: + """Raises the subordinated flag in the application relation databag. + + Args: + relation_id: the identifier for a particular relation. + """ + self.update_relation_data(relation_id, {"subordinated": "true"}) + class DatabaseProviderEventHandlers(EventHandlers): """Provider-side of the database relation handlers.""" @@ -2838,6 +3109,21 @@ def _on_relation_created_event(self, event: RelationCreatedEvent) -> None: def _on_relation_changed_event(self, event: RelationChangedEvent) -> None: """Event emitted when the database relation has changed.""" + is_subordinate = False + remote_unit_data = None + for key in event.relation.data.keys(): + if isinstance(key, Unit) and not key.name.startswith(self.charm.app.name): + remote_unit_data = event.relation.data[key] + elif isinstance(key, Application) and key.name != self.charm.app.name: + is_subordinate = event.relation.data[key].get("subordinated") == "true" + + if is_subordinate: + if not remote_unit_data: + return + + if remote_unit_data.get("state") != "ready": + return + # Check which data has changed to emit customs events. diff = self._diff(event) @@ -3019,6 +3305,8 @@ class KafkaRequiresEvents(CharmEvents): class KafkaProviderData(ProviderData): """Provider-side of the Kafka relation.""" + RESOURCE_FIELD = "topic" + def __init__(self, model: Model, relation_name: str) -> None: super().__init__(model, relation_name) @@ -3268,6 +3556,8 @@ class OpenSearchRequiresEvents(CharmEvents): class OpenSearchProvidesData(ProviderData): """Provider-side of the OpenSearch relation.""" + RESOURCE_FIELD = "index" + def __init__(self, model: Model, relation_name: str) -> None: super().__init__(model, relation_name) diff --git a/examples/go/charm/lib/charms/loki_k8s/v0/loki_push_api.py b/examples/go/charm/lib/charms/loki_k8s/v0/loki_push_api.py index d5217f3..16e1294 100644 --- a/examples/go/charm/lib/charms/loki_k8s/v0/loki_push_api.py +++ b/examples/go/charm/lib/charms/loki_k8s/v0/loki_push_api.py @@ -16,9 +16,10 @@ applications such as pebble, or charmed operators of workloads such as grafana-agent or promtail, that can communicate with loki directly. -- `LogProxyConsumer`: This object can be used by any Charmed Operator which needs to -send telemetry, such as logs, to Loki through a Log Proxy by implementing the consumer side of the -`loki_push_api` relation interface. +- `LogProxyConsumer`: DEPRECATED. +This object can be used by any Charmed Operator which needs to send telemetry, such as logs, to +Loki through a Log Proxy by implementing the consumer side of the `loki_push_api` relation +interface. Filtering logs in Loki is largely performed on the basis of labels. In the Juju ecosystem, Juju topology labels are used to uniquely identify the workload which generates telemetry like logs. @@ -38,13 +39,14 @@ - `charm`: A reference to the parent (Loki) charm. - `relation_name`: The name of the relation that the charm uses to interact - with its clients, which implement `LokiPushApiConsumer` or `LogProxyConsumer`. + with its clients, which implement `LokiPushApiConsumer` or `LogProxyConsumer` + (note that LogProxyConsumer is deprecated). If provided, this relation name must match a provided relation in metadata.yaml with the `loki_push_api` interface. The default relation name is "logging" for `LokiPushApiConsumer` and "log-proxy" for - `LogProxyConsumer`. + `LogProxyConsumer` (note that LogProxyConsumer is deprecated). For example, a provider's `metadata.yaml` file may look as follows: @@ -219,6 +221,9 @@ def __init__(self, *args): ## LogProxyConsumer Library Usage +> Note: This object is deprecated. Consider migrating to LogForwarder (see v1/loki_push_api) with +> the release of Juju 3.6 LTS. + Let's say that we have a workload charm that produces logs, and we need to send those logs to a workload implementing the `loki_push_api` interface, such as `Loki` or `Grafana Agent`. @@ -480,7 +485,7 @@ def _alert_rules_error(self, event): # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 29 +LIBPATCH = 30 PYDEPS = ["cosl"] @@ -1539,7 +1544,8 @@ def __init__( the Loki API endpoint to push logs. It is intended for workloads that can speak loki_push_api (https://grafana.com/docs/loki/latest/api/#push-log-entries-to-loki), such as grafana-agent. - (If you only need to forward a few workload log files, then use LogProxyConsumer.) + (If you need to forward workload stdout logs, then use v1/loki_push_api.LogForwarder; if + you need to forward log files, then use LogProxyConsumer.) `LokiPushApiConsumer` can be instantiated as follows: @@ -1728,6 +1734,9 @@ class LogProxyEvents(ObjectEvents): class LogProxyConsumer(ConsumerBase): """LogProxyConsumer class. + > Note: This object is deprecated. Consider migrating to v1/loki_push_api.LogForwarder with the + > release of Juju 3.6 LTS. + The `LogProxyConsumer` object provides a method for attaching `promtail` to a workload in order to generate structured logging data from applications which traditionally log to syslog or do not have native Loki integration. diff --git a/examples/go/charm/lib/charms/loki_k8s/v1/loki_push_api.py b/examples/go/charm/lib/charms/loki_k8s/v1/loki_push_api.py new file mode 100644 index 0000000..d75cb7e --- /dev/null +++ b/examples/go/charm/lib/charms/loki_k8s/v1/loki_push_api.py @@ -0,0 +1,2823 @@ +#!/usr/bin/env python3 +# Copyright 2023 Canonical Ltd. +# See LICENSE file for licensing details. +# +# Learn more at: https://juju.is/docs/sdk + +r"""## Overview. + +This document explains how to use the two principal objects this library provides: + +- `LokiPushApiProvider`: This object is meant to be used by any Charmed Operator that needs to +implement the provider side of the `loki_push_api` relation interface. For instance, a Loki charm. +The provider side of the relation represents the server side, to which logs are being pushed. + +- `LokiPushApiConsumer`: This object is meant to be used by any Charmed Operator that needs to +send log to Loki by implementing the consumer side of the `loki_push_api` relation interface. +For instance, a Promtail or Grafana agent charm which needs to send logs to Loki. + +- `LogProxyConsumer`: DEPRECATED. +This object can be used by any Charmed Operator which needs to send telemetry, such as logs, to +Loki through a Log Proxy by implementing the consumer side of the `loki_push_api` relation +interface. +In order to be able to control the labels on the logs pushed this object adds a Pebble layer +that runs Promtail in the workload container, injecting Juju topology labels into the +logs on the fly. +This object is deprecated. Consider migrating to LogForwarder with the release of Juju 3.6 LTS. + +- `LogForwarder`: This object can be used by any Charmed Operator which needs to send the workload +standard output (stdout) through Pebble's log forwarding mechanism, to Loki endpoints through the +`loki_push_api` relation interface. +In order to be able to control the labels on the logs pushed this object updates the pebble layer's +"log-targets" section with Juju topology. + +Filtering logs in Loki is largely performed on the basis of labels. In the Juju ecosystem, Juju +topology labels are used to uniquely identify the workload which generates telemetry like logs. + + +## LokiPushApiProvider Library Usage + +This object may be used by any Charmed Operator which implements the `loki_push_api` interface. +For instance, Loki or Grafana Agent. + +For this purpose a charm needs to instantiate the `LokiPushApiProvider` object with one mandatory +and three optional arguments. + +- `charm`: A reference to the parent (Loki) charm. + +- `relation_name`: The name of the relation that the charm uses to interact + with its clients, which implement `LokiPushApiConsumer` `LogForwarder`, or `LogProxyConsumer` + (note that LogProxyConsumer is deprecated). + + If provided, this relation name must match a provided relation in metadata.yaml with the + `loki_push_api` interface. + + The default relation name is "logging" for `LokiPushApiConsumer` and `LogForwarder`, and + "log-proxy" for `LogProxyConsumer` (note that LogProxyConsumer is deprecated). + + For example, a provider's `metadata.yaml` file may look as follows: + + ```yaml + provides: + logging: + interface: loki_push_api + ``` + + Subsequently, a Loki charm may instantiate the `LokiPushApiProvider` in its constructor as + follows: + + from charms.loki_k8s.v1.loki_push_api import LokiPushApiProvider + from loki_server import LokiServer + ... + + class LokiOperatorCharm(CharmBase): + ... + + def __init__(self, *args): + super().__init__(*args) + ... + external_url = urlparse(self._external_url) + self.loki_provider = LokiPushApiProvider( + self, + address=external_url.hostname or self.hostname, + port=external_url.port or 80, + scheme=external_url.scheme, + path=f"{external_url.path}/loki/api/v1/push", + ) + ... + + - `port`: Loki Push Api endpoint port. Default value: `3100`. + - `scheme`: Loki Push Api endpoint scheme (`HTTP` or `HTTPS`). Default value: `HTTP` + - `address`: Loki Push Api endpoint address. Default value: `localhost` + - `path`: Loki Push Api endpoint path. Default value: `loki/api/v1/push` + + +The `LokiPushApiProvider` object has several responsibilities: + +1. Set the URL of the Loki Push API in the relation application data bag; the URL + must be unique to all instances (e.g. using a load balancer). + +2. Set the Promtail binary URL (`promtail_binary_zip_url`) so clients that use + `LogProxyConsumer` object could download and configure it. + +3. Process the metadata of the consumer application, provided via the + "metadata" field of the consumer data bag, which are used to annotate the + alert rules (see next point). An example for "metadata" is the following: + + {'model': 'loki', + 'model_uuid': '0b7d1071-ded2-4bf5-80a3-10a81aeb1386', + 'application': 'promtail-k8s' + } + +4. Process alert rules set into the relation by the `LokiPushApiConsumer` + objects, e.g.: + + '{ + "groups": [{ + "name": "loki_0b7d1071-ded2-4bf5-80a3-10a81aeb1386_promtail-k8s_alerts", + "rules": [{ + "alert": "HighPercentageError", + "expr": "sum(rate({app=\\"foo\\", env=\\"production\\"} |= \\"error\\" [5m])) + by (job) \\n /\\nsum(rate({app=\\"foo\\", env=\\"production\\"}[5m])) + by (job)\\n > 0.05 + \\n", "for": "10m", + "labels": { + "severity": "page", + "juju_model": "loki", + "juju_model_uuid": "0b7d1071-ded2-4bf5-80a3-10a81aeb1386", + "juju_application": "promtail-k8s" + }, + "annotations": { + "summary": "High request latency" + } + }] + }] + }' + + +Once these alert rules are sent over relation data, the `LokiPushApiProvider` object +stores these files in the directory `/loki/rules` inside the Loki charm container. After +storing alert rules files, the object will check alert rules by querying Loki API +endpoint: [`loki/api/v1/rules`](https://grafana.com/docs/loki/latest/api/#list-rule-groups). +If there are changes in the alert rules a `loki_push_api_alert_rules_changed` event will +be emitted with details about the `RelationEvent` which triggered it. + +This events should be observed in the charm that uses `LokiPushApiProvider`: + +```python + def __init__(self, *args): + super().__init__(*args) + ... + self.loki_provider = LokiPushApiProvider(self) + self.framework.observe( + self.loki_provider.on.loki_push_api_alert_rules_changed, + self._loki_push_api_alert_rules_changed, + ) +``` + + +## LokiPushApiConsumer Library Usage + +This Loki charm interacts with its clients using the Loki charm library. Charms +seeking to send log to Loki, must do so using the `LokiPushApiConsumer` object from +this charm library. + +> **NOTE**: `LokiPushApiConsumer` also depends on an additional charm library. +> +> Ensure sure you `charmcraft fetch-lib charms.observability_libs.v0.juju_topology` +> when using this library. + +For the simplest use cases, using the `LokiPushApiConsumer` object only requires +instantiating it, typically in the constructor of your charm (the one which +sends logs). + +```python +from charms.loki_k8s.v1.loki_push_api import LokiPushApiConsumer + +class LokiClientCharm(CharmBase): + + def __init__(self, *args): + super().__init__(*args) + ... + self._loki_consumer = LokiPushApiConsumer(self) +``` + +The `LokiPushApiConsumer` constructor requires two things: + +- A reference to the parent (LokiClientCharm) charm. + +- Optionally, the name of the relation that the Loki charm uses to interact + with its clients. If provided, this relation name must match a required + relation in metadata.yaml with the `loki_push_api` interface. + + This argument is not required if your metadata.yaml has precisely one + required relation in metadata.yaml with the `loki_push_api` interface, as the + lib will automatically resolve the relation name inspecting the using the + meta information of the charm + +Any time the relation between a Loki provider charm and a Loki consumer charm is +established, a `LokiPushApiEndpointJoined` event is fired. In the consumer side +is it possible to observe this event with: + +```python + +self.framework.observe( + self._loki_consumer.on.loki_push_api_endpoint_joined, + self._on_loki_push_api_endpoint_joined, +) +``` + +Any time there are departures in relations between the consumer charm and Loki +the consumer charm is informed, through a `LokiPushApiEndpointDeparted` event, for instance: + +```python +self.framework.observe( + self._loki_consumer.on.loki_push_api_endpoint_departed, + self._on_loki_push_api_endpoint_departed, +) +``` + +The consumer charm can then choose to update its configuration in both situations. + +Note that LokiPushApiConsumer does not add any labels automatically on its own. In +order to better integrate with the Canonical Observability Stack, you may want to configure your +software to add Juju topology labels. The +[observability-libs](https://charmhub.io/observability-libs) library can be used to get topology +labels in charm code. See :func:`LogProxyConsumer._scrape_configs` for an example of how +to do this with promtail. + +## LogProxyConsumer Library Usage + +> Note: This object is deprecated. Consider migrating to LogForwarder with the release of Juju 3.6 +> LTS. + +Let's say that we have a workload charm that produces logs, and we need to send those logs to a +workload implementing the `loki_push_api` interface, such as `Loki` or `Grafana Agent`. + +Adopting this object in a Charmed Operator consist of two steps: + +1. Use the `LogProxyConsumer` class by instantiating it in the `__init__` method of the charmed + operator. There are two ways to get logs in to promtail. You can give it a list of files to + read, or you can write to it using the syslog protocol. + + For example: + + ```python + from charms.loki_k8s.v1.loki_push_api import LogProxyConsumer + + ... + + def __init__(self, *args): + ... + self._log_proxy = LogProxyConsumer( + self, + logs_scheme={ + "workload-a": { + "log-files": ["/tmp/worload-a-1.log", "/tmp/worload-a-2.log"], + "syslog-port": 1514, + }, + "workload-b": {"log-files": ["/tmp/worload-b.log"], "syslog-port": 1515}, + }, + relation_name="log-proxy", + ) + self.framework.observe( + self._log_proxy.on.promtail_digest_error, + self._promtail_error, + ) + + def _promtail_error(self, event): + logger.error(event.message) + self.unit.status = BlockedStatus(event.message) + ``` + + Any time the relation between a provider charm and a LogProxy consumer charm is + established, a `LogProxyEndpointJoined` event is fired. In the consumer side is it + possible to observe this event with: + + ```python + + self.framework.observe( + self._log_proxy.on.log_proxy_endpoint_joined, + self._on_log_proxy_endpoint_joined, + ) + ``` + + Any time there are departures in relations between the consumer charm and the provider + the consumer charm is informed, through a `LogProxyEndpointDeparted` event, for instance: + + ```python + self.framework.observe( + self._log_proxy.on.log_proxy_endpoint_departed, + self._on_log_proxy_endpoint_departed, + ) + ``` + + The consumer charm can then choose to update its configuration in both situations. + + Note that: + + - You can configure your syslog software using `localhost` as the address and the method + `LogProxyConsumer.syslog_port("container_name")` to get the port, or, alternatively, if you are using rsyslog + you may use the method `LogProxyConsumer.rsyslog_config("container_name")`. + +2. Modify the `metadata.yaml` file to add: + + - The `log-proxy` relation in the `requires` section: + ```yaml + requires: + log-proxy: + interface: loki_push_api + optional: true + ``` + +Once the library is implemented in a Charmed Operator and a relation is established with +the charm that implements the `loki_push_api` interface, the library will inject a +Pebble layer that runs Promtail in the workload container to send logs. + +By default, the promtail binary injected into the container will be downloaded from the internet. +If, for any reason, the container has limited network access, you may allow charm administrators +to provide their own promtail binary at runtime by adding the following snippet to your charm +metadata: + +```yaml +resources: + promtail-bin: + type: file + description: Promtail binary for logging + filename: promtail-linux +``` + +Which would then allow operators to deploy the charm this way: + +``` +juju deploy \ + ./your_charm.charm \ + --resource promtail-bin=/tmp/promtail-linux-amd64 +``` + +If a different resource name is used, it can be specified with the `promtail_resource_name` +argument to the `LogProxyConsumer` constructor. + +The object can emit a `PromtailDigestError` event: + +- Promtail binary cannot be downloaded. +- The sha256 sum mismatch for promtail binary. + +The object can raise a `ContainerNotFoundError` event: + +- No `container_name` parameter has been specified and the Pod has more than 1 container. + +These can be monitored via the PromtailDigestError events via: + +```python + self.framework.observe( + self._loki_consumer.on.promtail_digest_error, + self._promtail_error, + ) + + def _promtail_error(self, event): + logger.error(msg) + self.unit.status = BlockedStatus(event.message) + ) +``` + +## LogForwarder class Usage + +Let's say that we have a charm's workload that writes logs to the standard output (stdout), +and we need to send those logs to a workload implementing the `loki_push_api` interface, +such as `Loki` or `Grafana Agent`. To know how to reach a Loki instance, a charm would +typically use the `loki_push_api` interface. + +Use the `LogForwarder` class by instantiating it in the `__init__` method of the charm: + +```python +from charms.loki_k8s.v1.loki_push_api import LogForwarder + +... + + def __init__(self, *args): + ... + self._log_forwarder = LogForwarder( + self, + relation_name="logging" # optional, defaults to `logging` + ) +``` + +The `LogForwarder` by default will observe relation events on the `logging` endpoint and +enable/disable log forwarding automatically. +Next, modify the `metadata.yaml` file to add: + +The `log-forwarding` relation in the `requires` section: +```yaml +requires: + logging: + interface: loki_push_api + optional: true +``` + +Once the LogForwader class is implemented in your charm and the relation (implementing the +`loki_push_api` interface) is active and healthy, the library will inject a Pebble layer in +each workload container the charm has access to, to configure Pebble's log forwarding +feature and start sending logs to Loki. + +## Alerting Rules + +This charm library also supports gathering alerting rules from all related Loki client +charms and enabling corresponding alerts within the Loki charm. Alert rules are +automatically gathered by `LokiPushApiConsumer` object from a directory conventionally +named `loki_alert_rules`. + +This directory must reside at the top level in the `src` folder of the +consumer charm. Each file in this directory is assumed to be a single alert rule +in YAML format. The file name must have the `.rule` extension. +The format of this alert rule conforms to the +[Loki docs](https://grafana.com/docs/loki/latest/rules/#alerting-rules). + +An example of the contents of one such file is shown below. + +```yaml +alert: HighPercentageError +expr: | + sum(rate({%%juju_topology%%} |= "error" [5m])) by (job) + / + sum(rate({%%juju_topology%%}[5m])) by (job) + > 0.05 +for: 10m +labels: + severity: page +annotations: + summary: High request latency + +``` + +It is **critical** to use the `%%juju_topology%%` filter in the expression for the alert +rule shown above. This filter is a stub that is automatically replaced by the +`LokiPushApiConsumer` following Loki Client's Juju topology (application, model and its +UUID). Such a topology filter is essential to ensure that alert rules submitted by one +provider charm generates alerts only for that same charm. + +The Loki charm may be related to multiple Loki client charms. Without this, filter +rules submitted by one provider charm will also result in corresponding alerts for other +provider charms. Hence, every alert rule expression must include such a topology filter stub. + +Gathering alert rules and generating rule files within the Loki charm is easily done using +the `alerts()` method of `LokiPushApiProvider`. Alerts generated by Loki will automatically +include Juju topology labels in the alerts. These labels indicate the source of the alert. + +The following labels are automatically added to every alert + +- `juju_model` +- `juju_model_uuid` +- `juju_application` + + +Whether alert rules files does not contain the keys `alert` or `expr` or there is no alert +rules file in `alert_rules_path` a `loki_push_api_alert_rules_error` event is emitted. + +To handle these situations the event must be observed in the `LokiClientCharm` charm.py file: + +```python +class LokiClientCharm(CharmBase): + + def __init__(self, *args): + super().__init__(*args) + ... + self._loki_consumer = LokiPushApiConsumer(self) + + self.framework.observe( + self._loki_consumer.on.loki_push_api_alert_rules_error, + self._alert_rules_error + ) + + def _alert_rules_error(self, event): + self.unit.status = BlockedStatus(event.message) +``` + +## Relation Data + +The Loki charm uses both application and unit relation data to obtain information regarding +Loki Push API and alert rules. + +Units of consumer charm send their alert rules over app relation data using the `alert_rules` +key. + +## Charm logging +The `charms.loki_k8s.v0.charm_logging` library can be used in conjunction with this one to configure python's +logging module to forward all logs to Loki via the loki-push-api interface. + +```python +from lib.charms.loki_k8s.v0.charm_logging import log_charm +from lib.charms.loki_k8s.v1.loki_push_api import charm_logging_config, LokiPushApiConsumer + +@log_charm(logging_endpoint="my_endpoints", server_cert="cert_path") +class MyCharm(...): + _cert_path = "/path/to/cert/on/charm/container.crt" + def __init__(self, ...): + self.logging = LokiPushApiConsumer(...) + self.my_endpoints, self.cert_path = charm_logging_config( + self.logging, self._cert_path) +``` + +Do this, and all charm logs will be forwarded to Loki as soon as a relation is formed. +""" + +import json +import logging +import os +import platform +import re +import socket +import subprocess +import tempfile +import typing +from copy import deepcopy +from gzip import GzipFile +from hashlib import sha256 +from io import BytesIO +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple, Union +from urllib import request +from urllib.error import URLError + +import yaml +from cosl import JujuTopology +from ops.charm import ( + CharmBase, + HookEvent, + PebbleReadyEvent, + RelationBrokenEvent, + RelationCreatedEvent, + RelationDepartedEvent, + RelationEvent, + RelationJoinedEvent, + RelationRole, + WorkloadEvent, +) +from ops.framework import EventBase, EventSource, Object, ObjectEvents +from ops.jujuversion import JujuVersion +from ops.model import Container, ModelError, Relation +from ops.pebble import APIError, ChangeError, Layer, PathError, ProtocolError + +# The unique Charmhub library identifier, never change it +LIBID = "bf76f23cdd03464b877c52bd1d2f563e" + +# Increment this major API version when introducing breaking changes +LIBAPI = 1 + +# Increment this PATCH version before using `charmcraft publish-lib` or reset +# to 0 if you are raising the major API version +LIBPATCH = 13 + +PYDEPS = ["cosl"] + +logger = logging.getLogger(__name__) + +RELATION_INTERFACE_NAME = "loki_push_api" +DEFAULT_RELATION_NAME = "logging" +DEFAULT_ALERT_RULES_RELATIVE_PATH = "./src/loki_alert_rules" +DEFAULT_LOG_PROXY_RELATION_NAME = "log-proxy" + +PROMTAIL_BASE_URL = "https://github.com/canonical/loki-k8s-operator/releases/download" +# To update Promtail version you only need to change the PROMTAIL_VERSION and +# update all sha256 sums in PROMTAIL_BINARIES. To support a new architecture +# you only need to add a new key value pair for the architecture in PROMTAIL_BINARIES. +PROMTAIL_VERSION = "v2.9.7" +PROMTAIL_ARM_BINARY = { + "filename": "promtail-static-arm64", + "zipsha": "c083fdb45e5c794103f974eeb426489b4142438d9e10d0ae272b2aff886e249b", + "binsha": "4cd055c477a301c0bdfdbcea514e6e93f6df5d57425ce10ffc77f3e16fec1ddf", +} + +PROMTAIL_BINARIES = { + "amd64": { + "filename": "promtail-static-amd64", + "zipsha": "6873cbdabf23062aeefed6de5f00ff382710332af3ab90a48c253ea17e08f465", + "binsha": "28da9b99f81296fe297831f3bc9d92aea43b4a92826b8ff04ba433b8cb92fb50", + }, + "arm64": PROMTAIL_ARM_BINARY, + "aarch64": PROMTAIL_ARM_BINARY, +} + +# Paths in `charm` container +BINARY_DIR = "/tmp" + +# Paths in `workload` container +WORKLOAD_BINARY_DIR = "/opt/promtail" +WORKLOAD_CONFIG_DIR = "/etc/promtail" +WORKLOAD_CONFIG_FILE_NAME = "promtail_config.yaml" +WORKLOAD_CONFIG_PATH = "{}/{}".format(WORKLOAD_CONFIG_DIR, WORKLOAD_CONFIG_FILE_NAME) +WORKLOAD_POSITIONS_PATH = "{}/positions.yaml".format(WORKLOAD_BINARY_DIR) +WORKLOAD_SERVICE_NAME = "promtail" + +# These are the initial port values. As we can have more than one container, +# we use odd and even numbers to avoid collisions. +# Each new container adds 2 to the previous value. +HTTP_LISTEN_PORT_START = 9080 # even start port +GRPC_LISTEN_PORT_START = 9095 # odd start port + + +class LokiPushApiError(Exception): + """Base class for errors raised by this module.""" + + +class RelationNotFoundError(LokiPushApiError): + """Raised if there is no relation with the given name.""" + + def __init__(self, relation_name: str): + self.relation_name = relation_name + self.message = "No relation named '{}' found".format(relation_name) + + super().__init__(self.message) + + +class RelationInterfaceMismatchError(LokiPushApiError): + """Raised if the relation with the given name has a different interface.""" + + def __init__( + self, + relation_name: str, + expected_relation_interface: str, + actual_relation_interface: str, + ): + self.relation_name = relation_name + self.expected_relation_interface = expected_relation_interface + self.actual_relation_interface = actual_relation_interface + self.message = ( + "The '{}' relation has '{}' as interface rather than the expected '{}'".format( + relation_name, actual_relation_interface, expected_relation_interface + ) + ) + super().__init__(self.message) + + +class RelationRoleMismatchError(LokiPushApiError): + """Raised if the relation with the given name has a different direction.""" + + def __init__( + self, + relation_name: str, + expected_relation_role: RelationRole, + actual_relation_role: RelationRole, + ): + self.relation_name = relation_name + self.expected_relation_interface = expected_relation_role + self.actual_relation_role = actual_relation_role + self.message = "The '{}' relation has role '{}' rather than the expected '{}'".format( + relation_name, repr(actual_relation_role), repr(expected_relation_role) + ) + super().__init__(self.message) + + +def _validate_relation_by_interface_and_direction( + charm: CharmBase, + relation_name: str, + expected_relation_interface: str, + expected_relation_role: RelationRole, +): + """Verifies that a relation has the necessary characteristics. + + Verifies that the `relation_name` provided: (1) exists in metadata.yaml, + (2) declares as interface the interface name passed as `relation_interface` + and (3) has the right "direction", i.e., it is a relation that `charm` + provides or requires. + + Args: + charm: a `CharmBase` object to scan for the matching relation. + relation_name: the name of the relation to be verified. + expected_relation_interface: the interface name to be matched by the + relation named `relation_name`. + expected_relation_role: whether the `relation_name` must be either + provided or required by `charm`. + + Raises: + RelationNotFoundError: If there is no relation in the charm's metadata.yaml + with the same name as provided via `relation_name` argument. + RelationInterfaceMismatchError: The relation with the same name as provided + via `relation_name` argument does not have the same relation interface + as specified via the `expected_relation_interface` argument. + RelationRoleMismatchError: If the relation with the same name as provided + via `relation_name` argument does not have the same role as specified + via the `expected_relation_role` argument. + """ + if relation_name not in charm.meta.relations: + raise RelationNotFoundError(relation_name) + + relation = charm.meta.relations[relation_name] + + actual_relation_interface = relation.interface_name + if actual_relation_interface != expected_relation_interface: + raise RelationInterfaceMismatchError( + relation_name, + expected_relation_interface, + actual_relation_interface, # pyright: ignore + ) + + if expected_relation_role == RelationRole.provides: + if relation_name not in charm.meta.provides: + raise RelationRoleMismatchError( + relation_name, RelationRole.provides, RelationRole.requires + ) + elif expected_relation_role == RelationRole.requires: + if relation_name not in charm.meta.requires: + raise RelationRoleMismatchError( + relation_name, RelationRole.requires, RelationRole.provides + ) + else: + raise Exception("Unexpected RelationDirection: {}".format(expected_relation_role)) + + +class InvalidAlertRulePathError(Exception): + """Raised if the alert rules folder cannot be found or is otherwise invalid.""" + + def __init__( + self, + alert_rules_absolute_path: Path, + message: str, + ): + self.alert_rules_absolute_path = alert_rules_absolute_path + self.message = message + + super().__init__(self.message) + + +def _is_official_alert_rule_format(rules_dict: dict) -> bool: + """Are alert rules in the upstream format as supported by Loki. + + Alert rules in dictionary format are in "official" form if they + contain a "groups" key, since this implies they contain a list of + alert rule groups. + + Args: + rules_dict: a set of alert rules in Python dictionary format + + Returns: + True if alert rules are in official Loki file format. + """ + return "groups" in rules_dict + + +def _is_single_alert_rule_format(rules_dict: dict) -> bool: + """Are alert rules in single rule format. + + The Loki charm library supports reading of alert rules in a + custom format that consists of a single alert rule per file. This + does not conform to the official Loki alert rule file format + which requires that each alert rules file consists of a list of + alert rule groups and each group consists of a list of alert + rules. + + Alert rules in dictionary form are considered to be in single rule + format if in the least it contains two keys corresponding to the + alert rule name and alert expression. + + Returns: + True if alert rule is in single rule file format. + """ + # one alert rule per file + return set(rules_dict) >= {"alert", "expr"} + + +class AlertRules: + """Utility class for amalgamating Loki alert rule files and injecting juju topology. + + An `AlertRules` object supports aggregating alert rules from files and directories in both + official and single rule file formats using the `add_path()` method. All the alert rules + read are annotated with Juju topology labels and amalgamated into a single data structure + in the form of a Python dictionary using the `as_dict()` method. Such a dictionary can be + easily dumped into JSON format and exchanged over relation data. The dictionary can also + be dumped into YAML format and written directly into an alert rules file that is read by + Loki. Note that multiple `AlertRules` objects must not be written into the same file, + since Loki allows only a single list of alert rule groups per alert rules file. + + The official Loki format is a YAML file conforming to the Loki documentation + (https://grafana.com/docs/loki/latest/api/#list-rule-groups). + The custom single rule format is a subsection of the official YAML, having a single alert + rule, effectively "one alert per file". + """ + + # This class uses the following terminology for the various parts of a rule file: + # - alert rules file: the entire groups[] yaml, including the "groups:" key. + # - alert groups (plural): the list of groups[] (a list, i.e. no "groups:" key) - it is a list + # of dictionaries that have the "name" and "rules" keys. + # - alert group (singular): a single dictionary that has the "name" and "rules" keys. + # - alert rules (plural): all the alerts in a given alert group - a list of dictionaries with + # the "alert" and "expr" keys. + # - alert rule (singular): a single dictionary that has the "alert" and "expr" keys. + + def __init__(self, topology: Optional[JujuTopology] = None): + """Build and alert rule object. + + Args: + topology: a `JujuTopology` instance that is used to annotate all alert rules. + """ + self.topology = topology + self.tool = CosTool(None) + self.alert_groups = [] # type: List[dict] + + def _from_file(self, root_path: Path, file_path: Path) -> List[dict]: + """Read a rules file from path, injecting juju topology. + + Args: + root_path: full path to the root rules folder (used only for generating group name) + file_path: full path to a *.rule file. + + Returns: + A list of dictionaries representing the rules file, if file is valid (the structure is + formed by `yaml.safe_load` of the file); an empty list otherwise. + """ + with file_path.open() as rf: + # Load a list of rules from file then add labels and filters + try: + rule_file = yaml.safe_load(rf) or {} + + except Exception as e: + logger.error("Failed to read alert rules from %s: %s", file_path.name, e) + return [] + + if _is_official_alert_rule_format(rule_file): + alert_groups = rule_file["groups"] + elif _is_single_alert_rule_format(rule_file): + # convert to list of alert groups + # group name is made up from the file name + alert_groups = [{"name": file_path.stem, "rules": [rule_file]}] + else: + # invalid/unsupported + reason = "file is empty" if not rule_file else "unexpected file structure" + logger.error("Invalid rules file (%s): %s", reason, file_path.name) + return [] + + # update rules with additional metadata + for alert_group in alert_groups: + # update group name with topology and sub-path + alert_group["name"] = self._group_name( + str(root_path), + str(file_path), + alert_group["name"], + ) + + # add "juju_" topology labels + for alert_rule in alert_group["rules"]: + if "labels" not in alert_rule: + alert_rule["labels"] = {} + + if self.topology: + # only insert labels that do not already exist + for label, val in self.topology.label_matcher_dict.items(): + if label not in alert_rule["labels"]: + alert_rule["labels"][label] = val + + # insert juju topology filters into a prometheus alert rule + # logql doesn't like empty matchers, so add a job matcher which hits + # any string as a "wildcard" which the topology labels will + # filter down + alert_rule["expr"] = self.tool.inject_label_matchers( + re.sub(r"%%juju_topology%%", r'job=~".+"', alert_rule["expr"]), + self.topology.label_matcher_dict, + ) + + return alert_groups + + def _group_name( + self, + root_path: typing.Union[Path, str], + file_path: typing.Union[Path, str], + group_name: str, + ) -> str: + """Generate group name from path and topology. + + The group name is made up of the relative path between the root dir_path, the file path, + and topology identifier. + + Args: + root_path: path to the root rules dir. + file_path: path to rule file. + group_name: original group name to keep as part of the new augmented group name + + Returns: + New group name, augmented by juju topology and relative path. + """ + file_path = Path(file_path) if not isinstance(file_path, Path) else file_path + root_path = Path(root_path) if not isinstance(root_path, Path) else root_path + rel_path = file_path.parent.relative_to(root_path.as_posix()) + + # We should account for both absolute paths and Windows paths. Convert it to a POSIX + # string, strip off any leading /, then join it + + path_str = "" + if not rel_path == Path("."): + # Get rid of leading / and optionally drive letters so they don't muck up + # the template later, since Path.parts returns them. The 'if relpath.is_absolute ...' + # isn't even needed since re.sub doesn't throw exceptions if it doesn't match, so it's + # optional, but it makes it clear what we're doing. + + # Note that Path doesn't actually care whether the path is valid just to instantiate + # the object, so we can happily strip that stuff out to make templating nicer + rel_path = Path( + re.sub(r"^([A-Za-z]+:)?/", "", rel_path.as_posix()) + if rel_path.is_absolute() + else str(rel_path) + ) + + # Get rid of relative path characters in the middle which both os.path and pathlib + # leave hanging around. We could use path.resolve(), but that would lead to very + # long template strings when rules come from pods and/or other deeply nested charm + # paths + path_str = "_".join(filter(lambda x: x not in ["..", "/"], rel_path.parts)) + + # Generate group name: + # - name, from juju topology + # - suffix, from the relative path of the rule file; + group_name_parts = [self.topology.identifier] if self.topology else [] + group_name_parts.extend([path_str, group_name, "alerts"]) + # filter to remove empty strings + return "_".join(filter(lambda x: x, group_name_parts)) + + @classmethod + def _multi_suffix_glob( + cls, dir_path: Path, suffixes: List[str], recursive: bool = True + ) -> list: + """Helper function for getting all files in a directory that have a matching suffix. + + Args: + dir_path: path to the directory to glob from. + suffixes: list of suffixes to include in the glob (items should begin with a period). + recursive: a flag indicating whether a glob is recursive (nested) or not. + + Returns: + List of files in `dir_path` that have one of the suffixes specified in `suffixes`. + """ + all_files_in_dir = dir_path.glob("**/*" if recursive else "*") + return list(filter(lambda f: f.is_file() and f.suffix in suffixes, all_files_in_dir)) + + def _from_dir(self, dir_path: Path, recursive: bool) -> List[dict]: + """Read all rule files in a directory. + + All rules from files for the same directory are loaded into a single + group. The generated name of this group includes juju topology. + By default, only the top directory is scanned; for nested scanning, pass `recursive=True`. + + Args: + dir_path: directory containing *.rule files (alert rules without groups). + recursive: flag indicating whether to scan for rule files recursively. + + Returns: + a list of dictionaries representing prometheus alert rule groups, each dictionary + representing an alert group (structure determined by `yaml.safe_load`). + """ + alert_groups = [] # type: List[dict] + + # Gather all alerts into a list of groups + for file_path in self._multi_suffix_glob(dir_path, [".rule", ".rules"], recursive): + alert_groups_from_file = self._from_file(dir_path, file_path) + if alert_groups_from_file: + logger.debug("Reading alert rule from %s", file_path) + alert_groups.extend(alert_groups_from_file) + + return alert_groups + + def add_path(self, path_str: str, *, recursive: bool = False): + """Add rules from a dir path. + + All rules from files are aggregated into a data structure representing a single rule file. + All group names are augmented with juju topology. + + Args: + path_str: either a rules file or a dir of rules files. + recursive: whether to read files recursively or not (no impact if `path` is a file). + + Raises: + InvalidAlertRulePathError: if the provided path is invalid. + """ + path = Path(path_str) # type: Path + if path.is_dir(): + self.alert_groups.extend(self._from_dir(path, recursive)) + elif path.is_file(): + self.alert_groups.extend(self._from_file(path.parent, path)) + else: + logger.debug("The alerts file does not exist: %s", path) + + def as_dict(self) -> dict: + """Return standard alert rules file in dict representation. + + Returns: + a dictionary containing a single list of alert rule groups. + The list of alert rule groups is provided as value of the + "groups" dictionary key. + """ + return {"groups": self.alert_groups} if self.alert_groups else {} + + +def _resolve_dir_against_charm_path(charm: CharmBase, *path_elements: str) -> str: + """Resolve the provided path items against the directory of the main file. + + Look up the directory of the `main.py` file being executed. This is normally + going to be the charm.py file of the charm including this library. Then, resolve + the provided path elements and, if the result path exists and is a directory, + return its absolute path; otherwise, raise en exception. + + Raises: + InvalidAlertRulePathError, if the path does not exist or is not a directory. + """ + charm_dir = Path(str(charm.charm_dir)) + if not charm_dir.exists() or not charm_dir.is_dir(): + # Operator Framework does not currently expose a robust + # way to determine the top level charm source directory + # that is consistent across deployed charms and unit tests + # Hence for unit tests the current working directory is used + # TODO: updated this logic when the following ticket is resolved + # https://github.com/canonical/operator/issues/643 + charm_dir = Path(os.getcwd()) + + alerts_dir_path = charm_dir.absolute().joinpath(*path_elements) + + if not alerts_dir_path.exists(): + raise InvalidAlertRulePathError(alerts_dir_path, "directory does not exist") + if not alerts_dir_path.is_dir(): + raise InvalidAlertRulePathError(alerts_dir_path, "is not a directory") + + return str(alerts_dir_path) + + +class NoRelationWithInterfaceFoundError(Exception): + """No relations with the given interface are found in the charm meta.""" + + def __init__(self, charm: CharmBase, relation_interface: Optional[str] = None): + self.charm = charm + self.relation_interface = relation_interface + self.message = ( + "No relations with interface '{}' found in the meta of the '{}' charm".format( + relation_interface, charm.meta.name + ) + ) + + super().__init__(self.message) + + +class MultipleRelationsWithInterfaceFoundError(Exception): + """Multiple relations with the given interface are found in the charm meta.""" + + def __init__(self, charm: CharmBase, relation_interface: str, relations: list): + self.charm = charm + self.relation_interface = relation_interface + self.relations = relations + self.message = ( + "Multiple relations with interface '{}' found in the meta of the '{}' charm.".format( + relation_interface, charm.meta.name + ) + ) + super().__init__(self.message) + + +class LokiPushApiEndpointDeparted(EventBase): + """Event emitted when Loki departed.""" + + +class LokiPushApiEndpointJoined(EventBase): + """Event emitted when Loki joined.""" + + +class LokiPushApiAlertRulesChanged(EventBase): + """Event emitted if there is a change in the alert rules.""" + + def __init__(self, handle, relation, relation_id, app=None, unit=None): + """Pretend we are almost like a RelationEvent. + + Fields to serialize: + { + "relation_name": , + "relation_id": , + "app_name": , + "unit_name": + } + + In this way, we can transparently use `RelationEvent.snapshot()` to pass + it back if we need to log it. + """ + super().__init__(handle) + self.relation = relation + self.relation_id = relation_id + self.app = app + self.unit = unit + + def snapshot(self) -> Dict: + """Save event information.""" + if not self.relation: + return {} + snapshot = {"relation_name": self.relation.name, "relation_id": self.relation.id} + if self.app: + snapshot["app_name"] = self.app.name + if self.unit: + snapshot["unit_name"] = self.unit.name + return snapshot + + def restore(self, snapshot: dict): + """Restore event information.""" + self.relation = self.framework.model.get_relation( + snapshot["relation_name"], snapshot["relation_id"] + ) + app_name = snapshot.get("app_name") + if app_name: + self.app = self.framework.model.get_app(app_name) + else: + self.app = None + unit_name = snapshot.get("unit_name") + if unit_name: + self.unit = self.framework.model.get_unit(unit_name) + else: + self.unit = None + + +class InvalidAlertRuleEvent(EventBase): + """Event emitted when alert rule files are not parsable. + + Enables us to set a clear status on the provider. + """ + + def __init__(self, handle, errors: str = "", valid: bool = False): + super().__init__(handle) + self.errors = errors + self.valid = valid + + def snapshot(self) -> Dict: + """Save alert rule information.""" + return { + "valid": self.valid, + "errors": self.errors, + } + + def restore(self, snapshot): + """Restore alert rule information.""" + self.valid = snapshot["valid"] + self.errors = snapshot["errors"] + + +class LokiPushApiEvents(ObjectEvents): + """Event descriptor for events raised by `LokiPushApiProvider`.""" + + loki_push_api_endpoint_departed = EventSource(LokiPushApiEndpointDeparted) + loki_push_api_endpoint_joined = EventSource(LokiPushApiEndpointJoined) + loki_push_api_alert_rules_changed = EventSource(LokiPushApiAlertRulesChanged) + alert_rule_status_changed = EventSource(InvalidAlertRuleEvent) + + +class LokiPushApiProvider(Object): + """A LokiPushApiProvider class.""" + + on = LokiPushApiEvents() # pyright: ignore + + def __init__( + self, + charm, + relation_name: str = DEFAULT_RELATION_NAME, + *, + port: Union[str, int] = 3100, + scheme: str = "http", + address: str = "localhost", + path: str = "loki/api/v1/push", + ): + """A Loki service provider. + + Args: + charm: a `CharmBase` instance that manages this + instance of the Loki service. + relation_name: an optional string name of the relation between `charm` + and the Loki charmed service. The default is "logging". + It is strongly advised not to change the default, so that people + deploying your charm will have a consistent experience with all + other charms that consume metrics endpoints. + port: an optional port of the Loki service (default is "3100"). + scheme: an optional scheme of the Loki API URL (default is "http"). + address: an optional address of the Loki service (default is "localhost"). + path: an optional path of the Loki API URL (default is "loki/api/v1/push") + + Raises: + RelationNotFoundError: If there is no relation in the charm's metadata.yaml + with the same name as provided via `relation_name` argument. + RelationInterfaceMismatchError: The relation with the same name as provided + via `relation_name` argument does not have the `loki_push_api` relation + interface. + RelationRoleMismatchError: If the relation with the same name as provided + via `relation_name` argument does not have the `RelationRole.requires` + role. + """ + _validate_relation_by_interface_and_direction( + charm, relation_name, RELATION_INTERFACE_NAME, RelationRole.provides + ) + super().__init__(charm, relation_name) + self._charm = charm + self._relation_name = relation_name + self._tool = CosTool(self) + self.port = int(port) + self.scheme = scheme + self.address = address + self.path = path + + events = self._charm.on[relation_name] + self.framework.observe(self._charm.on.upgrade_charm, self._on_lifecycle_event) + self.framework.observe(events.relation_joined, self._on_logging_relation_joined) + self.framework.observe(events.relation_changed, self._on_logging_relation_changed) + self.framework.observe(events.relation_departed, self._on_logging_relation_departed) + self.framework.observe(events.relation_broken, self._on_logging_relation_broken) + + def _on_lifecycle_event(self, _): + # Upgrade event or other charm-level event + should_update = False + for relation in self._charm.model.relations[self._relation_name]: + # Don't accidentally flip a True result back. + should_update = should_update or self._process_logging_relation_changed(relation) + if should_update: + # We don't have a RelationEvent, so build it up by hand + first_rel = self._charm.model.relations[self._relation_name][0] + self.on.loki_push_api_alert_rules_changed.emit( + relation=first_rel, + relation_id=first_rel.id, + ) + + def _on_logging_relation_joined(self, event: RelationJoinedEvent): + """Set basic data on relation joins. + + Set the promtail binary URL location, which will not change, and anything + else which may be required, but is static.. + + Args: + event: a `CharmEvent` in response to which the consumer + charm must set its relation data. + """ + if self._charm.unit.is_leader(): + event.relation.data[self._charm.app].update(self._promtail_binary_url) + logger.debug("Saved promtail binary url: %s", self._promtail_binary_url) + + def _on_logging_relation_changed(self, event: HookEvent): + """Handle changes in related consumers. + + Anytime there are changes in the relation between Loki + and its consumers charms. + + Args: + event: a `CharmEvent` in response to which the consumer + charm must update its relation data. + """ + should_update = self._process_logging_relation_changed(event.relation) # pyright: ignore + if should_update: + self.on.loki_push_api_alert_rules_changed.emit( + relation=event.relation, # pyright: ignore + relation_id=event.relation.id, # pyright: ignore + app=self._charm.app, + unit=self._charm.unit, + ) + + def _on_logging_relation_broken(self, event: RelationBrokenEvent): + """Removes alert rules files when consumer charms left the relation with Loki. + + Args: + event: a `CharmEvent` in response to which the Loki + charm must update its relation data. + """ + self.on.loki_push_api_alert_rules_changed.emit( + relation=event.relation, + relation_id=event.relation.id, + app=self._charm.app, + unit=self._charm.unit, + ) + + def _on_logging_relation_departed(self, event: RelationDepartedEvent): + """Removes alert rules files when consumer charms left the relation with Loki. + + Args: + event: a `CharmEvent` in response to which the Loki + charm must update its relation data. + """ + self.on.loki_push_api_alert_rules_changed.emit( + relation=event.relation, + relation_id=event.relation.id, + app=self._charm.app, + unit=self._charm.unit, + ) + + def _should_update_alert_rules(self, relation) -> bool: + """Determine whether alert rules should be regenerated. + + If there are alert rules in the relation data bag, tell the charm + whether to regenerate them based on the boolean returned here. + """ + if relation.data.get(relation.app).get("alert_rules", None) is not None: + return True + return False + + def _process_logging_relation_changed(self, relation: Relation) -> bool: + """Handle changes in related consumers. + + Anytime there are changes in relations between Loki + and its consumers charms, Loki set the `loki_push_api` + into the relation data. Set the endpoint building + appropriately, and if there are alert rules present in + the relation, let the caller know. + Besides Loki generates alert rules files based what + consumer charms forwards, + + Args: + relation: the `Relation` instance to update. + + Returns: + A boolean indicating whether an event should be emitted, so we + only emit one on lifecycle events + """ + relation.data[self._charm.unit]["public_address"] = socket.getfqdn() or "" + self.update_endpoint(relation=relation) + return self._should_update_alert_rules(relation) + + @property + def _promtail_binary_url(self) -> dict: + """URL from which Promtail binary can be downloaded.""" + # construct promtail binary url paths from parts + promtail_binaries = {} + for arch, info in PROMTAIL_BINARIES.items(): + info["url"] = "{}/promtail-{}/{}.gz".format( + PROMTAIL_BASE_URL, PROMTAIL_VERSION, info["filename"] + ) + promtail_binaries[arch] = info + + return {"promtail_binary_zip_url": json.dumps(promtail_binaries)} + + def update_endpoint(self, url: str = "", relation: Optional[Relation] = None) -> None: + """Triggers programmatically the update of endpoint in unit relation data. + + This method should be used when the charm relying on this library needs + to update the relation data in response to something occurring outside + the `logging` relation lifecycle, e.g., in case of a + host address change because the charmed operator becomes connected to an + Ingress after the `logging` relation is established. + + Args: + url: An optional url value to update relation data. + relation: An optional instance of `class:ops.model.Relation` to update. + """ + # if no relation is specified update all of them + if not relation: + if not self._charm.model.relations.get(self._relation_name): + return + + relations_list = self._charm.model.relations.get(self._relation_name) + else: + relations_list = [relation] + + endpoint = self._endpoint(url or self._url) + + for relation in relations_list: + relation.data[self._charm.unit].update({"endpoint": json.dumps(endpoint)}) + + logger.debug("Saved endpoint in unit relation data") + + @property + def _url(self) -> str: + """Get local Loki Push API url. + + Return url to loki, including port number, but without the endpoint subpath. + """ + return "http://{}:{}".format(socket.getfqdn(), self.port) + + def _endpoint(self, url) -> dict: + """Get Loki push API endpoint for a given url. + + Args: + url: A loki unit URL. + + Returns: str + """ + endpoint = "/loki/api/v1/push" + return {"url": url.rstrip("/") + endpoint} + + @property + def alerts(self) -> dict: # noqa: C901 + """Fetch alerts for all relations. + + A Loki alert rules file consists of a list of "groups". Each + group consists of a list of alerts (`rules`) that are sequentially + executed. This method returns all the alert rules provided by each + related metrics provider charm. These rules may be used to generate a + separate alert rules file for each relation since the returned list + of alert groups are indexed by relation ID. Also for each relation ID + associated scrape metadata such as Juju model, UUID and application + name are provided so a unique name may be generated for the rules + file. For each relation the structure of data returned is a dictionary + with four keys + + - groups + - model + - model_uuid + - application + + The value of the `groups` key is such that it may be used to generate + a Loki alert rules file directly using `yaml.dump` but the + `groups` key itself must be included as this is required by Loki, + for example as in `yaml.dump({"groups": alerts["groups"]})`. + + Currently only accepts a list of rules and these + rules are all placed into a single group, even though Loki itself + allows for multiple groups within a single alert rules file. + + Returns: + a dictionary of alert rule groups and associated scrape + metadata indexed by relation ID. + """ + alerts = {} # type: Dict[str, dict] # mapping b/w juju identifiers and alert rule files + for relation in self._charm.model.relations[self._relation_name]: + if not relation.units or not relation.app: + continue + + alert_rules = json.loads(relation.data[relation.app].get("alert_rules", "{}")) + if not alert_rules: + continue + + alert_rules = self._inject_alert_expr_labels(alert_rules) + + identifier, topology = self._get_identifier_by_alert_rules(alert_rules) + if not topology: + try: + metadata = json.loads(relation.data[relation.app]["metadata"]) + identifier = JujuTopology.from_dict(metadata).identifier + alerts[identifier] = self._tool.apply_label_matchers(alert_rules) # type: ignore + + except KeyError as e: + logger.debug( + "Relation %s has no 'metadata': %s", + relation.id, + e, + ) + + if not identifier: + logger.error( + "Alert rules were found but no usable group or identifier was present." + ) + continue + + _, errmsg = self._tool.validate_alert_rules(alert_rules) + if errmsg: + relation.data[self._charm.app]["event"] = json.dumps({"errors": errmsg}) + continue + + alerts[identifier] = alert_rules + + return alerts + + def _get_identifier_by_alert_rules( + self, rules: dict + ) -> Tuple[Union[str, None], Union[JujuTopology, None]]: + """Determine an appropriate dict key for alert rules. + + The key is used as the filename when writing alerts to disk, so the structure + and uniqueness is important. + + Args: + rules: a dict of alert rules + Returns: + A tuple containing an identifier, if found, and a JujuTopology, if it could + be constructed. + """ + if "groups" not in rules: + logger.debug("No alert groups were found in relation data") + return None, None + + # Construct an ID based on what's in the alert rules if they have labels + for group in rules["groups"]: + try: + labels = group["rules"][0]["labels"] + topology = JujuTopology( + # Don't try to safely get required constructor fields. There's already + # a handler for KeyErrors + model_uuid=labels["juju_model_uuid"], + model=labels["juju_model"], + application=labels["juju_application"], + unit=labels.get("juju_unit", ""), + charm_name=labels.get("juju_charm", ""), + ) + return topology.identifier, topology + except KeyError: + logger.debug("Alert rules were found but no usable labels were present") + continue + + logger.warning( + "No labeled alert rules were found, and no 'scrape_metadata' " + "was available. Using the alert group name as filename." + ) + try: + for group in rules["groups"]: + return group["name"], None + except KeyError: + logger.debug("No group name was found to use as identifier") + + return None, None + + def _inject_alert_expr_labels(self, rules: Dict[str, Any]) -> Dict[str, Any]: + """Iterate through alert rules and inject topology into expressions. + + Args: + rules: a dict of alert rules + """ + if "groups" not in rules: + return rules + + modified_groups = [] + for group in rules["groups"]: + # Copy off rules, so we don't modify an object we're iterating over + rules_copy = group["rules"] + for idx, rule in enumerate(rules_copy): + labels = rule.get("labels") + + if labels: + try: + topology = JujuTopology( + # Don't try to safely get required constructor fields. There's already + # a handler for KeyErrors + model_uuid=labels["juju_model_uuid"], + model=labels["juju_model"], + application=labels["juju_application"], + unit=labels.get("juju_unit", ""), + charm_name=labels.get("juju_charm", ""), + ) + + # Inject topology and put it back in the list + rule["expr"] = self._tool.inject_label_matchers( + re.sub(r"%%juju_topology%%,?", "", rule["expr"]), + topology.label_matcher_dict, + ) + except KeyError: + # Some required JujuTopology key is missing. Just move on. + pass + + group["rules"][idx] = rule + + modified_groups.append(group) + + rules["groups"] = modified_groups + return rules + + +class ConsumerBase(Object): + """Consumer's base class.""" + + def __init__( + self, + charm: CharmBase, + relation_name: str = DEFAULT_RELATION_NAME, + alert_rules_path: str = DEFAULT_ALERT_RULES_RELATIVE_PATH, + recursive: bool = False, + skip_alert_topology_labeling: bool = False, + ): + super().__init__(charm, relation_name) + self._charm = charm + self._relation_name = relation_name + self.topology = JujuTopology.from_charm(charm) + + try: + alert_rules_path = _resolve_dir_against_charm_path(charm, alert_rules_path) + except InvalidAlertRulePathError as e: + logger.debug( + "Invalid Loki alert rules folder at %s: %s", + e.alert_rules_absolute_path, + e.message, + ) + self._alert_rules_path = alert_rules_path + self._skip_alert_topology_labeling = skip_alert_topology_labeling + + self._recursive = recursive + + def _handle_alert_rules(self, relation): + if not self._charm.unit.is_leader(): + return + + alert_rules = ( + AlertRules(None) if self._skip_alert_topology_labeling else AlertRules(self.topology) + ) + alert_rules.add_path(self._alert_rules_path, recursive=self._recursive) + alert_rules_as_dict = alert_rules.as_dict() + + relation.data[self._charm.app]["metadata"] = json.dumps(self.topology.as_dict()) + relation.data[self._charm.app]["alert_rules"] = json.dumps( + alert_rules_as_dict, + sort_keys=True, # sort, to prevent unnecessary relation_changed events + ) + + @property + def loki_endpoints(self) -> List[dict]: + """Fetch Loki Push API endpoints sent from LokiPushApiProvider through relation data. + + Returns: + A list of dictionaries with Loki Push API endpoints, for instance: + [ + {"url": "http://loki1:3100/loki/api/v1/push"}, + {"url": "http://loki2:3100/loki/api/v1/push"}, + ] + """ + endpoints = [] # type: list + + for relation in self._charm.model.relations[self._relation_name]: + for unit in relation.units: + if unit.app == self._charm.app: + # This is a peer unit + continue + + endpoint = relation.data[unit].get("endpoint") + if endpoint: + deserialized_endpoint = json.loads(endpoint) + endpoints.append(deserialized_endpoint) + + return endpoints + + +class LokiPushApiConsumer(ConsumerBase): + """Loki Consumer class.""" + + on = LokiPushApiEvents() # pyright: ignore + + def __init__( + self, + charm: CharmBase, + relation_name: str = DEFAULT_RELATION_NAME, + alert_rules_path: str = DEFAULT_ALERT_RULES_RELATIVE_PATH, + recursive: bool = True, + skip_alert_topology_labeling: bool = False, + ): + """Construct a Loki charm client. + + The `LokiPushApiConsumer` object provides configurations to a Loki client charm, such as + the Loki API endpoint to push logs. It is intended for workloads that can speak + loki_push_api (https://grafana.com/docs/loki/latest/api/#push-log-entries-to-loki), such + as grafana-agent. + (If you need to forward workload stdout logs, then use LogForwarder; if you need to forward + log files, then use LogProxyConsumer.) + + `LokiPushApiConsumer` can be instantiated as follows: + + self._loki_consumer = LokiPushApiConsumer(self) + + Args: + charm: a `CharmBase` object that manages this `LokiPushApiConsumer` object. + Typically, this is `self` in the instantiating class. + relation_name: the string name of the relation interface to look up. + If `charm` has exactly one relation with this interface, the relation's + name is returned. If none or multiple relations with the provided interface + are found, this method will raise either a NoRelationWithInterfaceFoundError or + MultipleRelationsWithInterfaceFoundError exception, respectively. + alert_rules_path: a string indicating a path where alert rules can be found + recursive: Whether to scan for rule files recursively. + skip_alert_topology_labeling: whether to skip the alert topology labeling. + + Raises: + RelationNotFoundError: If there is no relation in the charm's metadata.yaml + with the same name as provided via `relation_name` argument. + RelationInterfaceMismatchError: The relation with the same name as provided + via `relation_name` argument does not have the `loki_push_api` relation + interface. + RelationRoleMismatchError: If the relation with the same name as provided + via `relation_name` argument does not have the `RelationRole.provides` + role. + + Emits: + loki_push_api_endpoint_joined: This event is emitted when the relation between the + Charmed Operator that instantiates `LokiPushApiProvider` (Loki charm for instance) + and the Charmed Operator that instantiates `LokiPushApiConsumer` is established. + loki_push_api_endpoint_departed: This event is emitted when the relation between the + Charmed Operator that implements `LokiPushApiProvider` (Loki charm for instance) + and the Charmed Operator that implements `LokiPushApiConsumer` is removed. + loki_push_api_alert_rules_error: This event is emitted when an invalid alert rules + file is encountered or if `alert_rules_path` is empty. + """ + _validate_relation_by_interface_and_direction( + charm, relation_name, RELATION_INTERFACE_NAME, RelationRole.requires + ) + super().__init__( + charm, relation_name, alert_rules_path, recursive, skip_alert_topology_labeling + ) + events = self._charm.on[relation_name] + self.framework.observe(self._charm.on.upgrade_charm, self._on_lifecycle_event) + self.framework.observe(events.relation_joined, self._on_logging_relation_joined) + self.framework.observe(events.relation_changed, self._on_logging_relation_changed) + self.framework.observe(events.relation_departed, self._on_logging_relation_departed) + + def _on_lifecycle_event(self, _: HookEvent): + """Update require relation data on charm upgrades and other lifecycle events. + + Args: + event: a `CharmEvent` in response to which the consumer + charm must update its relation data. + """ + # Upgrade event or other charm-level event + self._reinitialize_alert_rules() + self.on.loki_push_api_endpoint_joined.emit() + + def _on_logging_relation_joined(self, event: RelationJoinedEvent): + """Handle changes in related consumers. + + Update relation data and emit events when a relation is established. + + Args: + event: a `CharmEvent` in response to which the consumer + charm must update its relation data. + + Emits: + loki_push_api_endpoint_joined: Once the relation is established, this event is emitted. + loki_push_api_alert_rules_error: This event is emitted when an invalid alert rules + file is encountered or if `alert_rules_path` is empty. + """ + # Alert rules will not change over the lifecycle of a charm, and do not need to be + # constantly set on every relation_changed event. Leave them here. + self._handle_alert_rules(event.relation) + self.on.loki_push_api_endpoint_joined.emit() + + def _on_logging_relation_changed(self, event: RelationEvent): + """Handle changes in related consumers. + + Anytime there are changes in the relation between Loki + and its consumers charms. + + Args: + event: a `CharmEvent` in response to which the consumer + charm must update its relation data. + + Emits: + loki_push_api_endpoint_joined: Once the relation is established, this event is emitted. + loki_push_api_alert_rules_error: This event is emitted when an invalid alert rules + file is encountered or if `alert_rules_path` is empty. + """ + if self._charm.unit.is_leader(): + ev = json.loads(event.relation.data[event.app].get("event", "{}")) + + if ev: + valid = bool(ev.get("valid", True)) + errors = ev.get("errors", "") + + if valid and not errors: + self.on.alert_rule_status_changed.emit(valid=valid) + else: + self.on.alert_rule_status_changed.emit(valid=valid, errors=errors) + + self.on.loki_push_api_endpoint_joined.emit() + + def _reinitialize_alert_rules(self): + """Reloads alert rules and updates all relations.""" + for relation in self._charm.model.relations[self._relation_name]: + self._handle_alert_rules(relation) + + def _process_logging_relation_changed(self, relation: Relation): + self._handle_alert_rules(relation) + self.on.loki_push_api_endpoint_joined.emit() + + def _on_logging_relation_departed(self, _: RelationEvent): + """Handle departures in related providers. + + Anytime there are departures in relations between the consumer charm and Loki + the consumer charm is informed, through a `LokiPushApiEndpointDeparted` event. + The consumer charm can then choose to update its configuration. + """ + # Provide default to avoid throwing, as in some complicated scenarios with + # upgrades and hook failures we might not have data in the storage + self.on.loki_push_api_endpoint_departed.emit() + + +class ContainerNotFoundError(Exception): + """Raised if the specified container does not exist.""" + + def __init__(self): + msg = "The specified container does not exist." + self.message = msg + + super().__init__(self.message) + + +class PromtailDigestError(EventBase): + """Event emitted when there is an error with Promtail initialization.""" + + def __init__(self, handle, message): + super().__init__(handle) + self.message = message + + def snapshot(self): + """Save message information.""" + return {"message": self.message} + + def restore(self, snapshot): + """Restore message information.""" + self.message = snapshot["message"] + + +class LogProxyEndpointDeparted(EventBase): + """Event emitted when a Log Proxy has departed.""" + + +class LogProxyEndpointJoined(EventBase): + """Event emitted when a Log Proxy joins.""" + + +class LogProxyEvents(ObjectEvents): + """Event descriptor for events raised by `LogProxyConsumer`.""" + + promtail_digest_error = EventSource(PromtailDigestError) + log_proxy_endpoint_departed = EventSource(LogProxyEndpointDeparted) + log_proxy_endpoint_joined = EventSource(LogProxyEndpointJoined) + + +class LogProxyConsumer(ConsumerBase): + """LogProxyConsumer class. + + > Note: This object is deprecated. Consider migrating to LogForwarder with the release of Juju + > 3.6 LTS. + + The `LogProxyConsumer` object provides a method for attaching `promtail` to + a workload in order to generate structured logging data from applications + which traditionally log to syslog or do not have native Loki integration. + The `LogProxyConsumer` can be instantiated as follows: + + self._log_proxy = LogProxyConsumer( + self, + logs_scheme={ + "workload-a": { + "log-files": ["/tmp/worload-a-1.log", "/tmp/worload-a-2.log"], + "syslog-port": 1514, + }, + "workload-b": {"log-files": ["/tmp/worload-b.log"], "syslog-port": 1515}, + }, + relation_name="log-proxy", + ) + + Args: + charm: a `CharmBase` object that manages this `LokiPushApiConsumer` object. + Typically, this is `self` in the instantiating class. + logs_scheme: a dict which maps containers and a list of log files and syslog port. + relation_name: the string name of the relation interface to look up. + If `charm` has exactly one relation with this interface, the relation's + name is returned. If none or multiple relations with the provided interface + are found, this method will raise either a NoRelationWithInterfaceFoundError or + MultipleRelationsWithInterfaceFoundError exception, respectively. + containers_syslog_port: a dict which maps (and enable) containers and syslog port. + alert_rules_path: an optional path for the location of alert rules + files. Defaults to "./src/loki_alert_rules", + resolved from the directory hosting the charm entry file. + The alert rules are automatically updated on charm upgrade. + recursive: Whether to scan for rule files recursively. + promtail_resource_name: An optional promtail resource name from metadata + if it has been modified and attached + insecure_skip_verify: skip SSL verification. + + Raises: + RelationNotFoundError: If there is no relation in the charm's metadata.yaml + with the same name as provided via `relation_name` argument. + RelationInterfaceMismatchError: The relation with the same name as provided + via `relation_name` argument does not have the `loki_push_api` relation + interface. + RelationRoleMismatchError: If the relation with the same name as provided + via `relation_name` argument does not have the `RelationRole.provides` + role. + """ + + on = LogProxyEvents() # pyright: ignore + + def __init__( + self, + charm, + *, + logs_scheme=None, + relation_name: str = DEFAULT_LOG_PROXY_RELATION_NAME, + alert_rules_path: str = DEFAULT_ALERT_RULES_RELATIVE_PATH, + recursive: bool = False, + promtail_resource_name: Optional[str] = None, + insecure_skip_verify: bool = False, + ): + super().__init__(charm, relation_name, alert_rules_path, recursive) + self._charm = charm + self._logs_scheme = logs_scheme or {} + self._relation_name = relation_name + self.topology = JujuTopology.from_charm(charm) + self._promtail_resource_name = promtail_resource_name or "promtail-bin" + self.insecure_skip_verify = insecure_skip_verify + self._promtails_ports = self._generate_promtails_ports(logs_scheme) + + # architecture used for promtail binary + arch = platform.processor() + if arch in ["x86_64", "amd64"]: + self._arch = "amd64" + elif arch in ["aarch64", "arm64", "armv8b", "armv8l"]: + self._arch = "arm64" + else: + self._arch = arch + + events = self._charm.on[relation_name] + self.framework.observe(events.relation_created, self._on_relation_created) + self.framework.observe(events.relation_changed, self._on_relation_changed) + self.framework.observe(events.relation_departed, self._on_relation_departed) + self._observe_pebble_ready() + + def _observe_pebble_ready(self): + for container in self._containers.keys(): + snake_case_container_name = container.replace("-", "_") + self.framework.observe( + getattr(self._charm.on, f"{snake_case_container_name}_pebble_ready"), + self._on_pebble_ready, + ) + + def _on_pebble_ready(self, event: WorkloadEvent): + """Event handler for `pebble_ready`.""" + if self.model.relations[self._relation_name]: + self._setup_promtail(event.workload) + + def _on_relation_created(self, _: RelationCreatedEvent) -> None: + """Event handler for `relation_created`.""" + for container in self._containers.values(): + if container.can_connect(): + self._setup_promtail(container) + + def _on_relation_changed(self, event: RelationEvent) -> None: + """Event handler for `relation_changed`. + + Args: + event: The event object `RelationChangedEvent`. + """ + self._handle_alert_rules(event.relation) + + if self._charm.unit.is_leader(): + ev = json.loads(event.relation.data[event.app].get("event", "{}")) + + if ev: + valid = bool(ev.get("valid", True)) + errors = ev.get("errors", "") + + if valid and not errors: + self.on.alert_rule_status_changed.emit(valid=valid) + else: + self.on.alert_rule_status_changed.emit(valid=valid, errors=errors) + + for container in self._containers.values(): + if not container.can_connect(): + continue + if self.model.relations[self._relation_name]: + if "promtail" not in container.get_plan().services: + self._setup_promtail(container) + continue + + new_config = self._promtail_config(container.name) + if new_config != self._current_config(container): + container.push( + WORKLOAD_CONFIG_PATH, yaml.safe_dump(new_config), make_dirs=True + ) + + # Loki may send endpoints late. Don't necessarily start, there may be + # no clients + if new_config["clients"]: + container.restart(WORKLOAD_SERVICE_NAME) + self.on.log_proxy_endpoint_joined.emit() + else: + self.on.promtail_digest_error.emit("No promtail client endpoints available!") + + def _on_relation_departed(self, _: RelationEvent) -> None: + """Event handler for `relation_departed`. + + Args: + event: The event object `RelationDepartedEvent`. + """ + for container in self._containers.values(): + if not container.can_connect(): + continue + if not self._charm.model.relations[self._relation_name]: + container.stop(WORKLOAD_SERVICE_NAME) + continue + + new_config = self._promtail_config(container.name) + if new_config != self._current_config(container): + container.push(WORKLOAD_CONFIG_PATH, yaml.safe_dump(new_config), make_dirs=True) + + if new_config["clients"]: + container.restart(WORKLOAD_SERVICE_NAME) + else: + container.stop(WORKLOAD_SERVICE_NAME) + self.on.log_proxy_endpoint_departed.emit() + + def _add_pebble_layer(self, workload_binary_path: str, container: Container) -> None: + """Adds Pebble layer that manages Promtail service in Workload container. + + Args: + workload_binary_path: string providing path to promtail binary in workload container. + container: container into which the layer is to be added. + """ + pebble_layer = Layer( + { + "summary": "promtail layer", + "description": "pebble config layer for promtail", + "services": { + WORKLOAD_SERVICE_NAME: { + "override": "replace", + "summary": WORKLOAD_SERVICE_NAME, + "command": f"{workload_binary_path} {self._cli_args}", + "startup": "disabled", + } + }, + } + ) + container.add_layer(container.name, pebble_layer, combine=True) + + def _create_directories(self, container: Container) -> None: + """Creates the directories for Promtail binary and config file.""" + container.make_dir(path=WORKLOAD_BINARY_DIR, make_parents=True) + container.make_dir(path=WORKLOAD_CONFIG_DIR, make_parents=True) + + def _obtain_promtail(self, promtail_info: dict, container: Container) -> None: + """Obtain promtail binary from an attached resource or download it. + + Args: + promtail_info: dictionary containing information about promtail binary + that must be used. The dictionary must have three keys + - "filename": filename of promtail binary + - "zipsha": sha256 sum of zip file of promtail binary + - "binsha": sha256 sum of unpacked promtail binary + container: container into which promtail is to be obtained. + """ + workload_binary_path = os.path.join(WORKLOAD_BINARY_DIR, promtail_info["filename"]) + if self._promtail_attached_as_resource: + self._push_promtail_if_attached(container, workload_binary_path) + return + + if self._promtail_must_be_downloaded(promtail_info): + self._download_and_push_promtail_to_workload(container, promtail_info) + else: + binary_path = os.path.join(BINARY_DIR, promtail_info["filename"]) + self._push_binary_to_workload(container, binary_path, workload_binary_path) + + def _push_binary_to_workload( + self, container: Container, binary_path: str, workload_binary_path: str + ) -> None: + """Push promtail binary into workload container. + + Args: + binary_path: path in charm container from which promtail binary is read. + workload_binary_path: path in workload container to which promtail binary is pushed. + container: container into which promtail is to be uploaded. + """ + with open(binary_path, "rb") as f: + container.push(workload_binary_path, f, permissions=0o755, make_dirs=True) + logger.debug("The promtail binary file has been pushed to the workload container.") + + @property + def _promtail_attached_as_resource(self) -> bool: + """Checks whether Promtail binary is attached to the charm or not. + + Returns: + a boolean representing whether Promtail binary is attached as a resource or not. + """ + try: + self._charm.model.resources.fetch(self._promtail_resource_name) + return True + except ModelError: + return False + except NameError as e: + if "invalid resource name" in str(e): + return False + raise + + def _push_promtail_if_attached(self, container: Container, workload_binary_path: str) -> bool: + """Checks whether Promtail binary is attached to the charm or not. + + Args: + workload_binary_path: string specifying expected path of promtail + in workload container + container: container into which promtail is to be pushed. + + Returns: + a boolean representing whether Promtail binary is attached or not. + """ + logger.info("Promtail binary file has been obtained from an attached resource.") + resource_path = self._charm.model.resources.fetch(self._promtail_resource_name) + self._push_binary_to_workload(container, resource_path, workload_binary_path) + return True + + def _promtail_must_be_downloaded(self, promtail_info: dict) -> bool: + """Checks whether promtail binary must be downloaded or not. + + Args: + promtail_info: dictionary containing information about promtail binary + that must be used. The dictionary must have three keys + - "filename": filename of promtail binary + - "zipsha": sha256 sum of zip file of promtail binary + - "binsha": sha256 sum of unpacked promtail binary + + Returns: + a boolean representing whether Promtail binary must be downloaded or not. + """ + binary_path = os.path.join(BINARY_DIR, promtail_info["filename"]) + if not self._is_promtail_binary_in_charm(binary_path): + return True + + if not self._sha256sums_matches(binary_path, promtail_info["binsha"]): + return True + + logger.debug("Promtail binary file is already in the the charm container.") + return False + + def _sha256sums_matches(self, file_path: str, sha256sum: str) -> bool: + """Checks whether a file's sha256sum matches or not with a specific sha256sum. + + Args: + file_path: A string representing the files' patch. + sha256sum: The sha256sum against which we want to verify. + + Returns: + a boolean representing whether a file's sha256sum matches or not with + a specific sha256sum. + """ + try: + with open(file_path, "rb") as f: + file_bytes = f.read() + result = sha256(file_bytes).hexdigest() + + if result != sha256sum: + msg = "File sha256sum mismatch, expected:'{}' but got '{}'".format( + sha256sum, result + ) + logger.debug(msg) + return False + + return True + except (APIError, FileNotFoundError): + msg = "File: '{}' could not be opened".format(file_path) + logger.error(msg) + return False + + def _is_promtail_binary_in_charm(self, binary_path: str) -> bool: + """Check if Promtail binary is already stored in charm container. + + Args: + binary_path: string path of promtail binary to check + + Returns: + a boolean representing whether Promtail is present or not. + """ + return True if Path(binary_path).is_file() else False + + def _download_and_push_promtail_to_workload( + self, container: Container, promtail_info: dict + ) -> None: + """Downloads a Promtail zip file and pushes the binary to the workload. + + Args: + promtail_info: dictionary containing information about promtail binary + that must be used. The dictionary must have three keys + - "filename": filename of promtail binary + - "zipsha": sha256 sum of zip file of promtail binary + - "binsha": sha256 sum of unpacked promtail binary + container: container into which promtail is to be uploaded. + """ + # Check for Juju proxy variables and fall back to standard ones if not set + # If no Juju proxy variable was set, we set proxies to None to let the ProxyHandler get + # the proxy env variables from the environment + proxies = { + # The ProxyHandler uses only the protocol names as keys + # https://docs.python.org/3/library/urllib.request.html#urllib.request.ProxyHandler + "https": os.environ.get("JUJU_CHARM_HTTPS_PROXY", ""), + "http": os.environ.get("JUJU_CHARM_HTTP_PROXY", ""), + # The ProxyHandler uses `no` for the no_proxy key + # https://github.com/python/cpython/blob/3.12/Lib/urllib/request.py#L2553 + "no": os.environ.get("JUJU_CHARM_NO_PROXY", ""), + } + proxies = {k: v for k, v in proxies.items() if v != ""} or None + + proxy_handler = request.ProxyHandler(proxies) + opener = request.build_opener(proxy_handler) + + with opener.open(promtail_info["url"]) as r: + file_bytes = r.read() + file_path = os.path.join(BINARY_DIR, promtail_info["filename"] + ".gz") + with open(file_path, "wb") as f: + f.write(file_bytes) + logger.info( + "Promtail binary zip file has been downloaded and stored in: %s", + file_path, + ) + + decompressed_file = GzipFile(fileobj=BytesIO(file_bytes)) + binary_path = os.path.join(BINARY_DIR, promtail_info["filename"]) + with open(binary_path, "wb") as outfile: + outfile.write(decompressed_file.read()) + logger.debug("Promtail binary file has been downloaded.") + + workload_binary_path = os.path.join(WORKLOAD_BINARY_DIR, promtail_info["filename"]) + self._push_binary_to_workload(container, binary_path, workload_binary_path) + + @property + def _cli_args(self) -> str: + """Return the cli arguments to pass to promtail. + + Returns: + The arguments as a string + """ + return "-config.file={}".format(WORKLOAD_CONFIG_PATH) + + def _current_config(self, container) -> dict: + """Property that returns the current Promtail configuration. + + Returns: + A dict containing Promtail configuration. + """ + if not container.can_connect(): + logger.debug("Could not connect to promtail container!") + return {} + try: + raw_current = container.pull(WORKLOAD_CONFIG_PATH).read() + return yaml.safe_load(raw_current) + except (ProtocolError, PathError) as e: + logger.warning( + "Could not check the current promtail configuration due to " + "a failure in retrieving the file: %s", + e, + ) + return {} + + def _promtail_config(self, container_name: str) -> dict: + """Generates the config file for Promtail. + + Reference: https://grafana.com/docs/loki/latest/send-data/promtail/configuration + """ + config = {"clients": self._clients_list()} + if self.insecure_skip_verify: + for client in config["clients"]: + client["tls_config"] = {"insecure_skip_verify": True} + + config.update(self._server_config(container_name)) + config.update(self._positions) + config.update(self._scrape_configs(container_name)) + return config + + def _clients_list(self) -> list: + """Generates a list of clients for use in the promtail config. + + Returns: + A list of endpoints + """ + return self.loki_endpoints + + def _server_config(self, container_name: str) -> dict: + """Generates the server section of the Promtail config file. + + Returns: + A dict representing the `server` section. + """ + return { + "server": { + "http_listen_port": self._promtails_ports[container_name]["http_listen_port"], + "grpc_listen_port": self._promtails_ports[container_name]["grpc_listen_port"], + } + } + + @property + def _positions(self) -> dict: + """Generates the positions section of the Promtail config file. + + Returns: + A dict representing the `positions` section. + """ + return {"positions": {"filename": WORKLOAD_POSITIONS_PATH}} + + def _scrape_configs(self, container_name: str) -> dict: + """Generates the scrape_configs section of the Promtail config file. + + Returns: + A dict representing the `scrape_configs` section. + """ + job_name = f"juju_{self.topology.identifier}" + + # The new JujuTopology doesn't include unit, but LogProxyConsumer should have it + common_labels = { + f"juju_{k}": v + for k, v in self.topology.as_dict(remapped_keys={"charm_name": "charm"}).items() + } + common_labels["container"] = container_name + scrape_configs = [] + + # Files config + labels = common_labels.copy() + labels.update( + { + "job": job_name, + "__path__": "", + } + ) + config = {"targets": ["localhost"], "labels": labels} + scrape_config = { + "job_name": "system", + "static_configs": self._generate_static_configs(config, container_name), + } + scrape_configs.append(scrape_config) + + # Syslog config + syslog_port = self._logs_scheme.get(container_name, {}).get("syslog-port") + if syslog_port: + relabel_mappings = [ + "severity", + "facility", + "hostname", + "app_name", + "proc_id", + "msg_id", + ] + syslog_labels = common_labels.copy() + syslog_labels.update({"job": f"{job_name}_syslog"}) + syslog_config = { + "job_name": "syslog", + "syslog": { + "listen_address": f"127.0.0.1:{syslog_port}", + "label_structured_data": True, + "labels": syslog_labels, + }, + "relabel_configs": [ + {"source_labels": [f"__syslog_message_{val}"], "target_label": val} + for val in relabel_mappings + ] + + [{"action": "labelmap", "regex": "__syslog_message_sd_(.+)"}], + } + scrape_configs.append(syslog_config) # type: ignore + + return {"scrape_configs": scrape_configs} + + def _generate_static_configs(self, config: dict, container_name: str) -> list: + """Generates static_configs section. + + Returns: + - a list of dictionaries representing static_configs section + """ + static_configs = [] + + for _file in self._logs_scheme.get(container_name, {}).get("log-files", []): + conf = deepcopy(config) + conf["labels"]["__path__"] = _file + static_configs.append(conf) + + return static_configs + + def _setup_promtail(self, container: Container) -> None: + # Use the first + relations = self._charm.model.relations[self._relation_name] + if len(relations) > 1: + logger.debug( + "Multiple log_proxy relations. Getting Promtail from application {}".format( + relations[0].app.name + ) + ) + relation = relations[0] + promtail_binaries = json.loads( + relation.data[relation.app].get("promtail_binary_zip_url", "{}") + ) + if not promtail_binaries: + return + + self._create_directories(container) + self._ensure_promtail_binary(promtail_binaries, container) + + container.push( + WORKLOAD_CONFIG_PATH, + yaml.safe_dump(self._promtail_config(container.name)), + make_dirs=True, + ) + + workload_binary_path = os.path.join( + WORKLOAD_BINARY_DIR, promtail_binaries[self._arch]["filename"] + ) + self._add_pebble_layer(workload_binary_path, container) + + if self._current_config(container).get("clients"): + try: + container.restart(WORKLOAD_SERVICE_NAME) + except ChangeError as e: + self.on.promtail_digest_error.emit(str(e)) + else: + self.on.log_proxy_endpoint_joined.emit() + else: + self.on.promtail_digest_error.emit("No promtail client endpoints available!") + + def _ensure_promtail_binary(self, promtail_binaries: dict, container: Container): + if self._is_promtail_installed(promtail_binaries[self._arch], container): + return + + try: + self._obtain_promtail(promtail_binaries[self._arch], container) + except URLError as e: + msg = f"Promtail binary couldn't be downloaded - {str(e)}" + logger.warning(msg) + self.on.promtail_digest_error.emit(msg) + + def _is_promtail_installed(self, promtail_info: dict, container: Container) -> bool: + """Determine if promtail has already been installed to the container. + + Args: + promtail_info: dictionary containing information about promtail binary + that must be used. The dictionary must at least contain a key + "filename" giving the name of promtail binary + container: container in which to check whether promtail is installed. + """ + workload_binary_path = f"{WORKLOAD_BINARY_DIR}/{promtail_info['filename']}" + try: + container.list_files(workload_binary_path) + except (APIError, FileNotFoundError): + return False + return True + + def _generate_promtails_ports(self, logs_scheme) -> dict: + return { + container: { + "http_listen_port": HTTP_LISTEN_PORT_START + 2 * i, + "grpc_listen_port": GRPC_LISTEN_PORT_START + 2 * i, + } + for i, container in enumerate(logs_scheme.keys()) + } + + def syslog_port(self, container_name: str) -> str: + """Gets the port on which promtail is listening for syslog in this container. + + Returns: + A str representing the port + """ + return str(self._logs_scheme.get(container_name, {}).get("syslog-port")) + + def rsyslog_config(self, container_name: str) -> str: + """Generates a config line for use with rsyslog. + + Returns: + The rsyslog config line as a string + """ + return 'action(type="omfwd" protocol="tcp" target="127.0.0.1" port="{}" Template="RSYSLOG_SyslogProtocol23Format" TCP_Framing="octet-counted")'.format( + self._logs_scheme.get(container_name, {}).get("syslog-port") + ) + + @property + def _containers(self) -> Dict[str, Container]: + return {cont: self._charm.unit.get_container(cont) for cont in self._logs_scheme.keys()} + + +class _PebbleLogClient: + @staticmethod + def check_juju_version() -> bool: + """Make sure the Juju version supports Log Forwarding.""" + juju_version = JujuVersion.from_environ() + if not juju_version > JujuVersion(version=str("3.3")): + msg = f"Juju version {juju_version} does not support Pebble log forwarding. Juju >= 3.4 is needed." + logger.warning(msg) + return False + return True + + @staticmethod + def _build_log_target( + unit_name: str, loki_endpoint: str, topology: JujuTopology, enable: bool + ) -> Dict: + """Build a log target for the log forwarding Pebble layer. + + Log target's syntax for enabling/disabling forwarding is explained here: + https://github.com/canonical/pebble?tab=readme-ov-file#log-forwarding + """ + services_value = ["all"] if enable else ["-all"] + + log_target = { + "override": "replace", + "services": services_value, + "type": "loki", + "location": loki_endpoint, + } + if enable: + log_target.update( + { + "labels": { + "product": "Juju", + "charm": topology._charm_name, + "juju_model": topology._model, + "juju_model_uuid": topology._model_uuid, + "juju_application": topology._application, + "juju_unit": topology._unit, + }, + } + ) + + return {unit_name: log_target} + + @staticmethod + def _build_log_targets( + loki_endpoints: Optional[Dict[str, str]], topology: JujuTopology, enable: bool + ): + """Build all the targets for the log forwarding Pebble layer.""" + targets = {} + if not loki_endpoints: + return targets + + for unit_name, endpoint in loki_endpoints.items(): + targets.update( + _PebbleLogClient._build_log_target( + unit_name=unit_name, + loki_endpoint=endpoint, + topology=topology, + enable=enable, + ) + ) + return targets + + @staticmethod + def disable_inactive_endpoints( + container: Container, active_endpoints: Dict[str, str], topology: JujuTopology + ): + """Disable forwarding for inactive endpoints by checking against the Pebble plan.""" + pebble_layer = container.get_plan().to_dict().get("log-targets", None) + if not pebble_layer: + return + + for unit_name, target in pebble_layer.items(): + # If the layer is a disabled log forwarding endpoint, skip it + if "-all" in target["services"]: # pyright: ignore + continue + + if unit_name not in active_endpoints: + layer = Layer( + { # pyright: ignore + "log-targets": _PebbleLogClient._build_log_targets( + loki_endpoints={unit_name: "(removed)"}, + topology=topology, + enable=False, + ) + } + ) + container.add_layer(f"{container.name}-log-forwarding", layer=layer, combine=True) + + @staticmethod + def enable_endpoints( + container: Container, active_endpoints: Dict[str, str], topology: JujuTopology + ): + """Enable forwarding for the specified Loki endpoints.""" + layer = Layer( + { # pyright: ignore + "log-targets": _PebbleLogClient._build_log_targets( + loki_endpoints=active_endpoints, + topology=topology, + enable=True, + ) + } + ) + container.add_layer(f"{container.name}-log-forwarding", layer, combine=True) + + +class LogForwarder(ConsumerBase): + """Forward the standard outputs of all workloads operated by a charm to one or multiple Loki endpoints. + + This class implements Pebble log forwarding. Juju >= 3.4 is needed. + """ + + def __init__( + self, + charm: CharmBase, + *, + relation_name: str = DEFAULT_RELATION_NAME, + alert_rules_path: str = DEFAULT_ALERT_RULES_RELATIVE_PATH, + recursive: bool = True, + skip_alert_topology_labeling: bool = False, + ): + _PebbleLogClient.check_juju_version() + super().__init__( + charm, relation_name, alert_rules_path, recursive, skip_alert_topology_labeling + ) + self._charm = charm + self._relation_name = relation_name + + on = self._charm.on[self._relation_name] + self.framework.observe(on.relation_joined, self._update_logging) + self.framework.observe(on.relation_changed, self._update_logging) + self.framework.observe(on.relation_departed, self._update_logging) + self.framework.observe(on.relation_broken, self._update_logging) + + for container_name in self._charm.meta.containers.keys(): + snake_case_container_name = container_name.replace("-", "_") + self.framework.observe( + getattr(self._charm.on, f"{snake_case_container_name}_pebble_ready"), + self._on_pebble_ready, + ) + + def _on_pebble_ready(self, event: PebbleReadyEvent): + if not (loki_endpoints := self._retrieve_endpoints_from_relation()): + logger.warning("No Loki endpoints available") + return + + self._update_endpoints(event.workload, loki_endpoints) + + def _update_logging(self, event: RelationEvent): + """Update the log forwarding to match the active Loki endpoints.""" + if not (loki_endpoints := self._retrieve_endpoints_from_relation()): + logger.warning("No Loki endpoints available") + return + + for container in self._charm.unit.containers.values(): + if container.can_connect(): + self._update_endpoints(container, loki_endpoints) + # else: `_update_endpoints` will be called on pebble-ready anyway. + + self._handle_alert_rules(event.relation) + + def _retrieve_endpoints_from_relation(self) -> dict: + loki_endpoints = {} + + # Get the endpoints from relation data + for relation in self._charm.model.relations[self._relation_name]: + loki_endpoints.update(self._fetch_endpoints(relation)) + + return loki_endpoints + + def _update_endpoints(self, container: Container, loki_endpoints: dict): + _PebbleLogClient.disable_inactive_endpoints( + container=container, + active_endpoints=loki_endpoints, + topology=self.topology, + ) + _PebbleLogClient.enable_endpoints( + container=container, active_endpoints=loki_endpoints, topology=self.topology + ) + + def is_ready(self, relation: Optional[Relation] = None): + """Check if the relation is active and healthy.""" + if not relation: + relations = self._charm.model.relations[self._relation_name] + if not relations: + return False + return all(self.is_ready(relation) for relation in relations) + + try: + if self._extract_urls(relation): + return True + return False + except (KeyError, json.JSONDecodeError): + return False + + def _extract_urls(self, relation: Relation) -> Dict[str, str]: + """Default getter function to extract Loki endpoints from a relation. + + Returns: + A dictionary of remote units and the respective Loki endpoint. + { + "loki/0": "http://loki:3100/loki/api/v1/push", + "another-loki/0": "http://another-loki:3100/loki/api/v1/push", + } + """ + endpoints: Dict = {} + + for unit in relation.units: + endpoint = relation.data[unit]["endpoint"] + deserialized_endpoint = json.loads(endpoint) + url = deserialized_endpoint["url"] + endpoints[unit.name] = url + + return endpoints + + def _fetch_endpoints(self, relation: Relation) -> Dict[str, str]: + """Fetch Loki Push API endpoints from relation data using the endpoints getter.""" + endpoints: Dict = {} + + if not self.is_ready(relation): + logger.warning(f"The relation '{relation.name}' is not ready yet.") + return endpoints + + # if the code gets here, the function won't raise anymore because it's + # also called in is_ready() + endpoints = self._extract_urls(relation) + + return endpoints + + +class CosTool: + """Uses cos-tool to inject label matchers into alert rule expressions and validate rules.""" + + _path = None + _disabled = False + + def __init__(self, charm): + self._charm = charm + + @property + def path(self): + """Lazy lookup of the path of cos-tool.""" + if self._disabled: + return None + if not self._path: + self._path = self._get_tool_path() + if not self._path: + logger.debug("Skipping injection of juju topology as label matchers") + self._disabled = True + return self._path + + def apply_label_matchers(self, rules) -> dict: + """Will apply label matchers to the expression of all alerts in all supplied groups.""" + if not self.path: + return rules + for group in rules["groups"]: + rules_in_group = group.get("rules", []) + for rule in rules_in_group: + topology = {} + # if the user for some reason has provided juju_unit, we'll need to honor it + # in most cases, however, this will be empty + for label in [ + "juju_model", + "juju_model_uuid", + "juju_application", + "juju_charm", + "juju_unit", + ]: + if label in rule["labels"]: + topology[label] = rule["labels"][label] + + rule["expr"] = self.inject_label_matchers(rule["expr"], topology) + return rules + + def validate_alert_rules(self, rules: dict) -> Tuple[bool, str]: + """Will validate correctness of alert rules, returning a boolean and any errors.""" + if not self.path: + logger.debug("`cos-tool` unavailable. Not validating alert correctness.") + return True, "" + + with tempfile.TemporaryDirectory() as tmpdir: + rule_path = Path(tmpdir + "/validate_rule.yaml") + + # Smash "our" rules format into what upstream actually uses, which is more like: + # + # groups: + # - name: foo + # rules: + # - alert: SomeAlert + # expr: up + # - alert: OtherAlert + # expr: up + transformed_rules = {"groups": []} # type: ignore + for rule in rules["groups"]: + transformed_rules["groups"].append(rule) + + rule_path.write_text(yaml.dump(transformed_rules)) + args = [str(self.path), "--format", "logql", "validate", str(rule_path)] + # noinspection PyBroadException + try: + self._exec(args) + return True, "" + except subprocess.CalledProcessError as e: + logger.debug("Validating the rules failed: %s", e.output) + return False, ", ".join([line for line in e.output if "error validating" in line]) + + def inject_label_matchers(self, expression, topology) -> str: + """Add label matchers to an expression.""" + if not topology: + return expression + if not self.path: + logger.debug("`cos-tool` unavailable. Leaving expression unchanged: %s", expression) + return expression + args = [str(self.path), "--format", "logql", "transform"] + args.extend( + ["--label-matcher={}={}".format(key, value) for key, value in topology.items()] + ) + + args.extend(["{}".format(expression)]) + # noinspection PyBroadException + try: + return self._exec(args) + except subprocess.CalledProcessError as e: + logger.debug('Applying the expression failed: "%s", falling back to the original', e) + print('Applying the expression failed: "{}", falling back to the original'.format(e)) + return expression + + def _get_tool_path(self) -> Optional[Path]: + arch = platform.processor() + arch = "amd64" if arch == "x86_64" else arch + res = "cos-tool-{}".format(arch) + try: + path = Path(res).resolve() + path.chmod(0o777) + return path + except NotImplementedError: + logger.debug("System lacks support for chmod") + except FileNotFoundError: + logger.debug('Could not locate cos-tool at: "{}"'.format(res)) + return None + + def _exec(self, cmd) -> str: + result = subprocess.run(cmd, check=True, stdout=subprocess.PIPE) + output = result.stdout.decode("utf-8").strip() + return output + + +def charm_logging_config( + endpoint_requirer: LokiPushApiConsumer, cert_path: Optional[Union[Path, str]] +) -> Tuple[Optional[List[str]], Optional[str]]: + """Utility function to determine the charm_logging config you will likely want. + + If no endpoint is provided: + disable charm logging. + If https endpoint is provided but cert_path is not found on disk: + disable charm logging. + If https endpoint is provided and cert_path is None: + ERROR + Else: + proceed with charm logging (with or without tls, as appropriate) + + Args: + endpoint_requirer: an instance of LokiPushApiConsumer. + cert_path: a path where a cert is stored. + + Returns: + A tuple with (optionally) the values of the endpoints and the certificate path. + + Raises: + LokiPushApiError: if some endpoint are http and others https. + """ + endpoints = [ep["url"] for ep in endpoint_requirer.loki_endpoints] + if not endpoints: + return None, None + + https = tuple(endpoint.startswith("https://") for endpoint in endpoints) + + if all(https): # all endpoints are https + if cert_path is None: + raise LokiPushApiError("Cannot send logs to https endpoints without a certificate.") + if not Path(cert_path).exists(): + # if endpoints is https BUT we don't have a server_cert yet: + # disable charm logging until we do to prevent tls errors + return None, None + return endpoints, str(cert_path) + + if all(not x for x in https): # all endpoints are http + return endpoints, None + + # if there's a disagreement, that's very weird: + raise LokiPushApiError("Some endpoints are http, some others are https. That's not good.") diff --git a/examples/go/charm/lib/charms/prometheus_k8s/v0/prometheus_scrape.py b/examples/go/charm/lib/charms/prometheus_k8s/v0/prometheus_scrape.py index e3d35c6..ca554fb 100644 --- a/examples/go/charm/lib/charms/prometheus_k8s/v0/prometheus_scrape.py +++ b/examples/go/charm/lib/charms/prometheus_k8s/v0/prometheus_scrape.py @@ -362,7 +362,7 @@ def _on_scrape_targets_changed(self, event): # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 47 +LIBPATCH = 48 PYDEPS = ["cosl"] @@ -2364,12 +2364,9 @@ def _get_tool_path(self) -> Optional[Path]: arch = "amd64" if arch == "x86_64" else arch res = "cos-tool-{}".format(arch) try: - path = Path(res).resolve() - path.chmod(0o777) + path = Path(res).resolve(strict=True) return path - except NotImplementedError: - logger.debug("System lacks support for chmod") - except FileNotFoundError: + except (FileNotFoundError, OSError): logger.debug('Could not locate cos-tool at: "{}"'.format(res)) return None diff --git a/examples/go/charm/lib/charms/redis_k8s/v0/redis.py b/examples/go/charm/lib/charms/redis_k8s/v0/redis.py index a731507..e28b14c 100644 --- a/examples/go/charm/lib/charms/redis_k8s/v0/redis.py +++ b/examples/go/charm/lib/charms/redis_k8s/v0/redis.py @@ -39,7 +39,7 @@ # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version. -LIBPATCH = 6 +LIBPATCH = 7 logger = logging.getLogger(__name__) @@ -78,6 +78,18 @@ def _on_relation_broken(self, event): # Trigger an event that our charm can react to. self.charm.on.redis_relation_updated.emit() + @property + def app_data(self) -> Optional[Dict[str, str]]: + """Retrieve the app data. + + Returns: + Dict: dict containing the app data. + """ + relation = self.model.get_relation(self.relation_name) + if not relation: + return None + return relation.data[relation.app] + @property def relation_data(self) -> Optional[Dict[str, str]]: """Retrieve the relation data. @@ -98,10 +110,16 @@ def url(self) -> Optional[str]: Returns: str: the Redis URL. """ - relation_data = self.relation_data - if not relation_data: + if not (relation_data := self.relation_data): return None + redis_host = relation_data.get("hostname") + + if app_data := self.app_data: + try: + redis_host = self.app_data.get("leader-host", redis_host) + except KeyError: + pass redis_port = relation_data.get("port") return f"redis://{redis_host}:{redis_port}" diff --git a/examples/go/charm/lib/charms/saml_integrator/v0/saml.py b/examples/go/charm/lib/charms/saml_integrator/v0/saml.py index 722b1c6..8fd1610 100644 --- a/examples/go/charm/lib/charms/saml_integrator/v0/saml.py +++ b/examples/go/charm/lib/charms/saml_integrator/v0/saml.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# Copyright 2025 Canonical Ltd. +# Copyright 2024 Canonical Ltd. # Licensed under the Apache2.0. See LICENSE file in charm source for details. """Library to manage the relation data for the SAML Integrator charm. @@ -68,7 +68,7 @@ class method `from_relation_data`. # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 9 +LIBPATCH = 10 # pylint: disable=wrong-import-position import re @@ -92,7 +92,7 @@ class SamlEndpoint(BaseModel): """ name: str = Field(..., min_length=1) - url: AnyHttpUrl + url: typing.Optional[AnyHttpUrl] binding: str = Field(..., min_length=1) response_url: typing.Optional[AnyHttpUrl] @@ -108,7 +108,8 @@ def to_relation_data(self) -> typing.Dict[str, str]: # Transform name into snakecase lowercase_name = re.sub(r"(? "SamlEndpoi prefix = f"{lowercase_name}_{http_method}_" return cls( name=name, - url=parse_obj_as(AnyHttpUrl, relation_data[f"{prefix}url"]), + url=( + parse_obj_as(AnyHttpUrl, relation_data[f"{prefix}url"]) + if relation_data[f"{prefix}url"] + else None + ), binding=relation_data[f"{prefix}binding"], response_url=( parse_obj_as(AnyHttpUrl, relation_data[f"{prefix}response_url"]) @@ -158,7 +163,7 @@ class SamlRelationData(BaseModel): """ entity_id: str = Field(..., min_length=1) - metadata_url: AnyHttpUrl + metadata_url: typing.Optional[AnyHttpUrl] certificates: typing.Tuple[str, ...] endpoints: typing.Tuple[SamlEndpoint, ...] @@ -170,9 +175,10 @@ def to_relation_data(self) -> typing.Dict[str, str]: """ result = { "entity_id": self.entity_id, - "metadata_url": str(self.metadata_url), "x509certs": ",".join(self.certificates), } + if self.metadata_url: + result["metadata_url"] = str(self.metadata_url) for endpoint in self.endpoints: result.update(endpoint.to_relation_data()) return result @@ -201,8 +207,10 @@ def from_relation_data(cls, relation_data: ops.RelationDataContent) -> "SamlRela endpoints.sort(key=lambda ep: ep.name) return cls( entity_id=relation_data.get("entity_id"), # type: ignore - metadata_url=parse_obj_as( - AnyHttpUrl, relation_data.get("metadata_url") + metadata_url=( + parse_obj_as(AnyHttpUrl, relation_data.get("metadata_url")) + if relation_data.get("metadata_url") + else None ), # type: ignore certificates=tuple(relation_data.get("x509certs").split(",")), # type: ignore endpoints=tuple(endpoints), @@ -232,7 +240,7 @@ def entity_id(self) -> str: return self.saml_relation_data.entity_id @property - def metadata_url(self) -> str: + def metadata_url(self) -> typing.Optional[str]: """Fetch the SAML metadata URL from the relation.""" return str(self.saml_relation_data.metadata_url) diff --git a/examples/go/charm/lib/charms/tempo_coordinator_k8s/v0/tracing.py b/examples/go/charm/lib/charms/tempo_coordinator_k8s/v0/tracing.py new file mode 100644 index 0000000..27144fa --- /dev/null +++ b/examples/go/charm/lib/charms/tempo_coordinator_k8s/v0/tracing.py @@ -0,0 +1,987 @@ +# Copyright 2024 Canonical Ltd. +# See LICENSE file for licensing details. +"""## Overview. + +This document explains how to integrate with the Tempo charm for the purpose of pushing traces to a +tracing endpoint provided by Tempo. It also explains how alternative implementations of the Tempo charm +may maintain the same interface and be backward compatible with all currently integrated charms. + +## Requirer Library Usage + +Charms seeking to push traces to Tempo, must do so using the `TracingEndpointRequirer` +object from this charm library. For the simplest use cases, using the `TracingEndpointRequirer` +object only requires instantiating it, typically in the constructor of your charm. The +`TracingEndpointRequirer` constructor requires the name of the relation over which a tracing endpoint + is exposed by the Tempo charm, and a list of protocols it intends to send traces with. + This relation must use the `tracing` interface. + The `TracingEndpointRequirer` object may be instantiated as follows + + from charms.tempo_coordinator_k8s.v0.tracing import TracingEndpointRequirer + + def __init__(self, *args): + super().__init__(*args) + # ... + self.tracing = TracingEndpointRequirer(self, + protocols=['otlp_grpc', 'otlp_http', 'jaeger_http_thrift'] + ) + # ... + +Note that the first argument (`self`) to `TracingEndpointRequirer` is always a reference to the +parent charm. + +Alternatively to providing the list of requested protocols at init time, the charm can do it at +any point in time by calling the +`TracingEndpointRequirer.request_protocols(*protocol:str, relation:Optional[Relation])` method. +Using this method also allows you to use per-relation protocols. + +Units of requirer charms obtain the tempo endpoint to which they will push their traces by calling +`TracingEndpointRequirer.get_endpoint(protocol: str)`, where `protocol` is, for example: +- `otlp_grpc` +- `otlp_http` +- `zipkin` +- `tempo` + +If the `protocol` is not in the list of protocols that the charm requested at endpoint set-up time, +the library will raise an error. + +We recommend that you scale up your tracing provider and relate it to an ingress so that your tracing requests +go through the ingress and get load balanced across all units. Otherwise, if the provider's leader goes down, your tracing goes down. + +## Provider Library Usage + +The `TracingEndpointProvider` object may be used by charms to manage relations with their +trace sources. For this purposes a Tempo-like charm needs to do two things + +1. Instantiate the `TracingEndpointProvider` object by providing it a +reference to the parent (Tempo) charm and optionally the name of the relation that the Tempo charm +uses to interact with its trace sources. This relation must conform to the `tracing` interface +and it is strongly recommended that this relation be named `tracing` which is its +default value. + +For example a Tempo charm may instantiate the `TracingEndpointProvider` in its constructor as +follows + + from charms.tempo_coordinator_k8s.v0.tracing import TracingEndpointProvider + + def __init__(self, *args): + super().__init__(*args) + # ... + self.tracing = TracingEndpointProvider(self) + # ... + + + +""" # noqa: W505 +import enum +import json +import logging +from pathlib import Path +from typing import ( + TYPE_CHECKING, + Any, + Dict, + List, + Literal, + MutableMapping, + Optional, + Sequence, + Tuple, + Union, + cast, +) + +import pydantic +from ops.charm import ( + CharmBase, + CharmEvents, + RelationBrokenEvent, + RelationEvent, + RelationRole, +) +from ops.framework import EventSource, Object +from ops.model import ModelError, Relation +from pydantic import BaseModel, Field + +# The unique Charmhub library identifier, never change it +LIBID = "d2f02b1f8d1244b5989fd55bc3a28943" + +# Increment this major API version when introducing breaking changes +LIBAPI = 0 + +# Increment this PATCH version before using `charmcraft publish-lib` or reset +# to 0 if you are raising the major API version +LIBPATCH = 5 + +PYDEPS = ["pydantic"] + +logger = logging.getLogger(__name__) + +DEFAULT_RELATION_NAME = "tracing" +RELATION_INTERFACE_NAME = "tracing" + +# Supported list rationale https://github.com/canonical/tempo-coordinator-k8s-operator/issues/8 +ReceiverProtocol = Literal[ + "zipkin", + "otlp_grpc", + "otlp_http", + "jaeger_grpc", + "jaeger_thrift_http", +] + +RawReceiver = Tuple[ReceiverProtocol, str] +"""Helper type. A raw receiver is defined as a tuple consisting of the protocol name, and the (external, if available), +(secured, if available) resolvable server url. +""" + +BUILTIN_JUJU_KEYS = {"ingress-address", "private-address", "egress-subnets"} + + +class TransportProtocolType(str, enum.Enum): + """Receiver Type.""" + + http = "http" + grpc = "grpc" + + +receiver_protocol_to_transport_protocol: Dict[ReceiverProtocol, TransportProtocolType] = { + "zipkin": TransportProtocolType.http, + "otlp_grpc": TransportProtocolType.grpc, + "otlp_http": TransportProtocolType.http, + "jaeger_thrift_http": TransportProtocolType.http, + "jaeger_grpc": TransportProtocolType.grpc, +} +"""A mapping between telemetry protocols and their corresponding transport protocol. +""" + + +class TracingError(Exception): + """Base class for custom errors raised by this library.""" + + +class NotReadyError(TracingError): + """Raised by the provider wrapper if a requirer hasn't published the required data (yet).""" + + +class ProtocolNotRequestedError(TracingError): + """Raised if the user attempts to obtain an endpoint for a protocol it did not request.""" + + +class DataValidationError(TracingError): + """Raised when data validation fails on IPU relation data.""" + + +class AmbiguousRelationUsageError(TracingError): + """Raised when one wrongly assumes that there can only be one relation on an endpoint.""" + + +if int(pydantic.version.VERSION.split(".")[0]) < 2: + + class DatabagModel(BaseModel): # type: ignore + """Base databag model.""" + + class Config: + """Pydantic config.""" + + # ignore any extra fields in the databag + extra = "ignore" + """Ignore any extra fields in the databag.""" + allow_population_by_field_name = True + """Allow instantiating this class by field name (instead of forcing alias).""" + + _NEST_UNDER = None + + @classmethod + def load(cls, databag: MutableMapping): + """Load this model from a Juju databag.""" + if cls._NEST_UNDER: + return cls.parse_obj(json.loads(databag[cls._NEST_UNDER])) + + try: + data = { + k: json.loads(v) + for k, v in databag.items() + # Don't attempt to parse model-external values + if k in {f.alias for f in cls.__fields__.values()} + } + except json.JSONDecodeError as e: + msg = f"invalid databag contents: expecting json. {databag}" + logger.error(msg) + raise DataValidationError(msg) from e + + try: + return cls.parse_raw(json.dumps(data)) # type: ignore + except pydantic.ValidationError as e: + msg = f"failed to validate databag: {databag}" + logger.debug(msg, exc_info=True) + raise DataValidationError(msg) from e + + def dump(self, databag: Optional[MutableMapping] = None, clear: bool = True): + """Write the contents of this model to Juju databag. + + :param databag: the databag to write the data to. + :param clear: ensure the databag is cleared before writing it. + """ + if clear and databag: + databag.clear() + + if databag is None: + databag = {} + + if self._NEST_UNDER: + databag[self._NEST_UNDER] = self.json(by_alias=True) + return databag + + dct = self.dict() + for key, field in self.__fields__.items(): # type: ignore + value = dct[key] + databag[field.alias or key] = json.dumps(value) + + return databag + +else: + from pydantic import ConfigDict + + class DatabagModel(BaseModel): + """Base databag model.""" + + model_config = ConfigDict( + # ignore any extra fields in the databag + extra="ignore", + # Allow instantiating this class by field name (instead of forcing alias). + populate_by_name=True, + # Custom config key: whether to nest the whole datastructure (as json) + # under a field or spread it out at the toplevel. + _NEST_UNDER=None, # type: ignore + ) + """Pydantic config.""" + + @classmethod + def load(cls, databag: MutableMapping): + """Load this model from a Juju databag.""" + nest_under = cls.model_config.get("_NEST_UNDER") # type: ignore + if nest_under: + return cls.model_validate(json.loads(databag[nest_under])) # type: ignore + + try: + data = { + k: json.loads(v) + for k, v in databag.items() + # Don't attempt to parse model-external values + if k in {(f.alias or n) for n, f in cls.__fields__.items()} + } + except json.JSONDecodeError as e: + msg = f"invalid databag contents: expecting json. {databag}" + logger.error(msg) + raise DataValidationError(msg) from e + + try: + return cls.model_validate_json(json.dumps(data)) # type: ignore + except pydantic.ValidationError as e: + msg = f"failed to validate databag: {databag}" + logger.debug(msg, exc_info=True) + raise DataValidationError(msg) from e + + def dump(self, databag: Optional[MutableMapping] = None, clear: bool = True): + """Write the contents of this model to Juju databag. + + :param databag: the databag to write the data to. + :param clear: ensure the databag is cleared before writing it. + """ + if clear and databag: + databag.clear() + + if databag is None: + databag = {} + nest_under = self.model_config.get("_NEST_UNDER") + if nest_under: + databag[nest_under] = self.model_dump_json( # type: ignore + by_alias=True, + # skip keys whose values are default + exclude_defaults=True, + ) + return databag + + dct = self.model_dump() # type: ignore + for key, field in self.model_fields.items(): # type: ignore + value = dct[key] + if value == field.default: + continue + databag[field.alias or key] = json.dumps(value) + + return databag + + +# todo use models from charm-relation-interfaces +if int(pydantic.version.VERSION.split(".")[0]) < 2: + + class ProtocolType(BaseModel): # type: ignore + """Protocol Type.""" + + class Config: + """Pydantic config.""" + + use_enum_values = True + """Allow serializing enum values.""" + + name: str = Field( + ..., + description="Receiver protocol name. What protocols are supported (and what they are called) " + "may differ per provider.", + examples=["otlp_grpc", "otlp_http", "tempo_http"], + ) + + type: TransportProtocolType = Field( + ..., + description="The transport protocol used by this receiver.", + examples=["http", "grpc"], + ) + +else: + + class ProtocolType(BaseModel): + """Protocol Type.""" + + model_config = ConfigDict( # type: ignore + # Allow serializing enum values. + use_enum_values=True + ) + """Pydantic config.""" + + name: str = Field( + ..., + description="Receiver protocol name. What protocols are supported (and what they are called) " + "may differ per provider.", + examples=["otlp_grpc", "otlp_http", "tempo_http"], + ) + + type: TransportProtocolType = Field( + ..., + description="The transport protocol used by this receiver.", + examples=["http", "grpc"], + ) + + +class Receiver(BaseModel): + """Specification of an active receiver.""" + + protocol: ProtocolType = Field(..., description="Receiver protocol name and type.") + url: str = Field( + ..., + description="""URL at which the receiver is reachable. If there's an ingress, it would be the external URL. + Otherwise, it would be the service's fqdn or internal IP. + If the protocol type is grpc, the url will not contain a scheme.""", + examples=[ + "http://traefik_address:2331", + "https://traefik_address:2331", + "http://tempo_public_ip:2331", + "https://tempo_public_ip:2331", + "tempo_public_ip:2331", + ], + ) + + +class TracingProviderAppData(DatabagModel): # noqa: D101 + """Application databag model for the tracing provider.""" + + receivers: List[Receiver] = Field( + ..., + description="List of all receivers enabled on the tracing provider.", + ) + + +class TracingRequirerAppData(DatabagModel): # noqa: D101 + """Application databag model for the tracing requirer.""" + + receivers: List[ReceiverProtocol] + """Requested receivers.""" + + +class _AutoSnapshotEvent(RelationEvent): + __args__: Tuple[str, ...] = () + __optional_kwargs__: Dict[str, Any] = {} + + @classmethod + def __attrs__(cls): + return cls.__args__ + tuple(cls.__optional_kwargs__.keys()) + + def __init__(self, handle, relation, *args, **kwargs): + super().__init__(handle, relation) + + if not len(self.__args__) == len(args): + raise TypeError("expected {} args, got {}".format(len(self.__args__), len(args))) + + for attr, obj in zip(self.__args__, args): + setattr(self, attr, obj) + for attr, default in self.__optional_kwargs__.items(): + obj = kwargs.get(attr, default) + setattr(self, attr, obj) + + def snapshot(self) -> dict: + dct = super().snapshot() + for attr in self.__attrs__(): + obj = getattr(self, attr) + try: + dct[attr] = obj + except ValueError as e: + raise ValueError( + "cannot automagically serialize {}: " + "override this method and do it " + "manually.".format(obj) + ) from e + + return dct + + def restore(self, snapshot: dict) -> None: + super().restore(snapshot) + for attr, obj in snapshot.items(): + setattr(self, attr, obj) + + +class RelationNotFoundError(Exception): + """Raised if no relation with the given name is found.""" + + def __init__(self, relation_name: str): + self.relation_name = relation_name + self.message = "No relation named '{}' found".format(relation_name) + super().__init__(self.message) + + +class RelationInterfaceMismatchError(Exception): + """Raised if the relation with the given name has an unexpected interface.""" + + def __init__( + self, + relation_name: str, + expected_relation_interface: str, + actual_relation_interface: str, + ): + self.relation_name = relation_name + self.expected_relation_interface = expected_relation_interface + self.actual_relation_interface = actual_relation_interface + self.message = ( + "The '{}' relation has '{}' as interface rather than the expected '{}'".format( + relation_name, actual_relation_interface, expected_relation_interface + ) + ) + + super().__init__(self.message) + + +class RelationRoleMismatchError(Exception): + """Raised if the relation with the given name has a different role than expected.""" + + def __init__( + self, + relation_name: str, + expected_relation_role: RelationRole, + actual_relation_role: RelationRole, + ): + self.relation_name = relation_name + self.expected_relation_interface = expected_relation_role + self.actual_relation_role = actual_relation_role + self.message = "The '{}' relation has role '{}' rather than the expected '{}'".format( + relation_name, repr(actual_relation_role), repr(expected_relation_role) + ) + + super().__init__(self.message) + + +def _validate_relation_by_interface_and_direction( + charm: CharmBase, + relation_name: str, + expected_relation_interface: str, + expected_relation_role: RelationRole, +): + """Validate a relation. + + Verifies that the `relation_name` provided: (1) exists in metadata.yaml, + (2) declares as interface the interface name passed as `relation_interface` + and (3) has the right "direction", i.e., it is a relation that `charm` + provides or requires. + + Args: + charm: a `CharmBase` object to scan for the matching relation. + relation_name: the name of the relation to be verified. + expected_relation_interface: the interface name to be matched by the + relation named `relation_name`. + expected_relation_role: whether the `relation_name` must be either + provided or required by `charm`. + + Raises: + RelationNotFoundError: If there is no relation in the charm's metadata.yaml + with the same name as provided via `relation_name` argument. + RelationInterfaceMismatchError: The relation with the same name as provided + via `relation_name` argument does not have the same relation interface + as specified via the `expected_relation_interface` argument. + RelationRoleMismatchError: If the relation with the same name as provided + via `relation_name` argument does not have the same role as specified + via the `expected_relation_role` argument. + """ + if relation_name not in charm.meta.relations: + raise RelationNotFoundError(relation_name) + + relation = charm.meta.relations[relation_name] + + # fixme: why do we need to cast here? + actual_relation_interface = cast(str, relation.interface_name) + + if actual_relation_interface != expected_relation_interface: + raise RelationInterfaceMismatchError( + relation_name, expected_relation_interface, actual_relation_interface + ) + + if expected_relation_role is RelationRole.provides: + if relation_name not in charm.meta.provides: + raise RelationRoleMismatchError( + relation_name, RelationRole.provides, RelationRole.requires + ) + elif expected_relation_role is RelationRole.requires: + if relation_name not in charm.meta.requires: + raise RelationRoleMismatchError( + relation_name, RelationRole.requires, RelationRole.provides + ) + else: + raise TypeError("Unexpected RelationDirection: {}".format(expected_relation_role)) + + +class RequestEvent(RelationEvent): + """Event emitted when a remote requests a tracing endpoint.""" + + @property + def requested_receivers(self) -> List[ReceiverProtocol]: + """List of receiver protocols that have been requested.""" + relation = self.relation + app = relation.app + if not app: + raise NotReadyError("relation.app is None") + + return TracingRequirerAppData.load(relation.data[app]).receivers + + +class BrokenEvent(RelationBrokenEvent): + """Event emitted when a relation on tracing is broken.""" + + +class TracingEndpointProviderEvents(CharmEvents): + """TracingEndpointProvider events.""" + + request = EventSource(RequestEvent) + broken = EventSource(BrokenEvent) + + +class TracingEndpointProvider(Object): + """Class representing a trace receiver service.""" + + on = TracingEndpointProviderEvents() # type: ignore + + def __init__( + self, + charm: CharmBase, + external_url: Optional[str] = None, + relation_name: str = DEFAULT_RELATION_NAME, + ): + """Initialize. + + Args: + charm: a `CharmBase` instance that manages this instance of the Tempo service. + external_url: external address of the node hosting the tempo server, + if an ingress is present. + relation_name: an optional string name of the relation between `charm` + and the Tempo charmed service. The default is "tracing". + + Raises: + RelationNotFoundError: If there is no relation in the charm's metadata.yaml + with the same name as provided via `relation_name` argument. + RelationInterfaceMismatchError: The relation with the same name as provided + via `relation_name` argument does not have the `tracing` relation + interface. + RelationRoleMismatchError: If the relation with the same name as provided + via `relation_name` argument does not have the `RelationRole.requires` + role. + """ + _validate_relation_by_interface_and_direction( + charm, relation_name, RELATION_INTERFACE_NAME, RelationRole.provides + ) + + super().__init__(charm, relation_name + "tracing-provider") + self._charm = charm + self._external_url = external_url + self._relation_name = relation_name + self.framework.observe( + self._charm.on[relation_name].relation_joined, self._on_relation_event + ) + self.framework.observe( + self._charm.on[relation_name].relation_created, self._on_relation_event + ) + self.framework.observe( + self._charm.on[relation_name].relation_changed, self._on_relation_event + ) + self.framework.observe( + self._charm.on[relation_name].relation_broken, self._on_relation_broken_event + ) + + def _on_relation_broken_event(self, e: RelationBrokenEvent): + """Handle relation broken events.""" + self.on.broken.emit(e.relation) + + def _on_relation_event(self, e: RelationEvent): + """Handle relation created/joined/changed events.""" + if self.is_requirer_ready(e.relation): + self.on.request.emit(e.relation) + + def is_requirer_ready(self, relation: Relation): + """Attempt to determine if requirer has already populated app data.""" + try: + self._get_requested_protocols(relation) + except NotReadyError: + return False + return True + + @staticmethod + def _get_requested_protocols(relation: Relation): + app = relation.app + if not app: + raise NotReadyError("relation.app is None") + + try: + databag = TracingRequirerAppData.load(relation.data[app]) + except (json.JSONDecodeError, pydantic.ValidationError, DataValidationError): + logger.info(f"relation {relation} is not ready to talk tracing") + raise NotReadyError() + return databag.receivers + + def requested_protocols(self): + """All receiver protocols that have been requested by our related apps.""" + requested_protocols = set() + for relation in self.relations: + try: + protocols = self._get_requested_protocols(relation) + except NotReadyError: + continue + requested_protocols.update(protocols) + return requested_protocols + + @property + def relations(self) -> List[Relation]: + """All relations active on this endpoint.""" + return self._charm.model.relations[self._relation_name] + + def publish_receivers(self, receivers: Sequence[RawReceiver]): + """Let all requirers know that these receivers are active and listening.""" + if not self._charm.unit.is_leader(): + raise RuntimeError("only leader can do this") + + for relation in self.relations: + try: + TracingProviderAppData( + receivers=[ + Receiver( + url=url, + protocol=ProtocolType( + name=protocol, + type=receiver_protocol_to_transport_protocol[protocol], + ), + ) + for protocol, url in receivers + ], + ).dump(relation.data[self._charm.app]) + + except ModelError as e: + # args are bytes + msg = e.args[0] + if isinstance(msg, bytes): + if msg.startswith( + b"ERROR cannot read relation application settings: permission denied" + ): + logger.error( + f"encountered error {e} while attempting to update_relation_data." + f"The relation must be gone." + ) + continue + raise + + +class EndpointRemovedEvent(RelationBrokenEvent): + """Event representing a change in one of the receiver endpoints.""" + + +class EndpointChangedEvent(_AutoSnapshotEvent): + """Event representing a change in one of the receiver endpoints.""" + + __args__ = ("_receivers",) + + if TYPE_CHECKING: + _receivers = [] # type: List[dict] + + @property + def receivers(self) -> List[Receiver]: + """Cast receivers back from dict.""" + return [Receiver(**i) for i in self._receivers] + + +class TracingEndpointRequirerEvents(CharmEvents): + """TracingEndpointRequirer events.""" + + endpoint_changed = EventSource(EndpointChangedEvent) + endpoint_removed = EventSource(EndpointRemovedEvent) + + +class TracingEndpointRequirer(Object): + """A tracing endpoint for Tempo.""" + + on = TracingEndpointRequirerEvents() # type: ignore + + def __init__( + self, + charm: CharmBase, + relation_name: str = DEFAULT_RELATION_NAME, + protocols: Optional[List[ReceiverProtocol]] = None, + ): + """Construct a tracing requirer for a Tempo charm. + + If your application supports pushing traces to a distributed tracing backend, the + `TracingEndpointRequirer` object enables your charm to easily access endpoint information + exchanged over a `tracing` relation interface. + + Args: + charm: a `CharmBase` object that manages this + `TracingEndpointRequirer` object. Typically, this is `self` in the instantiating + class. + relation_name: an optional string name of the relation between `charm` + and the Tempo charmed service. The default is "tracing". It is strongly + advised not to change the default, so that people deploying your charm will have a + consistent experience with all other charms that provide tracing endpoints. + protocols: optional list of protocols that the charm intends to send traces with. + The provider will enable receivers for these and only these protocols, + so be sure to enable all protocols the charm or its workload are going to need. + + Raises: + RelationNotFoundError: If there is no relation in the charm's metadata.yaml + with the same name as provided via `relation_name` argument. + RelationInterfaceMismatchError: The relation with the same name as provided + via `relation_name` argument does not have the `tracing` relation + interface. + RelationRoleMismatchError: If the relation with the same name as provided + via `relation_name` argument does not have the `RelationRole.provides` + role. + """ + _validate_relation_by_interface_and_direction( + charm, relation_name, RELATION_INTERFACE_NAME, RelationRole.requires + ) + + super().__init__(charm, relation_name) + + self._is_single_endpoint = charm.meta.relations[relation_name].limit == 1 + + self._charm = charm + self._relation_name = relation_name + + events = self._charm.on[self._relation_name] + self.framework.observe(events.relation_changed, self._on_tracing_relation_changed) + self.framework.observe(events.relation_broken, self._on_tracing_relation_broken) + + if protocols: + self.request_protocols(protocols) + + def request_protocols( + self, protocols: Sequence[ReceiverProtocol], relation: Optional[Relation] = None + ): + """Publish the list of protocols which the provider should activate.""" + # todo: should we check if _is_single_endpoint and len(self.relations) > 1 and raise, here? + relations = [relation] if relation else self.relations + + if not protocols: + # empty sequence + raise ValueError( + "You need to pass a nonempty sequence of protocols to `request_protocols`." + ) + + try: + if self._charm.unit.is_leader(): + for relation in relations: + TracingRequirerAppData( + receivers=list(protocols), + ).dump(relation.data[self._charm.app]) + + except ModelError as e: + # args are bytes + msg = e.args[0] + if isinstance(msg, bytes): + if msg.startswith( + b"ERROR cannot read relation application settings: permission denied" + ): + logger.error( + f"encountered error {e} while attempting to request_protocols." + f"The relation must be gone." + ) + return + raise + + @property + def relations(self) -> List[Relation]: + """The tracing relations associated with this endpoint.""" + return self._charm.model.relations[self._relation_name] + + @property + def _relation(self) -> Optional[Relation]: + """If this wraps a single endpoint, the relation bound to it, if any.""" + if not self._is_single_endpoint: + objname = type(self).__name__ + raise AmbiguousRelationUsageError( + f"This {objname} wraps a {self._relation_name} endpoint that has " + "limit != 1. We can't determine what relation, of the possibly many, you are " + f"talking about. Please pass a relation instance while calling {objname}, " + "or set limit=1 in the charm metadata." + ) + relations = self.relations + return relations[0] if relations else None + + def is_ready(self, relation: Optional[Relation] = None): + """Is this endpoint ready?""" + relation = relation or self._relation + if not relation: + logger.debug(f"no relation on {self._relation_name !r}: tracing not ready") + return False + if relation.data is None: + logger.error(f"relation data is None for {relation}") + return False + if not relation.app: + logger.error(f"{relation} event received but there is no relation.app") + return False + try: + databag = dict(relation.data[relation.app]) + TracingProviderAppData.load(databag) + + except (json.JSONDecodeError, pydantic.ValidationError, DataValidationError): + logger.info(f"failed validating relation data for {relation}") + return False + return True + + def _on_tracing_relation_changed(self, event): + """Notify the providers that there is new endpoint information available.""" + relation = event.relation + if not self.is_ready(relation): + self.on.endpoint_removed.emit(relation) # type: ignore + return + + data = TracingProviderAppData.load(relation.data[relation.app]) + self.on.endpoint_changed.emit(relation, [i.dict() for i in data.receivers]) # type: ignore + + def _on_tracing_relation_broken(self, event: RelationBrokenEvent): + """Notify the providers that the endpoint is broken.""" + relation = event.relation + self.on.endpoint_removed.emit(relation) # type: ignore + + def get_all_endpoints( + self, relation: Optional[Relation] = None + ) -> Optional[TracingProviderAppData]: + """Unmarshalled relation data.""" + relation = relation or self._relation + if not self.is_ready(relation): + return + return TracingProviderAppData.load(relation.data[relation.app]) # type: ignore + + def _get_endpoint( + self, relation: Optional[Relation], protocol: ReceiverProtocol + ) -> Optional[str]: + app_data = self.get_all_endpoints(relation) + if not app_data: + return None + receivers: List[Receiver] = list( + filter(lambda i: i.protocol.name == protocol, app_data.receivers) + ) + if not receivers: + # it can happen if the charm requests tracing protocols, but the relay (such as grafana-agent) isn't yet + # connected to the tracing backend. In this case, it's not an error the charm author can do anything about + logger.warning(f"no receiver found with protocol={protocol!r}.") + return + if len(receivers) > 1: + # if we have more than 1 receiver that matches, it shouldn't matter which receiver we'll be using. + logger.warning( + f"too many receivers with protocol={protocol!r}; using first one. Found: {receivers}" + ) + + receiver = receivers[0] + return receiver.url + + def get_endpoint( + self, protocol: ReceiverProtocol, relation: Optional[Relation] = None + ) -> Optional[str]: + """Receiver endpoint for the given protocol. + + It could happen that this function gets called before the provider publishes the endpoints. + In such a scenario, if a non-leader unit calls this function, a permission denied exception will be raised due to + restricted access. To prevent this, this function needs to be guarded by the `is_ready` check. + + Raises: + ProtocolNotRequestedError: + If the charm unit is the leader unit and attempts to obtain an endpoint for a protocol it did not request. + """ + endpoint = self._get_endpoint(relation or self._relation, protocol=protocol) + if not endpoint: + requested_protocols = set() + relations = [relation] if relation else self.relations + for relation in relations: + try: + databag = TracingRequirerAppData.load(relation.data[self._charm.app]) + except DataValidationError: + continue + + requested_protocols.update(databag.receivers) + + if protocol not in requested_protocols: + raise ProtocolNotRequestedError(protocol, relation) + + return None + return endpoint + + +def charm_tracing_config( + endpoint_requirer: TracingEndpointRequirer, cert_path: Optional[Union[Path, str]] +) -> Tuple[Optional[str], Optional[str]]: + """Return the charm_tracing config you likely want. + + If no endpoint is provided: + disable charm tracing. + If https endpoint is provided but cert_path is not found on disk: + disable charm tracing. + If https endpoint is provided and cert_path is None: + ERROR + Else: + proceed with charm tracing (with or without tls, as appropriate) + + Usage: + >>> from lib.charms.tempo_coordinator_k8s.v0.charm_tracing import trace_charm + >>> from lib.charms.tempo_coordinator_k8s.v0.tracing import charm_tracing_config + >>> @trace_charm(tracing_endpoint="my_endpoint", cert_path="cert_path") + >>> class MyCharm(...): + >>> _cert_path = "/path/to/cert/on/charm/container.crt" + >>> def __init__(self, ...): + >>> self.tracing = TracingEndpointRequirer(...) + >>> self.my_endpoint, self.cert_path = charm_tracing_config( + ... self.tracing, self._cert_path) + """ + if not endpoint_requirer.is_ready(): + return None, None + + endpoint = endpoint_requirer.get_endpoint("otlp_http") + if not endpoint: + return None, None + + is_https = endpoint.startswith("https://") + + if is_https: + if cert_path is None or not Path(cert_path).exists(): + # disable charm tracing until we obtain a cert to prevent tls errors + logger.error( + "Tracing endpoint is https, but no server_cert has been passed." + "Please point @trace_charm to a `server_cert` attr. " + "This might also mean that the tracing provider is related to a " + "certificates provider, but this application is not (yet). " + "In that case, you might just have to wait a bit for the certificates " + "integration to settle. " + ) + return None, None + return endpoint, str(cert_path) + else: + return endpoint, None diff --git a/examples/go/charm/lib/charms/traefik_k8s/v2/ingress.py b/examples/go/charm/lib/charms/traefik_k8s/v2/ingress.py index 582a31f..bb7ac5e 100644 --- a/examples/go/charm/lib/charms/traefik_k8s/v2/ingress.py +++ b/examples/go/charm/lib/charms/traefik_k8s/v2/ingress.py @@ -1,4 +1,4 @@ -# Copyright 2025 Canonical Ltd. +# Copyright 2024 Canonical Ltd. # See LICENSE file for licensing details. r"""# Interface Library for ingress. @@ -56,13 +56,14 @@ def _on_ingress_revoked(self, event: IngressPerAppRevokedEvent): import socket import typing from dataclasses import dataclass +from functools import partial from typing import Any, Callable, Dict, List, MutableMapping, Optional, Sequence, Tuple, Union import pydantic from ops.charm import CharmBase, RelationBrokenEvent, RelationEvent from ops.framework import EventSource, Object, ObjectEvents, StoredState from ops.model import ModelError, Relation, Unit -from pydantic import AnyHttpUrl, BaseModel, Field, validator +from pydantic import AnyHttpUrl, BaseModel, Field # The unique Charmhub library identifier, never change it LIBID = "e6de2a5cd5b34422a204668f3b8f90d2" @@ -72,7 +73,7 @@ def _on_ingress_revoked(self, event: IngressPerAppRevokedEvent): # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 13 +LIBPATCH = 14 PYDEPS = ["pydantic"] @@ -84,6 +85,9 @@ def _on_ingress_revoked(self, event: IngressPerAppRevokedEvent): PYDANTIC_IS_V1 = int(pydantic.version.VERSION.split(".")[0]) < 2 if PYDANTIC_IS_V1: + from pydantic import validator + + input_validator = partial(validator, pre=True) class DatabagModel(BaseModel): # type: ignore """Base databag model.""" @@ -143,7 +147,9 @@ def dump(self, databag: Optional[MutableMapping] = None, clear: bool = True): return databag else: - from pydantic import ConfigDict + from pydantic import ConfigDict, field_validator + + input_validator = partial(field_validator, mode="before") class DatabagModel(BaseModel): """Base databag model.""" @@ -171,7 +177,7 @@ def load(cls, databag: MutableMapping): k: json.loads(v) for k, v in databag.items() # Don't attempt to parse model-external values - if k in {(f.alias or n) for n, f in cls.__fields__.items()} # type: ignore + if k in {(f.alias or n) for n, f in cls.model_fields.items()} # type: ignore } except json.JSONDecodeError as e: msg = f"invalid databag contents: expecting json. {databag}" @@ -252,14 +258,14 @@ class IngressRequirerAppData(DatabagModel): default="http", description="What scheme to use in the generated ingress url" ) - @validator("scheme", pre=True) + @input_validator("scheme") def validate_scheme(cls, scheme): # noqa: N805 # pydantic wants 'cls' as first arg """Validate scheme arg.""" if scheme not in {"http", "https", "h2c"}: raise ValueError("invalid scheme: should be one of `http|https|h2c`") return scheme - @validator("port", pre=True) + @input_validator("port") def validate_port(cls, port): # noqa: N805 # pydantic wants 'cls' as first arg """Validate port.""" assert isinstance(port, int), type(port) @@ -277,13 +283,13 @@ class IngressRequirerUnitData(DatabagModel): "IP can only be None if the IP information can't be retrieved from juju.", ) - @validator("host", pre=True) + @input_validator("host") def validate_host(cls, host): # noqa: N805 # pydantic wants 'cls' as first arg """Validate host.""" assert isinstance(host, str), type(host) return host - @validator("ip", pre=True) + @input_validator("ip") def validate_ip(cls, ip): # noqa: N805 # pydantic wants 'cls' as first arg """Validate ip.""" if ip is None: @@ -462,7 +468,10 @@ def _handle_relation(self, event): event.relation, data.app.name, data.app.model, - [unit.dict() for unit in data.units], + [ + unit.dict() if PYDANTIC_IS_V1 else unit.model_dump(mode="json") + for unit in data.units + ], data.app.strip_prefix or False, data.app.redirect_https or False, ) From 387ab90da83ff5f82479af713458a545e9fdf348 Mon Sep 17 00:00:00 2001 From: Ali Ugur Date: Thu, 30 Jan 2025 06:25:55 +0300 Subject: [PATCH 2/5] Lint(): Format code to make linter happy --- src/paas_charm/charm_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/paas_charm/charm_utils.py b/src/paas_charm/charm_utils.py index f95402b..ac3a918 100644 --- a/src/paas_charm/charm_utils.py +++ b/src/paas_charm/charm_utils.py @@ -33,7 +33,7 @@ def update_app_and_unit_status(self, status: ops.StatusBase) -> None: def block_if_invalid_config( - method: typing.Callable[[C, E], None] + method: typing.Callable[[C, E], None], ) -> typing.Callable[[C, E], None]: """Create a decorator that puts the charm in blocked state if the config is wrong. From 1cb4dedd84c4c13239981652f2aa9cedd69f41c3 Mon Sep 17 00:00:00 2001 From: Ali Ugur Date: Thu, 30 Jan 2025 13:45:56 +0300 Subject: [PATCH 3/5] Chore(fmt): Format example code --- .../fastapi/alembic/versions/eca6177bd16a_initial_migration.py | 2 +- .../alembic/versions/eca6177bd16a_initial_migration.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/fastapi/alembic/versions/eca6177bd16a_initial_migration.py b/examples/fastapi/alembic/versions/eca6177bd16a_initial_migration.py index af71c01..7838caf 100644 --- a/examples/fastapi/alembic/versions/eca6177bd16a_initial_migration.py +++ b/examples/fastapi/alembic/versions/eca6177bd16a_initial_migration.py @@ -4,7 +4,7 @@ """Initial migration Revision ID: eca6177bd16a -Revises: +Revises: Create Date: 2023-09-05 17:12:56.303534 """ diff --git a/examples/flask/test_db_rock/alembic/versions/eca6177bd16a_initial_migration.py b/examples/flask/test_db_rock/alembic/versions/eca6177bd16a_initial_migration.py index af71c01..7838caf 100644 --- a/examples/flask/test_db_rock/alembic/versions/eca6177bd16a_initial_migration.py +++ b/examples/flask/test_db_rock/alembic/versions/eca6177bd16a_initial_migration.py @@ -4,7 +4,7 @@ """Initial migration Revision ID: eca6177bd16a -Revises: +Revises: Create Date: 2023-09-05 17:12:56.303534 """ From cf1757682071e75e5f474ee5efb69f2b8bea4978 Mon Sep 17 00:00:00 2001 From: Ali Ugur Date: Mon, 3 Feb 2025 06:31:01 +0300 Subject: [PATCH 4/5] Doc(): Fix broken link --- README.md | 2 +- docs/index.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 60a615e..01fc382 100644 --- a/README.md +++ b/README.md @@ -58,7 +58,7 @@ for more! Additional resources: -* [Tutorial to build a rock for a Flask application](https://documentation.ubuntu.com/rockcraft/en/latest/tutorial/flask/) +* [Tutorial to build a rock for a Flask application](https://documentation.ubuntu.com/rockcraft/en/latest/tutorial/flask.html/) * [Charmcraft `flask-framework` reference](https://juju.is/docs/sdk/charmcraft-extension-flask-framework) * [Charmcraft `flask-framework` how to guides](https://juju.is/docs/sdk/build-a-paas-charm) * [Rockcraft`flask-framework` diff --git a/docs/index.rst b/docs/index.rst index d49654d..a4f6f8b 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -67,7 +67,7 @@ commands that need to be run in the root directory of the Flask application: rockcraft pack The `full Rockcraft tutorial -`_ for +`_ for creating an OCI image for a Flask application takes you from a plain Ubuntu installation to a production ready OCI image for your Flask application. From 1cc86f4cebe8e87d6697cf7224b789be4a9fa8b4 Mon Sep 17 00:00:00 2001 From: Ali Ugur Date: Mon, 3 Feb 2025 06:33:53 +0300 Subject: [PATCH 5/5] Doc(): Fix broken link --- README.md | 2 +- docs/index.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 01fc382..6da147c 100644 --- a/README.md +++ b/README.md @@ -58,7 +58,7 @@ for more! Additional resources: -* [Tutorial to build a rock for a Flask application](https://documentation.ubuntu.com/rockcraft/en/latest/tutorial/flask.html/) +* [Tutorial to build a rock for a Flask application](https://documentation.ubuntu.com/rockcraft/en/latest/tutorial/flask.html) * [Charmcraft `flask-framework` reference](https://juju.is/docs/sdk/charmcraft-extension-flask-framework) * [Charmcraft `flask-framework` how to guides](https://juju.is/docs/sdk/build-a-paas-charm) * [Rockcraft`flask-framework` diff --git a/docs/index.rst b/docs/index.rst index a4f6f8b..984459d 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -67,7 +67,7 @@ commands that need to be run in the root directory of the Flask application: rockcraft pack The `full Rockcraft tutorial -`_ for +`_ for creating an OCI image for a Flask application takes you from a plain Ubuntu installation to a production ready OCI image for your Flask application.