Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Fix cache metrics not being updated when not using the legacy exposition module. #13717

Merged
merged 4 commits into from
Sep 8, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions synapse/metrics/_legacy_exposition.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,6 @@
from twisted.web.resource import Resource
from twisted.web.server import Request

from synapse.util import caches

CONTENT_TYPE_LATEST = "text/plain; version=0.0.4; charset=utf-8"


Expand Down Expand Up @@ -102,11 +100,6 @@ def generate_latest(registry: CollectorRegistry, emit_help: bool = False) -> byt
by prometheus-client.
"""

# Trigger the cache metric collectors to be rescraped. This updates the common
# metrics; the collectors do not produce metrics themselves.
for collector in caches.collectors_by_name.values():
collector.collect()

output = []

for metric in registry.collect():
Expand Down
61 changes: 49 additions & 12 deletions synapse/util/caches/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,37 +20,74 @@
from typing import Any, Callable, Dict, List, Optional, Sized, TypeVar

import attr
from prometheus_client import REGISTRY
from prometheus_client.core import Gauge

from synapse.config.cache import add_resizable_cache
from synapse.util.metrics import DynamicCollectorRegistry

logger = logging.getLogger(__name__)


# Whether to track estimated memory usage of the LruCaches.
TRACK_MEMORY_USAGE = False

# We track cache metrics in a special registry that lets us update the metrics
# just before they are returned from the scrape endpoint.
CACHE_METRIC_REGISTRY = DynamicCollectorRegistry()

caches_by_name: Dict[str, Sized] = {}
collectors_by_name: Dict[str, "CacheMetric"] = {}

cache_size = Gauge("synapse_util_caches_cache_size", "", ["name"])
cache_hits = Gauge("synapse_util_caches_cache_hits", "", ["name"])
cache_evicted = Gauge("synapse_util_caches_cache_evicted_size", "", ["name", "reason"])
cache_total = Gauge("synapse_util_caches_cache", "", ["name"])
cache_max_size = Gauge("synapse_util_caches_cache_max_size", "", ["name"])
cache_size = Gauge(
"synapse_util_caches_cache_size", "", ["name"], registry=CACHE_METRIC_REGISTRY
)
cache_hits = Gauge(
"synapse_util_caches_cache_hits", "", ["name"], registry=CACHE_METRIC_REGISTRY
)
cache_evicted = Gauge(
"synapse_util_caches_cache_evicted_size",
"",
["name", "reason"],
registry=CACHE_METRIC_REGISTRY,
)
cache_total = Gauge(
"synapse_util_caches_cache", "", ["name"], registry=CACHE_METRIC_REGISTRY
)
cache_max_size = Gauge(
"synapse_util_caches_cache_max_size", "", ["name"], registry=CACHE_METRIC_REGISTRY
)
cache_memory_usage = Gauge(
"synapse_util_caches_cache_size_bytes",
"Estimated memory usage of the caches",
["name"],
registry=CACHE_METRIC_REGISTRY,
)

response_cache_size = Gauge("synapse_util_caches_response_cache_size", "", ["name"])
response_cache_hits = Gauge("synapse_util_caches_response_cache_hits", "", ["name"])
response_cache_size = Gauge(
"synapse_util_caches_response_cache_size",
"",
["name"],
registry=CACHE_METRIC_REGISTRY,
)
response_cache_hits = Gauge(
"synapse_util_caches_response_cache_hits",
"",
["name"],
registry=CACHE_METRIC_REGISTRY,
)
response_cache_evicted = Gauge(
"synapse_util_caches_response_cache_evicted_size", "", ["name", "reason"]
"synapse_util_caches_response_cache_evicted_size",
"",
["name", "reason"],
registry=CACHE_METRIC_REGISTRY,
)
response_cache_total = Gauge("synapse_util_caches_response_cache", "", ["name"])
response_cache_total = Gauge(
"synapse_util_caches_response_cache", "", ["name"], registry=CACHE_METRIC_REGISTRY
)


# Register our custom cache metrics registry with the global registry
REGISTRY.register(CACHE_METRIC_REGISTRY)


class EvictionReason(Enum):
Expand Down Expand Up @@ -168,9 +205,9 @@ def register_cache(
add_resizable_cache(cache_name, resize_callback)

metric = CacheMetric(cache, cache_type, cache_name, collect_callback)
metric_name = "cache_%s_%s" % (cache_type, cache_name)
# TODO evil ??metric_name = "cache_%s_%s" % (cache_type, cache_name)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's going on here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

bahah, just cruft. I should have checked the diff! I'll remove it :)

caches_by_name[cache_name] = cache
collectors_by_name[metric_name] = metric
CACHE_METRIC_REGISTRY.register_hook(metric.collect)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this replaces _legacy_exposition.py line -108?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yep!

return metric


Expand Down
34 changes: 32 additions & 2 deletions synapse/util/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
import logging
from functools import wraps
from types import TracebackType
from typing import Awaitable, Callable, Optional, Type, TypeVar
from typing import Awaitable, Callable, Generator, List, Optional, Type, TypeVar

from prometheus_client import Counter
from prometheus_client import CollectorRegistry, Counter, Metric
from typing_extensions import Concatenate, ParamSpec, Protocol

from synapse.logging.context import (
Expand Down Expand Up @@ -208,3 +208,33 @@ def _update_in_flight(self, metrics: _InFlightMetric) -> None:
metrics.real_time_sum += duration

# TODO: Add other in flight metrics.


class DynamicCollectorRegistry(CollectorRegistry):
    """
    Custom Prometheus Collector registry that calls a hook first, allowing you
    to update metrics on-demand.

    Hooks are plain zero-argument callables added via `register_hook`. They are
    run, in registration order, each time the metrics of this registry are
    collected, before any metric is yielded.

    Don't forget to register this registry with the main registry!
    """

    def __init__(self) -> None:
        super().__init__()
        # Callables to invoke (in registration order) before each collection.
        self._pre_update_hooks: List[Callable[[], None]] = []

    def collect(self) -> Generator[Metric, None, None]:
        """
        Collects metrics, calling pre-update hooks first.

        This is a generator function, so the hooks only run once the caller
        starts iterating over the returned generator, not at call time.

        Yields:
            The metrics produced by the parent registry's `collect()`.
        """

        # `typing.Generator` takes three parameters (yield, send, return);
        # spelling out all three keeps the annotation valid on interpreters
        # that do not supply defaults for the last two.
        for pre_update_hook in self._pre_update_hooks:
            pre_update_hook()

        yield from super().collect()

    def register_hook(self, hook: Callable[[], None]) -> None:
        """
        Registers a hook that is called before metric collection.

        Args:
            hook: A zero-argument callable; its return value is ignored.
        """

        self._pre_update_hooks.append(hook)