Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Commit

Permalink
Make the metrics less racy (#4061)
Browse files: browse the repository at this point in the history
  • Loading branch information
hawkowl committed Oct 19, 2018
1 parent 6a4d01e commit b69216f
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 16 deletions.
1 change: 1 addition & 0 deletions changelog.d/4061.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix some metrics being racy and causing exceptions when polled by Prometheus.
31 changes: 18 additions & 13 deletions synapse/http/request_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@
)

response_timer = Histogram(
"synapse_http_server_response_time_seconds", "sec",
"synapse_http_server_response_time_seconds",
"sec",
["method", "servlet", "tag", "code"],
)

Expand Down Expand Up @@ -79,15 +80,11 @@
# than when the response was written.

in_flight_requests_ru_utime = Counter(
"synapse_http_server_in_flight_requests_ru_utime_seconds",
"",
["method", "servlet"],
"synapse_http_server_in_flight_requests_ru_utime_seconds", "", ["method", "servlet"]
)

in_flight_requests_ru_stime = Counter(
"synapse_http_server_in_flight_requests_ru_stime_seconds",
"",
["method", "servlet"],
"synapse_http_server_in_flight_requests_ru_stime_seconds", "", ["method", "servlet"]
)

in_flight_requests_db_txn_count = Counter(
Expand Down Expand Up @@ -134,7 +131,7 @@ def _get_in_flight_counts():
# type
counts = {}
for rm in reqs:
key = (rm.method, rm.name,)
key = (rm.method, rm.name)
counts[key] = counts.get(key, 0) + 1

return counts
Expand Down Expand Up @@ -175,7 +172,8 @@ def stop(self, time_sec, response_code, sent_bytes):
if context != self.start_context:
logger.warn(
"Context have unexpectedly changed %r, %r",
context, self.start_context
context,
self.start_context,
)
return

Expand All @@ -192,10 +190,10 @@ def stop(self, time_sec, response_code, sent_bytes):
resource_usage = context.get_resource_usage()

response_ru_utime.labels(self.method, self.name, tag).inc(
resource_usage.ru_utime,
resource_usage.ru_utime
)
response_ru_stime.labels(self.method, self.name, tag).inc(
resource_usage.ru_stime,
resource_usage.ru_stime
)
response_db_txn_count.labels(self.method, self.name, tag).inc(
resource_usage.db_txn_count
Expand All @@ -222,8 +220,15 @@ def update_metrics(self):
diff = new_stats - self._request_stats
self._request_stats = new_stats

in_flight_requests_ru_utime.labels(self.method, self.name).inc(diff.ru_utime)
in_flight_requests_ru_stime.labels(self.method, self.name).inc(diff.ru_stime)
# max() clamps the diff at zero before it is added to the counter, since
# successive ru_stime/ru_utime readings can end up going backwards due to NTP,
# time smearing, fine-grained correction, or floating-point error (the exact
# cause is unclear).
in_flight_requests_ru_utime.labels(self.method, self.name).inc(
max(diff.ru_utime, 0)
)
in_flight_requests_ru_stime.labels(self.method, self.name).inc(
max(diff.ru_stime, 0)
)

in_flight_requests_db_txn_count.labels(self.method, self.name).inc(
diff.db_txn_count
Expand Down
6 changes: 3 additions & 3 deletions synapse/notifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,17 +186,17 @@ def __init__(self, hs):
def count_listeners():
all_user_streams = set()

for x in self.room_to_user_streams.values():
for x in list(self.room_to_user_streams.values()):
all_user_streams |= x
for x in self.user_to_user_stream.values():
for x in list(self.user_to_user_stream.values()):
all_user_streams.add(x)

return sum(stream.count_listeners() for stream in all_user_streams)
LaterGauge("synapse_notifier_listeners", "", [], count_listeners)

LaterGauge(
"synapse_notifier_rooms", "", [],
lambda: count(bool, self.room_to_user_streams.values()),
lambda: count(bool, list(self.room_to_user_streams.values())),
)
LaterGauge(
"synapse_notifier_users", "", [],
Expand Down

0 comments on commit b69216f

Please sign in to comment.