Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Add metrics to track /messages response time by room size #13545

Merged
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/13545.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Update metrics to track `/messages` response time by room size.
55 changes: 53 additions & 2 deletions synapse/rest/client/room.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,13 @@
""" This module contains REST servlets to do with rooms: /rooms/<paths> """
import logging
import re
from enum import Enum
from typing import TYPE_CHECKING, Awaitable, Dict, List, Optional, Tuple
from urllib import parse as urlparse

from prometheus_client.core import Histogram

from twisted.internet import defer
from twisted.web.server import Request

from synapse import event_auth
Expand Down Expand Up @@ -49,6 +51,7 @@
)
from synapse.http.site import SynapseRequest
from synapse.logging.opentracing import set_tag
from synapse.metrics.background_process_metrics import run_as_background_process
from synapse.rest.client._base import client_patterns
from synapse.rest.client.transactions import HttpTransactionCache
from synapse.storage.state import StateFilter
Expand All @@ -62,6 +65,33 @@

logger = logging.getLogger(__name__)


class _RoomSize(Enum):
    """
    Enum to differentiate sizes of rooms. This is a pretty good approximation
    about how hard it will be to get events in the room. We could also look at
    room "complexity".

    The member values are short, stable strings suitable for use as a metric
    label value.
    """

    # This doesn't necessarily mean the room is a DM, just that there is a DM
    # amount of people there.
    DM_SIZE = "direct_message_size"
    SMALL = "small"
    SUBSTANTIAL = "substantial"
    LARGE = "large"

    @staticmethod
    def from_member_count(member_count: int) -> "_RoomSize":
        """Bucket a member count into a coarse room size.

        Args:
            member_count: Number of members in the room.

        Returns:
            `DM_SIZE` for 2 or fewer members, `SMALL` for 3-99,
            `SUBSTANTIAL` for 100-999 and `LARGE` for 1000 or more.
        """
        if member_count <= 2:
            return _RoomSize.DM_SIZE
        elif member_count < 100:
            return _RoomSize.SMALL
        elif member_count < 1000:
            return _RoomSize.SUBSTANTIAL
        else:
            return _RoomSize.LARGE


# This is an extra metric on top of `synapse_http_server_response_time_seconds`
# which times the same sort of thing but this one allows us to see values
# greater than 10s. We use a separate dedicated histogram with its own buckets
Expand All @@ -70,7 +100,11 @@
messsages_response_timer = Histogram(
"synapse_room_message_list_rest_servlet_response_time_seconds",
"sec",
[],
# We have a label for room size so we can try to see a more realistic
# picture of /messages response time for bigger rooms. We don't want the
# tiny rooms that can always respond fast skewing our results when we're trying
# to optimize the bigger cases.
["room_size"],
buckets=(
0.005,
0.01,
Expand Down Expand Up @@ -587,14 +621,25 @@ class RoomMessageListRestServlet(RestServlet):
def __init__(self, hs: "HomeServer"):
    """Keep a reference to the homeserver and the services this servlet uses.

    Args:
        hs: The homeserver; the clock, pagination handler, auth and main
            datastore are all looked up from it.
    """
    super().__init__()
    self._hs = hs

    # The clock is read at the start and end of request processing to
    # measure how long a request took.
    self.clock = hs.get_clock()
    self.pagination_handler = hs.get_pagination_handler()
    self.auth = hs.get_auth()

    datastores = hs.get_datastores()
    self.store = datastores.main

@messsages_response_timer.time()
async def on_GET(
self, request: SynapseRequest, room_id: str
) -> Tuple[int, JsonDict]:
processing_start_time = self.clock.time_msec()
# Fire and forget and hope that we get a result by the end.
MadLittleMods marked this conversation as resolved.
Show resolved Hide resolved
#
# `get_number_joined_users_in_room(...)` returns an int so we can type
# this as a `Deferred[int]` instead of an optional.
room_member_count_deferred: defer.Deferred[int] = run_as_background_process(
MadLittleMods marked this conversation as resolved.
Show resolved Hide resolved
"get_number_joined_users_in_room",
self.store.get_number_joined_users_in_room,
room_id,
)

requester = await self.auth.get_user_by_req(request, allow_guest=True)
pagination_config = await PaginationConfig.from_request(
self.store, request, default_limit=10
Expand Down Expand Up @@ -625,6 +670,12 @@ async def on_GET(
event_filter=event_filter,
)

processing_end_time = self.clock.time_msec()
room_member_count = await room_member_count_deferred
messsages_response_timer.labels(
room_size=_RoomSize.from_member_count(room_member_count)
).observe((processing_start_time - processing_end_time) / 1000)

return 200, msgs


Expand Down