Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Commit

Permalink
Delete unreferenced state groups during purge
Browse files Browse the repository at this point in the history
  • Loading branch information
erikjohnston committed Oct 4, 2018
1 parent d867943 commit 17d5857
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 6 deletions.
33 changes: 27 additions & 6 deletions synapse/storage/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -2025,6 +2025,7 @@ def _purge_history_txn(
logger.info("[purge] finding state groups which depend on redundant"
" state groups")
remaining_state_groups = []
unreferenced_state_groups = 0
for i in range(0, len(state_rows), 100):
chunk = [sg for sg, in state_rows[i:i + 100]]
# look for state groups whose prev_state_group is one we are about
Expand All @@ -2037,13 +2038,33 @@ def _purge_history_txn(
retcols=["state_group"],
keyvalues={},
)
remaining_state_groups.extend(
row["state_group"] for row in rows

# exclude state groups we are about to delete: no point in
# updating them
if row["state_group"] not in state_groups_to_delete
)
for row in rows:
sg = row["state_group"]

if sg in state_groups_to_delete:
# exclude state groups we are about to delete: no point in
# updating them
continue

if not self._is_state_group_referenced(txn, sg):
# Let's also delete unreferenced state groups while we're
# here, since otherwise we'd need to de-delta them
state_groups_to_delete.add(sg)
unreferenced_state_groups += 1
continue

remaining_state_groups.append(sg)

logger.info(
"[purge] found %i extra unreferenced state groups to delete",
unreferenced_state_groups,
)

logger.info(
"[purge] de-delta-ing %i remaining state groups",
len(remaining_state_groups),
)

# Now we turn the state groups that reference to-be-deleted state
# groups to non delta versions.
Expand Down
50 changes: 50 additions & 0 deletions synapse/storage/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -1041,6 +1041,56 @@ def _count_state_group_hops_txn(self, txn, state_group):

return count

def _is_state_group_referenced(self, txn, state_group):
    """Checks if a given state group is referenced, or is safe to delete.

    A state group is referenced if it or any of its descendants are
    pointed at by an event. (A descendant is a group which has the given
    state_group as a prev group, directly or transitively.)

    Args:
        txn: database transaction, passed through to the select helpers
        state_group: ID of the state group to check

    Returns:
        bool: True if the state group, or any of its descendants, has a
        row in `event_to_state_groups`; False otherwise.

    Raises:
        Exception: if the walk over `state_group_edges` reaches a state
            group that has already been searched.
    """

    # We check this by doing a depth first search to look for any
    # descendant referenced by `event_to_state_groups`.

    # State groups we still need to check: the original group plus any
    # descendants discovered so far.
    state_groups_to_search = [state_group]

    # Set of state groups we've already checked
    state_groups_searched = set()

    while state_groups_to_search:
        # Next state group to check. A separate name is used so that the
        # `state_group` argument is not clobbered by the loop.
        current_group = state_groups_to_search.pop()

        is_referenced = self._simple_select_one_onecol_txn(
            txn,
            table="event_to_state_groups",
            keyvalues={"state_group": current_group},
            retcol="event_id",
            allow_none=True,
        )
        if is_referenced:
            # A descendant is referenced by event_to_state_groups, so
            # the original state group is referenced.
            return True

        state_groups_searched.add(current_group)

        # Find all children of the current state group and add to search
        references = self._simple_select_onecol_txn(
            txn,
            table="state_group_edges",
            keyvalues={"prev_state_group": current_group},
            retcol="state_group",
        )
        state_groups_to_search.extend(references)

        # Let's be paranoid and check for cycles.
        # NOTE(review): this treats any revisit as a cycle, which presumes
        # each state group has at most one prev group -- confirm against
        # the state_group_edges schema.
        if state_groups_searched.intersection(references):
            # Interpolate the ID explicitly: passing it as a second
            # argument to Exception would leave the "%s" unformatted.
            raise Exception(
                "State group %s has cyclic dependency" % (current_group,)
            )

    return False


class StateStore(StateGroupWorkerStore, BackgroundUpdateStore):
""" Keeps track of the state at a given event.
Expand Down

0 comments on commit 17d5857

Please sign in to comment.