Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Add config options for media retention #12732

Merged
merged 14 commits into from
May 31, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/12732.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add new `media_retention` options to the homeserver config for routinely cleaning up non-recently accessed media.
29 changes: 28 additions & 1 deletion docs/usage/configuration/config_documentation.md
Original file line number Diff line number Diff line change
Expand Up @@ -1407,7 +1407,7 @@ federation_rr_transactions_per_room_per_second: 40
```
---
## Media Store ##
Config options relating to Synapse media store.
Config options related to Synapse's media store.

---
Config option: `enable_media_repo`
Expand Down Expand Up @@ -1511,6 +1511,33 @@ thumbnail_sizes:
height: 600
method: scale
```
---
Config option: `media_retention`

Controls whether local media and entries in the remote media cache
(media that is downloaded from other homeservers) should be removed
under certain conditions, typically for the purpose of saving space.

Purging media files will be carried out by the media worker
(that is, the worker that has the `enable_media_repo` homeserver config
option set to 'true'). This may be the main process.

The `media_retention.local_media_lifetime` and
`media_retention.remote_media_lifetime` config options control whether
media will be purged if it has not been accessed in a given amount of
time. Note that media is 'accessed' when loaded in a room in a client, or
otherwise downloaded by a local or remote user. If the media has never
been accessed, the media's creation time is used instead. Both thumbnails
and the original media will be removed. If either of these options is unset,
then media of that type will not be purged.

Example configuration:
```yaml
media_retention:
local_media_lifetime: 90d
remote_media_lifetime: 14d
```
---
Config option: `url_preview_enabled`

This setting determines whether the preview URL API is enabled.
Expand Down
16 changes: 16 additions & 0 deletions synapse/config/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,22 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None:
"url_preview_accept_language"
) or ["en"]

media_retention = config.get("media_retention") or {}

self.media_retention_local_media_lifetime_ms = None
local_media_lifetime = media_retention.get("local_media_lifetime")
if local_media_lifetime is not None:
self.media_retention_local_media_lifetime_ms = self.parse_duration(
local_media_lifetime
)

self.media_retention_remote_media_lifetime_ms = None
remote_media_lifetime = media_retention.get("remote_media_lifetime")
if remote_media_lifetime is not None:
self.media_retention_remote_media_lifetime_ms = self.parse_duration(
remote_media_lifetime
)

def generate_config_section(self, data_dir_path: str, **kwargs: Any) -> str:
assert data_dir_path is not None
media_store = os.path.join(data_dir_path, "media_store")
Expand Down
71 changes: 70 additions & 1 deletion synapse/rest/media/v1/media_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,12 @@
logger = logging.getLogger(__name__)


UPDATE_RECENTLY_ACCESSED_TS = 60 * 1000
# How often to run the background job to update the "recently accessed"
# attribute of local and remote media.
UPDATE_RECENTLY_ACCESSED_TS = 60 * 1000 # 1 minute
# How often to run the background job to check for local and remote media
# that should be purged according to the configured media retention settings.
MEDIA_RETENTION_CHECK_PERIOD_MS = 60 * 60 * 1000 # 1 hour


class MediaRepository:
Expand Down Expand Up @@ -122,11 +127,36 @@ def __init__(self, hs: "HomeServer"):
self._start_update_recently_accessed, UPDATE_RECENTLY_ACCESSED_TS
)

# Media retention configuration options
self._media_retention_local_media_lifetime_ms = (
hs.config.media.media_retention_local_media_lifetime_ms
)
self._media_retention_remote_media_lifetime_ms = (
hs.config.media.media_retention_remote_media_lifetime_ms
)

# Check whether local or remote media retention is configured
if (
hs.config.media.media_retention_local_media_lifetime_ms is not None
or hs.config.media.media_retention_remote_media_lifetime_ms is not None
):
# Run the background job to apply media retention rules routinely,
# once every MEDIA_RETENTION_CHECK_PERIOD_MS.
self.clock.looping_call(
self._start_apply_media_retention_rules,
MEDIA_RETENTION_CHECK_PERIOD_MS,
)

def _start_update_recently_accessed(self) -> Deferred:
    """
    Launch `_update_recently_accessed` as a named background process.

    Returns:
        Whatever `run_as_background_process` hands back for the started job.
    """
    job_name = "update_recently_accessed_media"
    return run_as_background_process(job_name, self._update_recently_accessed)

def _start_apply_media_retention_rules(self) -> Deferred:
    """
    Launch `_apply_media_retention_rules` as a named background process.

    Returns:
        Whatever `run_as_background_process` hands back for the started job.
    """
    job_name = "apply_media_retention_rules"
    return run_as_background_process(job_name, self._apply_media_retention_rules)

async def _update_recently_accessed(self) -> None:
remote_media = self.recently_accessed_remotes
self.recently_accessed_remotes = set()
Expand Down Expand Up @@ -835,6 +865,45 @@ async def _generate_thumbnails(

return {"width": m_width, "height": m_height}

async def _apply_media_retention_rules(self) -> None:
    """
    Run one pass of the media retention policy held on this instance.

    Remote media is handled first, then local media. For each kind, nothing
    happens when its lifetime attribute is None; otherwise a cutoff timestamp
    (current clock time minus the lifetime, both in milliseconds) is logged
    and passed as `before_ts` to the matching `delete_old_*_media` method.
    """
    remote_lifetime_ms = self._media_retention_remote_media_lifetime_ms
    if remote_lifetime_ms is not None:
        # Everything not accessed since this instant is eligible for removal.
        now_ms = self.clock.time_msec()
        remote_media_threshold_timestamp_ms = now_ms - remote_lifetime_ms

        logger.info(
            "Purging remote media last accessed before"
            f" {remote_media_threshold_timestamp_ms}"
        )

        await self.delete_old_remote_media(
            before_ts=remote_media_threshold_timestamp_ms
        )

    local_lifetime_ms = self._media_retention_local_media_lifetime_ms
    if local_lifetime_ms is not None:
        # The clock is read again here rather than reused, since the remote
        # purge above may have taken a while.
        now_ms = self.clock.time_msec()
        local_media_threshold_timestamp_ms = now_ms - local_lifetime_ms

        logger.info(
            "Purging local media last accessed before"
            f" {local_media_threshold_timestamp_ms}"
        )

        # NOTE(review): keep_profiles=True presumably spares media used in
        # user profiles -- confirm against delete_old_local_media.
        await self.delete_old_local_media(
            before_ts=local_media_threshold_timestamp_ms,
            keep_profiles=True,
        )

async def delete_old_remote_media(self, before_ts: int) -> Dict[str, int]:
old_media = await self.store.get_remote_media_before(before_ts)

Expand Down
Loading