Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit 2fc787c

Browse files
Add config options for media retention (#12732)
1 parent 641908f commit 2fc787c

File tree

5 files changed

+353
-2
lines changed

5 files changed

+353
-2
lines changed

changelog.d/12732.feature

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add new `media_retention` options to the homeserver config for routinely cleaning up non-recently accessed media.

docs/usage/configuration/config_documentation.md

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1459,7 +1459,7 @@ federation_rr_transactions_per_room_per_second: 40
14591459
```
14601460
---
14611461
## Media Store ##
1462-
Config options relating to Synapse media store.
1462+
Config options related to Synapse's media store.
14631463

14641464
---
14651465
Config option: `enable_media_repo`
@@ -1563,6 +1563,33 @@ thumbnail_sizes:
15631563
height: 600
15641564
method: scale
15651565
```
1566+
---
1567+
Config option: `media_retention`
1568+
1569+
Controls whether local media and entries in the remote media cache
1570+
(media that is downloaded from other homeservers) should be removed
1571+
under certain conditions, typically for the purpose of saving space.
1572+
1573+
Purging media files will be carried out by the media worker
1574+
(that is, the worker that has the `enable_media_repo` homeserver config
1575+
option set to 'true'). This may be the main process.
1576+
1577+
The `media_retention.local_media_lifetime` and
1578+
`media_retention.remote_media_lifetime` config options control whether
1579+
media will be purged if it has not been accessed in a given amount of
1580+
time. Note that media is 'accessed' when loaded in a room in a client, or
1581+
otherwise downloaded by a local or remote user. If the media has never
1582+
been accessed, the media's creation time is used instead. Both thumbnails
1583+
and the original media will be removed. If either of these options is unset,
1584+
then media of that type will not be purged.
1585+
1586+
Example configuration:
1587+
```yaml
1588+
media_retention:
1589+
local_media_lifetime: 90d
1590+
remote_media_lifetime: 14d
1591+
```
1592+
---
15661593
Config option: `url_preview_enabled`
15671594

15681595
This setting determines whether the preview URL API is enabled.

synapse/config/repository.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,22 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None:
223223
"url_preview_accept_language"
224224
) or ["en"]
225225

226+
media_retention = config.get("media_retention") or {}
227+
228+
self.media_retention_local_media_lifetime_ms = None
229+
local_media_lifetime = media_retention.get("local_media_lifetime")
230+
if local_media_lifetime is not None:
231+
self.media_retention_local_media_lifetime_ms = self.parse_duration(
232+
local_media_lifetime
233+
)
234+
235+
self.media_retention_remote_media_lifetime_ms = None
236+
remote_media_lifetime = media_retention.get("remote_media_lifetime")
237+
if remote_media_lifetime is not None:
238+
self.media_retention_remote_media_lifetime_ms = self.parse_duration(
239+
remote_media_lifetime
240+
)
241+
226242
def generate_config_section(self, data_dir_path: str, **kwargs: Any) -> str:
227243
assert data_dir_path is not None
228244
media_store = os.path.join(data_dir_path, "media_store")

synapse/rest/media/v1/media_repository.py

Lines changed: 70 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,12 @@
6565
logger = logging.getLogger(__name__)
6666

6767

68-
UPDATE_RECENTLY_ACCESSED_TS = 60 * 1000
68+
# How often to run the background job to update the "recently accessed"
69+
# attribute of local and remote media.
70+
UPDATE_RECENTLY_ACCESSED_TS = 60 * 1000 # 1 minute
71+
# How often to run the background job to check for local and remote media
72+
# that should be purged according to the configured media retention settings.
73+
MEDIA_RETENTION_CHECK_PERIOD_MS = 60 * 60 * 1000 # 1 hour
6974

7075

7176
class MediaRepository:
@@ -122,11 +127,36 @@ def __init__(self, hs: "HomeServer"):
122127
self._start_update_recently_accessed, UPDATE_RECENTLY_ACCESSED_TS
123128
)
124129

130+
# Media retention configuration options
131+
self._media_retention_local_media_lifetime_ms = (
132+
hs.config.media.media_retention_local_media_lifetime_ms
133+
)
134+
self._media_retention_remote_media_lifetime_ms = (
135+
hs.config.media.media_retention_remote_media_lifetime_ms
136+
)
137+
138+
# Check whether local or remote media retention is configured
139+
if (
140+
hs.config.media.media_retention_local_media_lifetime_ms is not None
141+
or hs.config.media.media_retention_remote_media_lifetime_ms is not None
142+
):
143+
# Run the background job to apply media retention rules routinely,
144+
# with the duration between runs fixed by MEDIA_RETENTION_CHECK_PERIOD_MS.
145+
self.clock.looping_call(
146+
self._start_apply_media_retention_rules,
147+
MEDIA_RETENTION_CHECK_PERIOD_MS,
148+
)
149+
125150
def _start_update_recently_accessed(self) -> Deferred:
126151
return run_as_background_process(
127152
"update_recently_accessed_media", self._update_recently_accessed
128153
)
129154

155+
def _start_apply_media_retention_rules(self) -> Deferred:
156+
return run_as_background_process(
157+
"apply_media_retention_rules", self._apply_media_retention_rules
158+
)
159+
130160
async def _update_recently_accessed(self) -> None:
131161
remote_media = self.recently_accessed_remotes
132162
self.recently_accessed_remotes = set()
@@ -835,6 +865,45 @@ async def _generate_thumbnails(
835865

836866
return {"width": m_width, "height": m_height}
837867

868+
async def _apply_media_retention_rules(self) -> None:
869+
"""
870+
Purge old local and remote media according to the media retention rules
871+
defined in the homeserver config.
872+
"""
873+
# Purge remote media
874+
if self._media_retention_remote_media_lifetime_ms is not None:
875+
# Calculate a threshold timestamp derived from the configured lifetime. Any
876+
# media that has not been accessed since this timestamp will be removed.
877+
remote_media_threshold_timestamp_ms = (
878+
self.clock.time_msec() - self._media_retention_remote_media_lifetime_ms
879+
)
880+
881+
logger.info(
882+
"Purging remote media last accessed before"
883+
f" {remote_media_threshold_timestamp_ms}"
884+
)
885+
886+
await self.delete_old_remote_media(
887+
before_ts=remote_media_threshold_timestamp_ms
888+
)
889+
890+
# And now do the same for local media
891+
if self._media_retention_local_media_lifetime_ms is not None:
892+
# This works the same as the remote media threshold
893+
local_media_threshold_timestamp_ms = (
894+
self.clock.time_msec() - self._media_retention_local_media_lifetime_ms
895+
)
896+
897+
logger.info(
898+
"Purging local media last accessed before"
899+
f" {local_media_threshold_timestamp_ms}"
900+
)
901+
902+
await self.delete_old_local_media(
903+
before_ts=local_media_threshold_timestamp_ms,
904+
keep_profiles=True,
905+
)
906+
838907
async def delete_old_remote_media(self, before_ts: int) -> Dict[str, int]:
839908
old_media = await self.store.get_remote_media_before(before_ts)
840909

0 commit comments

Comments
 (0)