Skip to content

Commit 59a15da

Browse files
Add caching support to media endpoints (#18235)
We do a few things in this PR to better support caching: 1. Change `Cache-Control` header to allow intermediary proxies to cache media *only* if they revalidate on every request. This means that the intermediary cache will still send the request to Synapse but with a `If-None-Match` header, at which point Synapse can check auth and respond with a 304 and empty content. 2. Add `ETag` response header to all media responses. We hardcode this to `1` since all media is immutable (beyond being deleted). 3. Check for `If-None-Match` header (after checking for auth), and if it matches then respond with a 304 and empty body. --------- Co-authored-by: Andrew Morgan <[email protected]>
1 parent a278c0d commit 59a15da

File tree

6 files changed

+253
-7
lines changed

6 files changed

+253
-7
lines changed

changelog.d/18235.misc

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add caching support to media endpoints.

synapse/media/_base.py

+67-7
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,9 @@
118118
# Maximum allowed timeout_ms for download and thumbnail requests
119119
MAXIMUM_ALLOWED_MAX_TIMEOUT_MS = 60_000
120120

121+
# The ETag header value to use for immutable media. This can be anything.
122+
_IMMUTABLE_ETAG = "1"
123+
121124

122125
def respond_404(request: SynapseRequest) -> None:
123126
assert request.path is not None
@@ -224,12 +227,7 @@ def _quote(x: str) -> str:
224227

225228
request.setHeader(b"Content-Disposition", disposition.encode("ascii"))
226229

227-
# cache for at least a day.
228-
# XXX: we might want to turn this off for data we don't want to
229-
# recommend caching as it's sensitive or private - or at least
230-
# select private. don't bother setting Expires as all our
231-
# clients are smart enough to be happy with Cache-Control
232-
request.setHeader(b"Cache-Control", b"public,max-age=86400,s-maxage=86400")
230+
_add_cache_headers(request)
233231

234232
if file_size is not None:
235233
request.setHeader(b"Content-Length", b"%d" % (file_size,))
@@ -240,6 +238,26 @@ def _quote(x: str) -> str:
240238
request.setHeader(b"X-Robots-Tag", "noindex, nofollow, noarchive, noimageindex")
241239

242240

241+
def _add_cache_headers(request: Request) -> None:
242+
"""Adds the appropriate cache headers to the response"""
243+
244+
# Cache on the client for at least a day.
245+
#
246+
# We set this to "public,s-maxage=0,proxy-revalidate" to allow CDNs to cache
247+
# the media, so long as they "revalidate" the media on every request. By
248+
# revalidate, we mean send the request to Synapse with a `If-None-Match`
249+
# header, to which Synapse can either respond with a 304 if the user is
250+
# authenticated/authorized, or a 401/403 if they're not.
251+
request.setHeader(
252+
b"Cache-Control", b"public,max-age=86400,s-maxage=0,proxy-revalidate"
253+
)
254+
255+
# Set an ETag header to allow requesters to use it in requests to check if
256+
# the cache is still valid. Since media is immutable (though may be
257+
# deleted), we just set this to a constant.
258+
request.setHeader(b"ETag", _IMMUTABLE_ETAG)
259+
260+
243261
# separators as defined in RFC2616. SP and HT are handled separately.
244262
# see _can_encode_filename_as_token.
245263
_FILENAME_SEPARATOR_CHARS = {
@@ -336,13 +354,15 @@ def _quote(x: str) -> str:
336354

337355
from synapse.media.media_storage import MultipartFileConsumer
338356

357+
_add_cache_headers(request)
358+
339359
# note that currently the json_object is just {}, this will change when linked media
340360
# is implemented
341361
multipart_consumer = MultipartFileConsumer(
342362
clock,
343363
request,
344364
media_type,
345-
{},
365+
{}, # Note: if we change this we need to change the returned ETag.
346366
disposition,
347367
media_length,
348368
)
@@ -419,6 +439,46 @@ async def respond_with_responder(
419439
finish_request(request)
420440

421441

442+
def respond_with_304(request: SynapseRequest) -> None:
443+
request.setResponseCode(304)
444+
445+
# could alternatively use request.notifyFinish() and flip a flag when
446+
# the Deferred fires, but since the flag is RIGHT THERE it seems like
447+
# a waste.
448+
if request._disconnected:
449+
logger.warning(
450+
"Not sending response to request %s, already disconnected.", request
451+
)
452+
return None
453+
454+
_add_cache_headers(request)
455+
456+
request.finish()
457+
458+
459+
def check_for_cached_entry_and_respond(request: SynapseRequest) -> bool:
460+
"""Check if the request has a conditional header that allows us to return a
461+
304 Not Modified response, and if it does, return a 304 response.
462+
463+
This handles clients and intermediary proxies caching media.
464+
This method assumes that the user has already been
465+
authorised to request the media.
466+
467+
Returns True if we have responded."""
468+
469+
# We've checked the user has access to the media, so we now check if it
470+
# is a "conditional request" and we can just return a `304 Not Modified`
471+
# response. Since media is immutable (though may be deleted), we just
472+
# check this is the expected constant.
473+
etag = request.getHeader("If-None-Match")
474+
if etag == _IMMUTABLE_ETAG:
475+
# Return a `304 Not modified`.
476+
respond_with_304(request)
477+
return True
478+
479+
return False
480+
481+
422482
class Responder(ABC):
423483
"""Represents a response that can be streamed to the requester.
424484

synapse/media/media_repository.py

+17
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
FileInfo,
5353
Responder,
5454
ThumbnailInfo,
55+
check_for_cached_entry_and_respond,
5556
get_filename_from_headers,
5657
respond_404,
5758
respond_with_multipart_responder,
@@ -459,6 +460,11 @@ async def get_local_media(
459460

460461
self.mark_recently_accessed(None, media_id)
461462

463+
# Once we've checked auth we can return early if the media is cached on
464+
# the client
465+
if check_for_cached_entry_and_respond(request):
466+
return
467+
462468
media_type = media_info.media_type
463469
if not media_type:
464470
media_type = "application/octet-stream"
@@ -538,6 +544,17 @@ async def get_remote_media(
538544
allow_authenticated,
539545
)
540546

547+
# Check if the media is cached on the client, if so return 304. We need
548+
# to do this after we have fetched remote media, as we need it to do the
549+
# auth.
550+
if check_for_cached_entry_and_respond(request):
551+
# We always need to use the responder.
552+
if responder:
553+
with responder:
554+
pass
555+
556+
return
557+
541558
# We deliberately stream the file outside the lock
542559
if responder and media_info:
543560
upload_name = name if name else media_info.upload_name

synapse/media/thumbnailer.py

+19
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
from synapse.media._base import (
3535
FileInfo,
3636
ThumbnailInfo,
37+
check_for_cached_entry_and_respond,
3738
respond_404,
3839
respond_with_file,
3940
respond_with_multipart_responder,
@@ -294,6 +295,11 @@ async def respond_local_thumbnail(
294295
if media_info.authenticated:
295296
raise NotFoundError()
296297

298+
# Once we've checked auth we can return early if the media is cached on
299+
# the client
300+
if check_for_cached_entry_and_respond(request):
301+
return
302+
297303
thumbnail_infos = await self.store.get_local_media_thumbnails(media_id)
298304
await self._select_and_respond_with_thumbnail(
299305
request,
@@ -334,6 +340,11 @@ async def select_or_generate_local_thumbnail(
334340
if media_info.authenticated:
335341
raise NotFoundError()
336342

343+
# Once we've checked auth we can return early if the media is cached on
344+
# the client
345+
if check_for_cached_entry_and_respond(request):
346+
return
347+
337348
thumbnail_infos = await self.store.get_local_media_thumbnails(media_id)
338349
for info in thumbnail_infos:
339350
t_w = info.width == desired_width
@@ -431,6 +442,10 @@ async def select_or_generate_remote_thumbnail(
431442
respond_404(request)
432443
return
433444

445+
# Check if the media is cached on the client, if so return 304.
446+
if check_for_cached_entry_and_respond(request):
447+
return
448+
434449
thumbnail_infos = await self.store.get_remote_media_thumbnails(
435450
server_name, media_id
436451
)
@@ -510,6 +525,10 @@ async def respond_remote_thumbnail(
510525
if media_info.authenticated:
511526
raise NotFoundError()
512527

528+
# Check if the media is cached on the client, if so return 304.
529+
if check_for_cached_entry_and_respond(request):
530+
return
531+
513532
thumbnail_infos = await self.store.get_remote_media_thumbnails(
514533
server_name, media_id
515534
)

tests/federation/test_federation_media.py

+39
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,45 @@ def test_file_download(self) -> None:
147147
found_file = any(SMALL_PNG in field for field in stripped_bytes)
148148
self.assertTrue(found_file)
149149

150+
def test_federation_etag(self) -> None:
151+
"""Test that federation ETags work"""
152+
153+
content = io.BytesIO(b"file_to_stream")
154+
content_uri = self.get_success(
155+
self.media_repo.create_content(
156+
"text/plain",
157+
"test_upload",
158+
content,
159+
46,
160+
UserID.from_string("@user_id:whatever.org"),
161+
)
162+
)
163+
164+
channel = self.make_signed_federation_request(
165+
"GET",
166+
f"/_matrix/federation/v1/media/download/{content_uri.media_id}",
167+
)
168+
self.pump()
169+
self.assertEqual(200, channel.code)
170+
171+
# We expect exactly one ETag header.
172+
etags = channel.headers.getRawHeaders("ETag")
173+
self.assertIsNotNone(etags)
174+
assert etags is not None # For mypy
175+
self.assertEqual(len(etags), 1)
176+
etag = etags[0]
177+
178+
# Refetching with the etag should result in 304 and empty body.
179+
channel = self.make_signed_federation_request(
180+
"GET",
181+
f"/_matrix/federation/v1/media/download/{content_uri.media_id}",
182+
custom_headers=[("If-None-Match", etag)],
183+
)
184+
self.pump()
185+
self.assertEqual(channel.code, 304)
186+
self.assertEqual(channel.is_finished(), True)
187+
self.assertNotIn("body", channel.result)
188+
150189

151190
class FederationThumbnailTest(unittest.FederatingHomeserverTestCase):
152191
def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:

tests/rest/client/test_media.py

+110
Original file line numberDiff line numberDiff line change
@@ -2676,3 +2676,113 @@ def test_authenticated_media(self) -> None:
26762676
access_token=self.tok,
26772677
)
26782678
self.assertEqual(channel10.code, 200)
2679+
2680+
def test_authenticated_media_etag(self) -> None:
2681+
"""Test that ETag works correctly with authenticated media over client
2682+
APIs"""
2683+
2684+
# upload some local media with authentication on
2685+
channel = self.make_request(
2686+
"POST",
2687+
"_matrix/media/v3/upload?filename=test_png_upload",
2688+
SMALL_PNG,
2689+
self.tok,
2690+
shorthand=False,
2691+
content_type=b"image/png",
2692+
custom_headers=[("Content-Length", str(67))],
2693+
)
2694+
self.assertEqual(channel.code, 200)
2695+
res = channel.json_body.get("content_uri")
2696+
assert res is not None
2697+
uri = res.split("mxc://")[1]
2698+
2699+
# Check standard media endpoint
2700+
self._check_caching(f"/download/{uri}")
2701+
2702+
# check thumbnails as well
2703+
params = "?width=32&height=32&method=crop"
2704+
self._check_caching(f"/thumbnail/{uri}{params}")
2705+
2706+
# Inject a piece of remote media.
2707+
file_id = "abcdefg12345"
2708+
file_info = FileInfo(server_name="lonelyIsland", file_id=file_id)
2709+
2710+
media_storage = self.hs.get_media_repository().media_storage
2711+
2712+
ctx = media_storage.store_into_file(file_info)
2713+
(f, fname) = self.get_success(ctx.__aenter__())
2714+
f.write(SMALL_PNG)
2715+
self.get_success(ctx.__aexit__(None, None, None))
2716+
2717+
# we write the authenticated status when storing media, so this should pick up
2718+
# config and authenticate the media
2719+
self.get_success(
2720+
self.store.store_cached_remote_media(
2721+
origin="lonelyIsland",
2722+
media_id="52",
2723+
media_type="image/png",
2724+
media_length=1,
2725+
time_now_ms=self.clock.time_msec(),
2726+
upload_name="remote_test.png",
2727+
filesystem_id=file_id,
2728+
)
2729+
)
2730+
2731+
# ensure we have thumbnails for the non-dynamic code path
2732+
if self.extra_config == {"dynamic_thumbnails": False}:
2733+
self.get_success(
2734+
self.repo._generate_thumbnails(
2735+
"lonelyIsland", "52", file_id, "image/png"
2736+
)
2737+
)
2738+
2739+
self._check_caching("/download/lonelyIsland/52")
2740+
2741+
params = "?width=32&height=32&method=crop"
2742+
self._check_caching(f"/thumbnail/lonelyIsland/52{params}")
2743+
2744+
def _check_caching(self, path: str) -> None:
2745+
"""
2746+
Checks that:
2747+
1. fetching the path returns an ETag header
2748+
2. refetching with the ETag returns a 304 without a body
2749+
3. refetching with the ETag but through unauthenticated endpoint
2750+
returns 404
2751+
"""
2752+
2753+
# Request media over authenticated endpoint, should be found
2754+
channel1 = self.make_request(
2755+
"GET",
2756+
f"/_matrix/client/v1/media{path}",
2757+
access_token=self.tok,
2758+
shorthand=False,
2759+
)
2760+
self.assertEqual(channel1.code, 200)
2761+
2762+
# Should have a single ETag field
2763+
etags = channel1.headers.getRawHeaders("ETag")
2764+
self.assertIsNotNone(etags)
2765+
assert etags is not None # For mypy
2766+
self.assertEqual(len(etags), 1)
2767+
etag = etags[0]
2768+
2769+
# Refetching with the etag should result in 304 and empty body.
2770+
channel2 = self.make_request(
2771+
"GET",
2772+
f"/_matrix/client/v1/media{path}",
2773+
access_token=self.tok,
2774+
shorthand=False,
2775+
custom_headers=[("If-None-Match", etag)],
2776+
)
2777+
self.assertEqual(channel2.code, 304)
2778+
self.assertEqual(channel2.is_finished(), True)
2779+
self.assertNotIn("body", channel2.result)
2780+
2781+
# Refetching with the etag but no access token should result in 404.
2782+
channel3 = self.make_request(
2783+
"GET",
2784+
f"/_matrix/media/r0{path}",
2785+
shorthand=False,
2786+
custom_headers=[("If-None-Match", etag)],
2787+
)
2788+
self.assertEqual(channel3.code, 404)

0 commit comments

Comments
 (0)