Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit d6c3b75

Browse files
authored
Request & follow redirects for /media/v3/download (#16701)
Implement MSC3860 to follow redirects for federated media downloads. Note that the Client-Server API doesn't support this (yet) since the media repository in Synapse doesn't have a way of supporting redirects.
1 parent a146784 commit d6c3b75

File tree

7 files changed

+212
-38
lines changed

7 files changed

+212
-38
lines changed

changelog.d/16701.feature

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Follow redirects when downloading media over federation (per [MSC3860](https://github.com/matrix-org/matrix-spec-proposals/pull/3860)).

synapse/federation/federation_client.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
TYPE_CHECKING,
2222
AbstractSet,
2323
Awaitable,
24+
BinaryIO,
2425
Callable,
2526
Collection,
2627
Container,
@@ -1862,6 +1863,43 @@ def filter_user_id(user_id: str) -> bool:
18621863

18631864
return filtered_statuses, filtered_failures
18641865

1866+
async def download_media(
1867+
self,
1868+
destination: str,
1869+
media_id: str,
1870+
output_stream: BinaryIO,
1871+
max_size: int,
1872+
max_timeout_ms: int,
1873+
) -> Tuple[int, Dict[bytes, List[bytes]]]:
1874+
try:
1875+
return await self.transport_layer.download_media_v3(
1876+
destination,
1877+
media_id,
1878+
output_stream=output_stream,
1879+
max_size=max_size,
1880+
max_timeout_ms=max_timeout_ms,
1881+
)
1882+
except HttpResponseException as e:
1883+
# If an error is received that is due to an unrecognised endpoint,
1884+
# fall back to the r0 endpoint. Otherwise, consider it a legitimate error
1885+
# and raise.
1886+
if not is_unknown_endpoint(e):
1887+
raise
1888+
1889+
logger.debug(
1890+
"Couldn't download media %s/%s with the v3 API, falling back to the r0 API",
1891+
destination,
1892+
media_id,
1893+
)
1894+
1895+
return await self.transport_layer.download_media_r0(
1896+
destination,
1897+
media_id,
1898+
output_stream=output_stream,
1899+
max_size=max_size,
1900+
max_timeout_ms=max_timeout_ms,
1901+
)
1902+
18651903

18661904
@attr.s(frozen=True, slots=True, auto_attribs=True)
18671905
class TimestampToEventResponse:

synapse/federation/transport/client.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from typing import (
1919
TYPE_CHECKING,
2020
Any,
21+
BinaryIO,
2122
Callable,
2223
Collection,
2324
Dict,
@@ -804,6 +805,58 @@ async def get_account_status(
804805
destination=destination, path=path, data={"user_ids": user_ids}
805806
)
806807

808+
async def download_media_r0(
809+
self,
810+
destination: str,
811+
media_id: str,
812+
output_stream: BinaryIO,
813+
max_size: int,
814+
max_timeout_ms: int,
815+
) -> Tuple[int, Dict[bytes, List[bytes]]]:
816+
path = f"/_matrix/media/r0/download/{destination}/{media_id}"
817+
818+
return await self.client.get_file(
819+
destination,
820+
path,
821+
output_stream=output_stream,
822+
max_size=max_size,
823+
args={
824+
# tell the remote server to 404 if it doesn't
825+
# recognise the server_name, to make sure we don't
826+
# end up with a routing loop.
827+
"allow_remote": "false",
828+
"timeout_ms": str(max_timeout_ms),
829+
},
830+
)
831+
832+
async def download_media_v3(
833+
self,
834+
destination: str,
835+
media_id: str,
836+
output_stream: BinaryIO,
837+
max_size: int,
838+
max_timeout_ms: int,
839+
) -> Tuple[int, Dict[bytes, List[bytes]]]:
840+
path = f"/_matrix/media/v3/download/{destination}/{media_id}"
841+
842+
return await self.client.get_file(
843+
destination,
844+
path,
845+
output_stream=output_stream,
846+
max_size=max_size,
847+
args={
848+
# tell the remote server to 404 if it doesn't
849+
# recognise the server_name, to make sure we don't
850+
# end up with a routing loop.
851+
"allow_remote": "false",
852+
"timeout_ms": str(max_timeout_ms),
853+
# Matrix 1.7 allows for this to redirect to another URL; this should
854+
# just be ignored for an old homeserver, so always provide it.
855+
"allow_redirect": "true",
856+
},
857+
follow_redirects=True,
858+
)
859+
807860

808861
def _create_path(federation_prefix: str, path: str, *args: str) -> str:
809862
"""

synapse/http/matrixfederationclient.py

Lines changed: 57 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -153,12 +153,18 @@ class MatrixFederationRequest:
153153
"""Query arguments.
154154
"""
155155

156-
txn_id: Optional[str] = None
157-
"""Unique ID for this request (for logging)
156+
txn_id: str = attr.ib(init=False)
157+
"""Unique ID for this request (for logging); this is autogenerated.
158158
"""
159159

160-
uri: bytes = attr.ib(init=False)
161-
"""The URI of this request
160+
uri: bytes = b""
161+
"""The URI of this request, usually generated from the above information.
162+
"""
163+
164+
_generate_uri: bool = True
165+
"""True to automatically generate the uri field based on the above information.
166+
167+
Set to False if manually configuring the URI.
162168
"""
163169

164170
def __attrs_post_init__(self) -> None:
@@ -168,22 +174,23 @@ def __attrs_post_init__(self) -> None:
168174

169175
object.__setattr__(self, "txn_id", txn_id)
170176

171-
destination_bytes = self.destination.encode("ascii")
172-
path_bytes = self.path.encode("ascii")
173-
query_bytes = encode_query_args(self.query)
174-
175-
# The object is frozen so we can pre-compute this.
176-
uri = urllib.parse.urlunparse(
177-
(
178-
b"matrix-federation",
179-
destination_bytes,
180-
path_bytes,
181-
None,
182-
query_bytes,
183-
b"",
177+
if self._generate_uri:
178+
destination_bytes = self.destination.encode("ascii")
179+
path_bytes = self.path.encode("ascii")
180+
query_bytes = encode_query_args(self.query)
181+
182+
# The object is frozen so we can pre-compute this.
183+
uri = urllib.parse.urlunparse(
184+
(
185+
b"matrix-federation",
186+
destination_bytes,
187+
path_bytes,
188+
None,
189+
query_bytes,
190+
b"",
191+
)
184192
)
185-
)
186-
object.__setattr__(self, "uri", uri)
193+
object.__setattr__(self, "uri", uri)
187194

188195
def get_json(self) -> Optional[JsonDict]:
189196
if self.json_callback:
@@ -513,6 +520,7 @@ async def _send_request(
513520
ignore_backoff: bool = False,
514521
backoff_on_404: bool = False,
515522
backoff_on_all_error_codes: bool = False,
523+
follow_redirects: bool = False,
516524
) -> IResponse:
517525
"""
518526
Sends a request to the given server.
@@ -555,6 +563,9 @@ async def _send_request(
555563
backoff_on_404: Back off if we get a 404
556564
backoff_on_all_error_codes: Back off if we get any error response
557565
566+
follow_redirects: True to follow the Location header of 307/308 redirect
567+
responses. Only one redirect is followed; this does not recurse.
568+
558569
Returns:
559570
Resolves with the HTTP response object on success.
560571
@@ -714,6 +725,26 @@ async def _send_request(
714725
response.code,
715726
response_phrase,
716727
)
728+
elif (
729+
response.code in (307, 308)
730+
and follow_redirects
731+
and response.headers.hasHeader("Location")
732+
):
733+
# The Location header *might* be relative so resolve it.
734+
location = response.headers.getRawHeaders(b"Location")[0]
735+
new_uri = urllib.parse.urljoin(request.uri, location)
736+
737+
return await self._send_request(
738+
attr.evolve(request, uri=new_uri, generate_uri=False),
739+
retry_on_dns_fail,
740+
timeout,
741+
long_retries,
742+
ignore_backoff,
743+
backoff_on_404,
744+
backoff_on_all_error_codes,
745+
# Do not continue following redirects.
746+
follow_redirects=False,
747+
)
717748
else:
718749
logger.info(
719750
"{%s} [%s] Got response headers: %d %s",
@@ -1383,6 +1414,7 @@ async def get_file(
13831414
retry_on_dns_fail: bool = True,
13841415
max_size: Optional[int] = None,
13851416
ignore_backoff: bool = False,
1417+
follow_redirects: bool = False,
13861418
) -> Tuple[int, Dict[bytes, List[bytes]]]:
13871419
"""GETs a file from a given homeserver
13881420
Args:
@@ -1392,6 +1424,8 @@ async def get_file(
13921424
args: Optional dictionary used to create the query string.
13931425
ignore_backoff: true to ignore the historical backoff data
13941426
and try the request anyway.
1427+
follow_redirects: True to follow the Location header of 307/308 redirect
1428+
responses. Only one redirect is followed; this does not recurse.
13951429
13961430
Returns:
13971431
Resolves with an (int,dict) tuple of
@@ -1412,7 +1446,10 @@ async def get_file(
14121446
)
14131447

14141448
response = await self._send_request(
1415-
request, retry_on_dns_fail=retry_on_dns_fail, ignore_backoff=ignore_backoff
1449+
request,
1450+
retry_on_dns_fail=retry_on_dns_fail,
1451+
ignore_backoff=ignore_backoff,
1452+
follow_redirects=follow_redirects,
14161453
)
14171454

14181455
headers = dict(response.headers.getAllRawHeaders())

synapse/media/media_repository.py

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ class MediaRepository:
7777
def __init__(self, hs: "HomeServer"):
7878
self.hs = hs
7979
self.auth = hs.get_auth()
80-
self.client = hs.get_federation_http_client()
80+
self.client = hs.get_federation_client()
8181
self.clock = hs.get_clock()
8282
self.server_name = hs.hostname
8383
self.store = hs.get_datastores().main
@@ -644,22 +644,13 @@ async def _download_remote_file(
644644
file_info = FileInfo(server_name=server_name, file_id=file_id)
645645

646646
with self.media_storage.store_into_file(file_info) as (f, fname, finish):
647-
request_path = "/".join(
648-
("/_matrix/media/r0/download", server_name, media_id)
649-
)
650647
try:
651-
length, headers = await self.client.get_file(
648+
length, headers = await self.client.download_media(
652649
server_name,
653-
request_path,
650+
media_id,
654651
output_stream=f,
655652
max_size=self.max_upload_size,
656-
args={
657-
# tell the remote server to 404 if it doesn't
658-
# recognise the server_name, to make sure we don't
659-
# end up with a routing loop.
660-
"allow_remote": "false",
661-
"timeout_ms": str(max_timeout_ms),
662-
},
653+
max_timeout_ms=max_timeout_ms,
663654
)
664655
except RequestSendFailed as e:
665656
logger.warning(

tests/media/test_media_storage.py

Lines changed: 58 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,11 @@
2727

2828
from twisted.internet import defer
2929
from twisted.internet.defer import Deferred
30+
from twisted.python.failure import Failure
3031
from twisted.test.proto_helpers import MemoryReactor
3132
from twisted.web.resource import Resource
3233

33-
from synapse.api.errors import Codes
34+
from synapse.api.errors import Codes, HttpResponseException
3435
from synapse.events import EventBase
3536
from synapse.http.types import QueryParams
3637
from synapse.logging.context import make_deferred_yieldable
@@ -247,6 +248,7 @@ def get_file(
247248
retry_on_dns_fail: bool = True,
248249
max_size: Optional[int] = None,
249250
ignore_backoff: bool = False,
251+
follow_redirects: bool = False,
250252
) -> "Deferred[Tuple[int, Dict[bytes, List[bytes]]]]":
251253
"""A mock for MatrixFederationHttpClient.get_file."""
252254

@@ -257,10 +259,15 @@ def write_to(
257259
output_stream.write(data)
258260
return response
259261

262+
def write_err(f: Failure) -> Failure:
263+
f.trap(HttpResponseException)
264+
output_stream.write(f.value.response)
265+
return f
266+
260267
d: Deferred[Tuple[bytes, Tuple[int, Dict[bytes, List[bytes]]]]] = Deferred()
261268
self.fetches.append((d, destination, path, args))
262269
# Note that this callback changes the value held by d.
263-
d_after_callback = d.addCallback(write_to)
270+
d_after_callback = d.addCallbacks(write_to, write_err)
264271
return make_deferred_yieldable(d_after_callback)
265272

266273
# Mock out the homeserver's MatrixFederationHttpClient
@@ -316,10 +323,11 @@ def _req(
316323
self.assertEqual(len(self.fetches), 1)
317324
self.assertEqual(self.fetches[0][1], "example.com")
318325
self.assertEqual(
319-
self.fetches[0][2], "/_matrix/media/r0/download/" + self.media_id
326+
self.fetches[0][2], "/_matrix/media/v3/download/" + self.media_id
320327
)
321328
self.assertEqual(
322-
self.fetches[0][3], {"allow_remote": "false", "timeout_ms": "20000"}
329+
self.fetches[0][3],
330+
{"allow_remote": "false", "timeout_ms": "20000", "allow_redirect": "true"},
323331
)
324332

325333
headers = {
@@ -671,6 +679,52 @@ def test_cross_origin_resource_policy_header(self) -> None:
671679
[b"cross-origin"],
672680
)
673681

682+
def test_unknown_v3_endpoint(self) -> None:
683+
"""
684+
If the v3 endpoint is unrecognised by the remote server, fall back to the r0 one.
685+
"""
686+
channel = self.make_request(
687+
"GET",
688+
f"/_matrix/media/v3/download/{self.media_id}",
689+
shorthand=False,
690+
await_result=False,
691+
)
692+
self.pump()
693+
694+
# We've made one fetch, to example.com, using the media URL, and asking
695+
# the other server not to do a remote fetch
696+
self.assertEqual(len(self.fetches), 1)
697+
self.assertEqual(self.fetches[0][1], "example.com")
698+
self.assertEqual(
699+
self.fetches[0][2], "/_matrix/media/v3/download/" + self.media_id
700+
)
701+
702+
# An error response saying that the endpoint is unknown.
703+
unknown_endpoint = b'{"errcode":"M_UNRECOGNIZED","error":"Unknown request"}'
704+
self.fetches[0][0].errback(
705+
HttpResponseException(404, "NOT FOUND", unknown_endpoint)
706+
)
707+
708+
self.pump()
709+
710+
# There should now be another request to the r0 URL.
711+
self.assertEqual(len(self.fetches), 2)
712+
self.assertEqual(self.fetches[1][1], "example.com")
713+
self.assertEqual(
714+
self.fetches[1][2], f"/_matrix/media/r0/download/{self.media_id}"
715+
)
716+
717+
headers = {
718+
b"Content-Length": [b"%d" % (len(self.test_image.data))],
719+
}
720+
721+
self.fetches[1][0].callback(
722+
(self.test_image.data, (len(self.test_image.data), headers))
723+
)
724+
725+
self.pump()
726+
self.assertEqual(channel.code, 200)
727+
674728

675729
class TestSpamCheckerLegacy:
676730
"""A spam checker module that rejects all media that includes the bytes

0 commit comments

Comments
 (0)