-
-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Request & follow redirects for /media/v3/download #16701
Changes from all commits
c2c5942
fa4f637
8837c38
12c6193
b209ee7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Follow redirects when downloading media over federation (per [MSC3860](https://github.com/matrix-org/matrix-spec-proposals/pull/3860)). |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -153,12 +153,18 @@ class MatrixFederationRequest: | |
"""Query arguments. | ||
""" | ||
|
||
txn_id: Optional[str] = None | ||
"""Unique ID for this request (for logging) | ||
txn_id: str = attr.ib(init=False) | ||
"""Unique ID for this request (for logging), this is autogenerated. | ||
""" | ||
|
||
uri: bytes = attr.ib(init=False) | ||
"""The URI of this request | ||
uri: bytes = b"" | ||
"""The URI of this request, usually generated from the above information. | ||
""" | ||
|
||
_generate_uri: bool = True | ||
"""True to automatically generate the uri field based on the above information. | ||
|
||
Set to False if manually configuring the URI. | ||
""" | ||
Comment on lines
+164
to
168
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I couldn't figure out a better way to force attrs to do what I want. Using a default/factory works if you're either just creating the instance or evolving it and updating the URI, but not when evolving it and wanting to generate the URI again. I figured being explicit was best. |
||
|
||
def __attrs_post_init__(self) -> None: | ||
|
@@ -168,22 +174,23 @@ def __attrs_post_init__(self) -> None: | |
|
||
object.__setattr__(self, "txn_id", txn_id) | ||
|
||
destination_bytes = self.destination.encode("ascii") | ||
path_bytes = self.path.encode("ascii") | ||
query_bytes = encode_query_args(self.query) | ||
|
||
# The object is frozen so we can pre-compute this. | ||
uri = urllib.parse.urlunparse( | ||
( | ||
b"matrix-federation", | ||
destination_bytes, | ||
path_bytes, | ||
None, | ||
query_bytes, | ||
b"", | ||
if self._generate_uri: | ||
destination_bytes = self.destination.encode("ascii") | ||
path_bytes = self.path.encode("ascii") | ||
query_bytes = encode_query_args(self.query) | ||
|
||
# The object is frozen so we can pre-compute this. | ||
uri = urllib.parse.urlunparse( | ||
( | ||
b"matrix-federation", | ||
destination_bytes, | ||
path_bytes, | ||
None, | ||
query_bytes, | ||
b"", | ||
) | ||
) | ||
) | ||
object.__setattr__(self, "uri", uri) | ||
object.__setattr__(self, "uri", uri) | ||
|
||
def get_json(self) -> Optional[JsonDict]: | ||
if self.json_callback: | ||
|
@@ -513,6 +520,7 @@ async def _send_request( | |
ignore_backoff: bool = False, | ||
backoff_on_404: bool = False, | ||
backoff_on_all_error_codes: bool = False, | ||
follow_redirects: bool = False, | ||
) -> IResponse: | ||
""" | ||
Sends a request to the given server. | ||
|
@@ -555,6 +563,9 @@ async def _send_request( | |
backoff_on_404: Back off if we get a 404 | ||
backoff_on_all_error_codes: Back off if we get any error response | ||
|
||
follow_redirects: True to follow the Location header of 307/308 redirect | ||
responses. This does not recurse. | ||
|
||
Returns: | ||
Resolves with the HTTP response object on success. | ||
|
||
|
@@ -714,6 +725,26 @@ async def _send_request( | |
response.code, | ||
response_phrase, | ||
) | ||
elif ( | ||
response.code in (307, 308) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I thought about wrapping the agent in a |
||
and follow_redirects | ||
and response.headers.hasHeader("Location") | ||
): | ||
# The Location header *might* be relative so resolve it. | ||
location = response.headers.getRawHeaders(b"Location")[0] | ||
new_uri = urllib.parse.urljoin(request.uri, location) | ||
|
||
return await self._send_request( | ||
attr.evolve(request, uri=new_uri, generate_uri=False), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it's spelled There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yes, pretty much. attrs strips the preceding underscores to let you have 'private' attributes that can be initialized. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
retry_on_dns_fail, | ||
timeout, | ||
long_retries, | ||
ignore_backoff, | ||
backoff_on_404, | ||
backoff_on_all_error_codes, | ||
# Do not continue following redirects. | ||
follow_redirects=False, | ||
) | ||
else: | ||
logger.info( | ||
"{%s} [%s] Got response headers: %d %s", | ||
|
@@ -1383,6 +1414,7 @@ async def get_file( | |
retry_on_dns_fail: bool = True, | ||
max_size: Optional[int] = None, | ||
ignore_backoff: bool = False, | ||
follow_redirects: bool = False, | ||
) -> Tuple[int, Dict[bytes, List[bytes]]]: | ||
"""GETs a file from a given homeserver | ||
Args: | ||
|
@@ -1392,6 +1424,8 @@ async def get_file( | |
args: Optional dictionary used to create the query string. | ||
ignore_backoff: true to ignore the historical backoff data | ||
and try the request anyway. | ||
follow_redirects: True to follow the Location header of 307/308 redirect | ||
responses. This does not recurse. | ||
|
||
Returns: | ||
Resolves with an (int,dict) tuple of | ||
|
@@ -1412,7 +1446,10 @@ async def get_file( | |
) | ||
|
||
response = await self._send_request( | ||
request, retry_on_dns_fail=retry_on_dns_fail, ignore_backoff=ignore_backoff | ||
request, | ||
retry_on_dns_fail=retry_on_dns_fail, | ||
ignore_backoff=ignore_backoff, | ||
follow_redirects=follow_redirects, | ||
) | ||
|
||
headers = dict(response.headers.getAllRawHeaders()) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,10 +27,11 @@ | |
|
||
from twisted.internet import defer | ||
from twisted.internet.defer import Deferred | ||
from twisted.python.failure import Failure | ||
from twisted.test.proto_helpers import MemoryReactor | ||
from twisted.web.resource import Resource | ||
|
||
from synapse.api.errors import Codes | ||
from synapse.api.errors import Codes, HttpResponseException | ||
from synapse.events import EventBase | ||
from synapse.http.types import QueryParams | ||
from synapse.logging.context import make_deferred_yieldable | ||
|
@@ -247,6 +248,7 @@ def get_file( | |
retry_on_dns_fail: bool = True, | ||
max_size: Optional[int] = None, | ||
ignore_backoff: bool = False, | ||
follow_redirects: bool = False, | ||
) -> "Deferred[Tuple[int, Dict[bytes, List[bytes]]]]": | ||
"""A mock for MatrixFederationHttpClient.get_file.""" | ||
|
||
|
@@ -257,10 +259,15 @@ def write_to( | |
output_stream.write(data) | ||
return response | ||
|
||
def write_err(f: Failure) -> Failure: | ||
f.trap(HttpResponseException) | ||
output_stream.write(f.value.response) | ||
return f | ||
Comment on lines
+262
to
+265
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This one was new to me: https://docs.twisted.org/en/stable/api/twisted.python.failure.Failure.html#trap TL;DR the trap call is a no-op if There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd missed that this was in test code though. Why do we need to add this as an errback all of a sudden? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We need to add it because we know call There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ahhh, I think I see: we never called errback on the mock until now! |
||
|
||
d: Deferred[Tuple[bytes, Tuple[int, Dict[bytes, List[bytes]]]]] = Deferred() | ||
self.fetches.append((d, destination, path, args)) | ||
# Note that this callback changes the value held by d. | ||
d_after_callback = d.addCallback(write_to) | ||
d_after_callback = d.addCallbacks(write_to, write_err) | ||
return make_deferred_yieldable(d_after_callback) | ||
|
||
# Mock out the homeserver's MatrixFederationHttpClient | ||
|
@@ -316,10 +323,11 @@ def _req( | |
self.assertEqual(len(self.fetches), 1) | ||
self.assertEqual(self.fetches[0][1], "example.com") | ||
self.assertEqual( | ||
self.fetches[0][2], "/_matrix/media/r0/download/" + self.media_id | ||
self.fetches[0][2], "/_matrix/media/v3/download/" + self.media_id | ||
) | ||
self.assertEqual( | ||
self.fetches[0][3], {"allow_remote": "false", "timeout_ms": "20000"} | ||
self.fetches[0][3], | ||
{"allow_remote": "false", "timeout_ms": "20000", "allow_redirect": "true"}, | ||
) | ||
|
||
headers = { | ||
|
@@ -671,6 +679,52 @@ def test_cross_origin_resource_policy_header(self) -> None: | |
[b"cross-origin"], | ||
) | ||
|
||
def test_unknown_v3_endpoint(self) -> None: | ||
""" | ||
If the v3 endpoint fails, try the r0 one. | ||
""" | ||
channel = self.make_request( | ||
"GET", | ||
f"/_matrix/media/v3/download/{self.media_id}", | ||
shorthand=False, | ||
await_result=False, | ||
) | ||
self.pump() | ||
|
||
# We've made one fetch, to example.com, using the media URL, and asking | ||
# the other server not to do a remote fetch | ||
self.assertEqual(len(self.fetches), 1) | ||
self.assertEqual(self.fetches[0][1], "example.com") | ||
self.assertEqual( | ||
self.fetches[0][2], "/_matrix/media/v3/download/" + self.media_id | ||
) | ||
|
||
# The result which says the endpoint is unknown. | ||
unknown_endpoint = b'{"errcode":"M_UNRECOGNIZED","error":"Unknown request"}' | ||
self.fetches[0][0].errback( | ||
HttpResponseException(404, "NOT FOUND", unknown_endpoint) | ||
) | ||
|
||
self.pump() | ||
|
||
# There should now be another request to the r0 URL. | ||
self.assertEqual(len(self.fetches), 2) | ||
self.assertEqual(self.fetches[1][1], "example.com") | ||
self.assertEqual( | ||
self.fetches[1][2], f"/_matrix/media/r0/download/{self.media_id}" | ||
) | ||
|
||
headers = { | ||
b"Content-Length": [b"%d" % (len(self.test_image.data))], | ||
} | ||
|
||
self.fetches[1][0].callback( | ||
(self.test_image.data, (len(self.test_image.data), headers)) | ||
) | ||
|
||
self.pump() | ||
self.assertEqual(channel.code, 200) | ||
|
||
|
||
class TestSpamCheckerLegacy: | ||
"""A spam checker module that rejects all media that includes the bytes | ||
|
Uh oh!
There was an error while loading. Please reload this page.