Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Do not break URL previews if an image 404s #12950

Merged
merged 2 commits into from
Jun 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/12950.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix a long-standing bug where a URL preview would break if the image failed to download.
23 changes: 17 additions & 6 deletions synapse/rest/media/v1/preview_url_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -586,20 +586,33 @@ async def _precache_image_url(
og: The Open Graph dictionary. This is modified with image information.
"""
# If there's no image or it is blank, there's nothing to do.
if "og:image" not in og or not og["og:image"]:
if "og:image" not in og:
return

# Remove the raw image URL, this will be replaced with an MXC URL, if successful.
image_url = og.pop("og:image")
if not image_url:
return

# The image URL from the HTML might be relative to the previewed page,
# convert it to an URL which can be requested directly.
image_url = og["og:image"]
url_parts = urlparse(image_url)
if url_parts.scheme != "data":
image_url = urljoin(media_info.uri, image_url)

# FIXME: it might be cleaner to use the same flow as the main /preview_url
# request itself and benefit from the same caching etc. But for now we
# just rely on the caching on the master request to speed things up.
image_info = await self._handle_url(image_url, user, allow_data_urls=True)
try:
image_info = await self._handle_url(image_url, user, allow_data_urls=True)
except Exception as e:
# Pre-caching the image failed, don't block the entire URL preview.
logger.warning(
"Pre-caching image failed during URL preview: %s errored with %s",
image_url,
e,
)
Comment on lines +610 to +614
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't love catching all exceptions here; we don't care about e.g. 404s and such, but maybe we should log at error if something else happens?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How would you propose narrowing?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have no idea, I'm hoping someone else will have ideas! 😄 I unfortunately think we've eaten the original exception in the SImpleHttpClient though...and we do want to catch many different exceptions here (non-200 results, DNS, etc.)

return

if _is_media(image_info.media_type):
# TODO: make sure we don't choke on white-on-transparent images
Expand All @@ -611,13 +624,11 @@ async def _precache_image_url(
og["og:image:width"] = dims["width"]
og["og:image:height"] = dims["height"]
else:
logger.warning("Couldn't get dims for %s", og["og:image"])
logger.warning("Couldn't get dims for %s", image_url)

og["og:image"] = f"mxc://{self.server_name}/{image_info.filesystem_id}"
og["og:image:type"] = image_info.media_type
og["matrix:image:size"] = image_info.media_length
else:
del og["og:image"]

async def _handle_oembed_response(
self, url: str, media_info: MediaInfo, expiration_ms: int
Expand Down
35 changes: 35 additions & 0 deletions tests/rest/media/v1/test_url_preview.py
Original file line number Diff line number Diff line change
Expand Up @@ -656,6 +656,41 @@ def test_accept_language_config_option(self) -> None:
server.data,
)

def test_nonexistent_image(self) -> None:
"""If the preview image doesn't exist, ensure some data is returned."""
self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]

end_content = (
b"""<html><body><img src="http://cdn.matrix.org/foo.jpg"></body></html>"""
)

channel = self.make_request(
"GET",
"preview_url?url=http://matrix.org",
shorthand=False,
await_result=False,
)
self.pump()

client = self.reactor.tcpClients[0][2].buildProtocol(None)
server = AccumulatingProtocol()
server.makeConnection(FakeTransport(client, self.reactor))
client.makeConnection(FakeTransport(server, self.reactor))
client.dataReceived(
(
b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
b'Content-Type: text/html; charset="utf8"\r\n\r\n'
)
% (len(end_content),)
+ end_content
)

self.pump()
self.assertEqual(channel.code, 200)

# The image should not be in the result.
self.assertNotIn("og:image", channel.json_body)

def test_data_url(self) -> None:
"""
Requesting to preview a data URL is not supported.
Expand Down