matrix-org · clokep · Jan 18, 2022 · Jan 5, 2022 · Jan 1, 2022 · Jan 13, 2022
@@ -0,0 +1 @@
+Fix preview of some gif URLs (like tenor.com). Contributed by Philippe Daouadi.
@@ -34,19 +34,23 @@ When Synapse is asked to preview a URL it does the following:
       2. Generates an Open Graph response based on image properties.
    5. If the media is HTML:
       1. Decodes the HTML via the stored file.
-      2. Generates an Open Graph response from the HTML.
-      3. If an image exists in the Open Graph response:
+      2. If a JSON oEmbed URL was found in the HTML:
+         1. Convert the oEmbed response to an Open Graph response.
+         2. If a thumbnail or image is in the oEmbed response:
+            1. Downloads the URL and stores it into a file via the media storage
+               provider and saves the local media metadata.
+            2. Generates thumbnails.
+            3. Updates the Open Graph response based on image properties.
+         3. If the oEmbed type is video but no video is provided, abort oEmbed
+            parsing and fall back to Open Graph.
+      3. Generates an Open Graph response from the HTML.
+      4. If an image exists in the Open Graph response:
          1. Downloads the URL and stores it into a file via the media storage
             provider and saves the local media metadata.
          2. Generates thumbnails.
          3. Updates the Open Graph response based on image properties.
    6. If the media is JSON and an oEmbed URL was found:
-      1. Convert the oEmbed response to an Open Graph response.
-      2. If a thumbnail or image is in the oEmbed response:
-         1. Downloads the URL and stores it into a file via the media storage
-            provider and saves the local media metadata.
-         2. Generates thumbnails.
-         3. Updates the Open Graph response based on image properties.
+      1. Parse it as described in step 3.5.2.
    7. Stores the result in the database cache.
 4. Returns the result.
 

@@ -154,11 +154,14 @@ def parse_oembed_response(self, url: str, raw_body: bytes) -> OEmbedResult:
                 "og:url": url,
             }
 
-            # Use either title or author's name as the title.
-            title = oembed.get("title") or oembed.get("author_name")
+            title = oembed.get("title")
             if title:
                 open_graph_response["og:title"] = title
 
+            author_name = oembed.get("author_name")
+            if author_name:
+                open_graph_response["og:author_name"] = author_name
+
             # Use the provider name as the site.
             provider_name = oembed.get("provider_name")
             if provider_name:

@@ -294,17 +294,20 @@ async def _do_preview(self, url: str, user: UserID, ts: int) -> bytes:
                 # Check if this HTML document points to oEmbed information and
                 # defer to that.
                 oembed_url = self._oembed.autodiscover_from_html(tree)
-                og = {}
+                og_from_oembed: JsonDict = {}
                 if oembed_url:
                     oembed_info = await self._download_url(oembed_url, user)
-                    og, expiration_ms = await self._handle_oembed_response(
+                    og_from_oembed, expiration_ms = await self._handle_oembed_response(
                         url, oembed_info, expiration_ms
                     )
 
-                # If there was no oEmbed URL (or oEmbed parsing failed), attempt
-                # to generate the Open Graph information from the HTML.
-                if not oembed_url or not og:
-                    og = parse_html_to_open_graph(tree, media_info.uri)
+                og_from_og = parse_html_to_open_graph(tree, media_info.uri)
+
+                # If there was no oEmbed URL, or oEmbed parsing failed, or the
+                # information retrieved was incomplete, we complete it from
+                # the OpenGraph information. We give oEmbed information
+                # precedence.
+                og = {**og_from_og, **og_from_oembed}
 
                 await self._precache_image_url(user, media_info, og)
             else:
@@ -321,6 +324,11 @@ async def _do_preview(self, url: str, user: UserID, ts: int) -> bytes:
             logger.warning("Failed to find any OG data in %s", url)
             og = {}
 
+        # If we don't have a title but we have author_name, copy it as
+        # title
+        if not og.get("og:title") and og.get("og:author_name"):
+            og["og:title"] = og["og:author_name"]
+
         # filter out any stupidly long values
         keys_to_remove = []
         for k, v in og.items():

diff --git a/tests/rest/media/v1/test_url_preview.py b/tests/rest/media/v1/test_url_preview.py
@@ -674,6 +674,7 @@ def test_oembed_rich(self):
                 "og:url": "http://twitter.com/matrixdotorg/status/12345",
                 "og:title": "Alice",
                 "og:description": "Content Preview",
+                "og:author_name": "Alice",
             },
         )
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Fix preview of some gif URLs (like tenor.com). Contributed by Philippe Daouadi.