Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Fix preview of some gif URLs #11669

Merged
merged 4 commits into from
Jan 18, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/11669.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix preview of some GIF URLs (like tenor.com). Contributed by Philippe Daouadi.
20 changes: 12 additions & 8 deletions docs/development/url_previews.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,19 +34,23 @@ When Synapse is asked to preview a URL it does the following:
2. Generates an Open Graph response based on image properties.
5. If the media is HTML:
1. Decodes the HTML via the stored file.
2. Generates an Open Graph response from the HTML.
3. If an image exists in the Open Graph response:
2. If a JSON oEmbed URL was found in the HTML:
1. Convert the oEmbed response to an Open Graph response.
2. If a thumbnail or image is in the oEmbed response:
1. Downloads the URL and stores it into a file via the media storage
provider and saves the local media metadata.
2. Generates thumbnails.
3. Updates the Open Graph response based on image properties.
3. If the oEmbed type is video but no video is provided, abort oEmbed
parsing and fall back to Open Graph.
3. Generates an Open Graph response from the HTML.
4. If an image exists in the Open Graph response:
blastrock marked this conversation as resolved.
Show resolved Hide resolved
1. Downloads the URL and stores it into a file via the media storage
provider and saves the local media metadata.
2. Generates thumbnails.
3. Updates the Open Graph response based on image properties.
6. If the media is JSON and an oEmbed URL was found:
clokep marked this conversation as resolved.
Show resolved Hide resolved
1. Convert the oEmbed response to an Open Graph response.
2. If a thumbnail or image is in the oEmbed response:
1. Downloads the URL and stores it into a file via the media storage
provider and saves the local media metadata.
2. Generates thumbnails.
3. Updates the Open Graph response based on image properties.
1. Parse it as described in step 3.5.2 (convert the oEmbed response to an Open Graph response).
blastrock marked this conversation as resolved.
Show resolved Hide resolved
7. Stores the result in the database cache.
4. Returns the result.

Expand Down
9 changes: 6 additions & 3 deletions synapse/rest/media/v1/oembed.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
class OEmbedResult:
# The Open Graph result (converted from the oEmbed result).
open_graph_result: JsonDict
# The author_name of the OEmbed result
blastrock marked this conversation as resolved.
Show resolved Hide resolved
author_name: Optional[str]
# Number of milliseconds to cache the content, according to the oEmbed response.
#
# This will be None if no cache-age is provided in the oEmbed response (or
Expand Down Expand Up @@ -154,11 +156,12 @@ def parse_oembed_response(self, url: str, raw_body: bytes) -> OEmbedResult:
"og:url": url,
}

# Use either title or author's name as the title.
title = oembed.get("title") or oembed.get("author_name")
title = oembed.get("title")
if title:
open_graph_response["og:title"] = title

author_name = oembed.get("author_name")

# Use the provider name as the site.
provider_name = oembed.get("provider_name")
if provider_name:
Expand Down Expand Up @@ -195,7 +198,7 @@ def parse_oembed_response(self, url: str, raw_body: bytes) -> OEmbedResult:
open_graph_response = {}
cache_age = None

return OEmbedResult(open_graph_response, cache_age)
return OEmbedResult(open_graph_response, author_name, cache_age)


def _fetch_urls(tree: "etree.Element", tag_name: str) -> List[str]:
Expand Down
33 changes: 23 additions & 10 deletions synapse/rest/media/v1/preview_url_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,7 @@ async def _do_preview(self, url: str, user: UserID, ts: int) -> bytes:

# The number of milliseconds that the response should be considered valid.
expiration_ms = media_info.expires
author_name: Optional[str] = None

if _is_media(media_info.media_type):
file_id = media_info.filesystem_id
Expand Down Expand Up @@ -294,25 +295,32 @@ async def _do_preview(self, url: str, user: UserID, ts: int) -> bytes:
# Check if this HTML document points to oEmbed information and
# defer to that.
oembed_url = self._oembed.autodiscover_from_html(tree)
og = {}
og_from_oembed: JsonDict = {}
if oembed_url:
oembed_info = await self._download_url(oembed_url, user)
og, expiration_ms = await self._handle_oembed_response(
(
og_from_oembed,
author_name,
expiration_ms,
) = await self._handle_oembed_response(
url, oembed_info, expiration_ms
)

# If there was no oEmbed URL (or oEmbed parsing failed), attempt
# to generate the Open Graph information from the HTML.
if not oembed_url or not og:
og = parse_html_to_open_graph(tree, media_info.uri)
og_from_og = parse_html_to_open_graph(tree, media_info.uri)
blastrock marked this conversation as resolved.
Show resolved Hide resolved

# If there was no oEmbed URL, or oEmbed parsing failed, or the
# information retrieved was incomplete, we complete it from
# the Open Graph information. We give oEmbed information
# precedence.
blastrock marked this conversation as resolved.
Show resolved Hide resolved
og = {**og_from_og, **og_from_oembed}

await self._precache_image_url(user, media_info, og)
else:
og = {}

elif oembed_url:
# Handle the oEmbed information.
og, expiration_ms = await self._handle_oembed_response(
og, author_name, expiration_ms = await self._handle_oembed_response(
url, media_info, expiration_ms
)
await self._precache_image_url(user, media_info, og)
Expand All @@ -321,6 +329,11 @@ async def _do_preview(self, url: str, user: UserID, ts: int) -> bytes:
logger.warning("Failed to find any OG data in %s", url)
og = {}

# If we don't have a title but we have author_name, copy it as
# title
if not og.get("og:title") and author_name:
og["og:title"] = author_name

# filter out any stupidly long values
keys_to_remove = []
for k, v in og.items():
Expand Down Expand Up @@ -484,7 +497,7 @@ async def _precache_image_url(

async def _handle_oembed_response(
self, url: str, media_info: MediaInfo, expiration_ms: int
) -> Tuple[JsonDict, int]:
) -> Tuple[JsonDict, Optional[str], int]:
blastrock marked this conversation as resolved.
Show resolved Hide resolved
"""
Parse the downloaded oEmbed info.

Expand All @@ -501,7 +514,7 @@ async def _handle_oembed_response(
"""
# If JSON was not returned, there's nothing to do.
if not _is_json(media_info.media_type):
return {}, expiration_ms
return {}, None, expiration_ms

with open(media_info.filename, "rb") as file:
body = file.read()
Expand All @@ -513,7 +526,7 @@ async def _handle_oembed_response(
if open_graph_result and oembed_response.cache_age is not None:
expiration_ms = oembed_response.cache_age

return open_graph_result, expiration_ms
return open_graph_result, oembed_response.author_name, expiration_ms

def _start_expire_url_cache_data(self) -> Deferred:
return run_as_background_process(
Expand Down