diff --git a/changelog.d/11985.misc b/changelog.d/11985.misc new file mode 100644 index 000000000000..1364a2b85421 --- /dev/null +++ b/changelog.d/11985.misc @@ -0,0 +1 @@ +Use bot user agent for Open Graph queries. diff --git a/synapse/res/providers.json b/synapse/res/providers.json index f1838f955901..7b9958e45464 100644 --- a/synapse/res/providers.json +++ b/synapse/res/providers.json @@ -5,8 +5,6 @@ "endpoints": [ { "schemes": [ - "https://twitter.com/*/status/*", - "https://*.twitter.com/*/status/*", "https://twitter.com/*/moments/*", "https://*.twitter.com/*/moments/*" ], @@ -14,4 +12,4 @@ } ] } -] \ No newline at end of file +] diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 8d3d1e54dc9f..1ead89df1b0a 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -326,8 +326,9 @@ async def _do_preview(self, url: str, user: UserID, ts: int) -> bytes: # Compile the Open Graph response by using the scraped # information from the HTML and overlaying any information - # from the oEmbed response. - og = {**og_from_html, **og_from_oembed} + # from the oEmbed response. Open Graph tags from the original HTML + # take priority over oEmbed data. + og = {**og_from_oembed, **og_from_html} await self._precache_image_url(user, media_info, og) else: @@ -402,7 +403,10 @@ async def _download_url(self, url: str, output_stream: BinaryIO) -> DownloadResu url, output_stream=output_stream, max_size=self.max_spider_size, - headers={"Accept-Language": self.url_preview_accept_language}, + headers={ + "Accept-Language": self.url_preview_accept_language, + b"User-Agent": ["Synapse (bot)"], + }, is_allowed_content_type=_is_previewable, ) except SynapseError: