Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update Rawpixel image URLs for ingestion #3669

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 41 additions & 6 deletions catalog/dags/providers/provider_api_scripts/rawpixel.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,19 @@ class RawpixelDataIngester(ProviderDataIngester):
"creative commons",
"public domain",
}
# Image size options
full_size_option = "image_1300"
# TODO: Use as part of https://github.com/WordPress/openverse-catalog/issues/817
thumbnail_size_option = "image_600_png"
# Image size options, without watermark.
full_size_option = "editor_1024" # Always serve webp format, "image_1000" can be used as an alternative with jpeg fallback
png_full_size_option = "image_png_1300" # Serve webp format if accepted else jpeg
png_dark_full_size_option = (
"dark_image_png_1300" # Serve webp format if accepted else jpeg
)
thumbnail_size_option = "image_600" # Serve webp format if accepted else jpeg
png_thumbnail_size_option = (
"image_png_600" # Serve webp format if accepted else jpeg
)
png_dark_thumbnail_size_option = (
"dark_image_png_600" # Serve webp format if accepted else jpeg
)

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
Expand Down Expand Up @@ -133,6 +142,24 @@ def get_batch_data(self, response_json):
return results
return None

@staticmethod
def _get_full_size_preset(self, data: dict) -> str | None:
    """
    Choose the full-size preset name that matches the image's format flags.

    This is declared as a static method, so ``self`` is an ordinary
    positional argument that callers pass explicitly; it is not used.

    The ``isPng`` and ``isDarkPng`` entries of ``data`` are read with
    ``dict.get`` and tested for truthiness. ``isDarkPng`` is only consulted
    when ``isPng`` is truthy.
    """
    # Anything not flagged as PNG uses the default full-size preset.
    if not data.get("isPng"):
        return RawpixelDataIngester.full_size_option
    # PNG images have a separate preset for the dark variant.
    if data.get("isDarkPng"):
        return RawpixelDataIngester.png_dark_full_size_option
    return RawpixelDataIngester.png_full_size_option

@staticmethod
def _get_thumbnail_size_preset(self, data: dict) -> str | None:
    """
    Get the thumbnail size preset based on the image properties.

    This is declared as a static method, so ``self`` is an ordinary
    positional argument that callers pass explicitly; it is not used.

    The ``isPng`` and ``isDarkPng`` entries of ``data`` are read with
    ``dict.get`` and tested for truthiness. ``isDarkPng`` is only consulted
    when ``isPng`` is truthy.
    """
    # Anything not flagged as PNG uses the default thumbnail preset.
    if not data.get("isPng"):
        return RawpixelDataIngester.thumbnail_size_option
    # PNG images have a separate thumbnail preset for the dark variant.
    if data.get("isDarkPng"):
        return RawpixelDataIngester.png_dark_thumbnail_size_option
    return RawpixelDataIngester.png_thumbnail_size_option

@staticmethod
def _get_image_url(data: dict, size_option: str) -> str | None:
"""
Expand All @@ -145,10 +172,12 @@ def _get_image_url(data: dict, size_option: str) -> str | None:
'image_png_400', 'image_500', 'image_png_500', 'image_600', 'image_png_600',
'image_700', 'image_png_700', 'image_800', 'image_png_800', 'image_900',
'image_png_900', 'image_1000', 'image_png_1000', 'image_1300',
'image_png_1300'
'image_png_1300', 'editor_1024'

The number refers to the width displayed, and a png option is provided for each
size.

Note that preset sizes starting from 1100 are watermarked.
"""
style_uri = data.get("style_uri")
if not style_uri:
Expand Down Expand Up @@ -259,13 +288,19 @@ def get_record_data(self, data: dict) -> dict | list[dict] | None:
if not (license_info := get_license_info(metadata["licenseUrl"])):
return None

if not (url := self._get_image_url(data, self.full_size_option)):
full_size_preset = self._get_full_size_preset(self, data)
if not (url := self._get_image_url(data, full_size_preset)):
return None

thumbnail_preset = self._get_thumbnail_size_preset(self, data)
if not (thumbnail_url := self._get_image_url(data, thumbnail_preset)):
thumbnail_url = None

width, height = self._get_image_properties(data)
return {
"foreign_landing_url": foreign_landing_url,
"url": url,
"thumbnail_url": thumbnail_url,
"license_info": license_info,
"foreign_identifier": foreign_identifier,
"width": width,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,8 @@ def test_get_record_data():
"foreign_identifier": 4032668,
"foreign_landing_url": "https://www.rawpixel.com/image/4032668/photo-image-background-nature-mountain", # noqa
"height": 5515,
"url": "https://images.rawpixel.com/image_1300/cHJpdmF0ZS9sci9pbWFnZXMvd2Vic2l0ZS8yMDIyLTA1L2ZsNDY0NDU5OTQ2MjQtaW1hZ2Uta3UyY21zcjUuanBn.jpg", # noqa
"url": "https://images.rawpixel.com/editor_1024/cHJpdmF0ZS9sci9pbWFnZXMvd2Vic2l0ZS8yMDIyLTA1L2ZsNDY0NDU5OTQ2MjQtaW1hZ2Uta3UyY21zcjUuanBn.jpg", # noqa,
"thumbnail_url": "https://images.rawpixel.com/image_600/cHJpdmF0ZS9sci9pbWFnZXMvd2Vic2l0ZS8yMDIyLTA1L2ZsNDY0NDU5OTQ2MjQtaW1hZ2Uta3UyY21zcjUuanBn.jpg", # noqa
"license_info": LicenseInfo(
license="cc0",
version="1.0",
Expand Down