Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature: Add Support for Data URI #385

Merged
merged 8 commits into from
Oct 1, 2024
Merged
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions libs/infinity_emb/infinity_emb/transformer/vision/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
# Copyright (c) 2023-now michaelfeil

import asyncio
import re
import io
from base64 import b64decode
from typing import List, Union

from infinity_emb._optional_imports import CHECK_AIOHTTP, CHECK_PIL
Expand Down Expand Up @@ -49,13 +51,50 @@ async def resolve_from_img_url(
f"error opening the payload from an image in your request from url: {e}"
)

def resolve_from_img_base64(uri: str) -> ImageSingle:
    """Resolve an image from a Data URI.

    Everything after the last comma in ``uri`` is treated as the base64
    payload (the whole string is used when there is no comma). The payload
    is decoded and handed to ``Image.open``.

    Args:
        uri: Data URI whose payload holds the base64-encoded image bytes.

    Returns:
        An ``ImageSingle`` wrapping the opened image.

    Raises:
        ImageCorruption: if the payload cannot be decoded or opened. The
            underlying exception is chained as ``__cause__``.
    """
    try:
        # Only the text after the final comma matters, so split once from
        # the right instead of on every comma.
        base64_image = uri.rsplit(",", 1)[-1]
        decoded_image = b64decode(base64_image)
        img = Image.open(io.BytesIO(decoded_image))
        return ImageSingle(image=img)
    except Exception as e:
        # Chain the original error so the traceback keeps the root cause.
        raise ImageCorruption(f"error decoding data URI: {e}") from e


# Compiled once at import time so each call only pays for the match itself.
# One mandatory group of four base64 characters, any number of further
# groups of four, then an optional final group padded with "=" or "==".
_BASE64_PATTERN = re.compile(
    r"[A-Za-z0-9+/]{4}(?:[A-Za-z0-9+/]{4})*"
    r"(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?"
)


def is_base64_check(s: str) -> bool:
    """Regex check to quickly check if string is base64 or not.

    The whole string must consist of standard-alphabet base64 groups. Because
    the first group of four is mandatory and unpadded, the empty string and
    payloads that are only a single padded group (e.g. ``"QQ=="``) are
    rejected.

    Args:
        s: Candidate string, without any ``data:...,`` prefix.

    Returns:
        True if ``s`` matches the base64 shape in full, False otherwise.
    """
    # NOTE(review): on the pull request that introduced this helper, a
    # reviewer asked whether this full-string scan is performance-efficient
    # and whether it is needed at all, given that the data URL format spells
    # out "base64" explicitly. A follow-up asked the maintainer whether the
    # check is wanted; the question was still open in that thread.
    #
    # fullmatch is used instead of match with a "$" anchor: "$" also matches
    # just before a trailing newline, which let "AAAA\n" pass as valid.
    return _BASE64_PATTERN.fullmatch(s) is not None


def is_base64_data_uri(uri: str) -> bool:
    """Simply check if the uri is a Data URI or not.

    A value counts as a base64 Data URI when it is a string that starts with
    ``data:`` and the text after its last comma passes ``is_base64_check``.

    Ref: https://developer.mozilla.org/en-US/docs/web/http/basics_of_http/data_urls

    Args:
        uri: Candidate value. Non-string values are accepted and yield False.

    Returns:
        True if ``uri`` looks like a Data URI with a base64 payload.
    """
    # resolve_image calls this before its own isinstance(img, str) test, so a
    # non-string must be answered with False here rather than raising.
    if not isinstance(uri, str):
        return False

    # Cheap prefix test first: ordinary URLs never reach the regex scan.
    if not uri.startswith("data:"):
        return False

    b64_data_q = uri.rsplit(",", 1)[-1]
    return is_base64_check(b64_data_q)



async def resolve_image(
img: Union[str, "ImageClassType"], session: "aiohttp.ClientSession"
) -> ImageSingle:
"""Resolve a single image."""
if isinstance(img, Image.Image):
return resolve_from_img_obj(img)
elif is_base64_data_uri(img):
return resolve_from_img_base64(img)
elif isinstance(img, str):
return await resolve_from_img_url(img, session=session)
else:
Expand Down
Loading