
ImageProcessor - check if input pixel values between 0-255 (huggingface#25688)

* Check if pixel values between 0-255 and add doc clarification

* Add missing docstrings

* _is_scale_image -> is_scaled_image

* Spelling is hard

* Tidy up
amyeroberts authored and EduardoPach committed Nov 18, 2023
1 parent c6fc39a commit 00bef62
Showing 43 changed files with 370 additions and 39 deletions.
11 changes: 11 additions & 0 deletions src/transformers/image_utils.py
@@ -96,6 +96,17 @@ def is_batched(img):
return False


def is_scaled_image(image: np.ndarray) -> bool:
"""
Checks to see whether the pixel values have already been rescaled to [0, 1].
"""
if image.dtype == np.uint8:
return False

# It's possible the image has pixel values in [0, 255] but is of floating type
return np.min(image) >= 0 and np.max(image) <= 1


def make_list_of_images(images, expected_ndims: int = 3) -> List[ImageInput]:
"""
Ensure that the input is a list of images. If the input is a single image, it is converted to a list of length 1.
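A minimal sketch of how the new helper behaves, assuming numpy is available and using the function exactly as added above:

import numpy as np
from transformers.image_utils import is_scaled_image

# uint8 arrays are never treated as already rescaled, whatever their values.
uint8_image = np.zeros((3, 224, 224), dtype=np.uint8)
print(is_scaled_image(uint8_image))  # False

# A float image whose values all fall within [0, 1] counts as already rescaled.
scaled = np.random.rand(3, 224, 224).astype(np.float32)
print(is_scaled_image(scaled))  # True

# A float image still in the [0, 255] range does not, as long as some value exceeds 1.
unscaled = (np.random.rand(3, 224, 224) * 255).astype(np.float32)
print(is_scaled_image(unscaled))  # False in practice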
9 changes: 8 additions & 1 deletion src/transformers/models/beit/image_processing_beit.py
@@ -28,6 +28,7 @@
ImageInput,
PILImageResampling,
infer_channel_dimension_format,
is_scaled_image,
make_list_of_images,
to_numpy_array,
valid_images,
@@ -236,6 +237,11 @@ def _preprocess_image(
"""Preprocesses a single image."""
# All transformations expect numpy arrays.
image = to_numpy_array(image)
if is_scaled_image(image) and do_rescale:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)
if input_data_format is None:
input_data_format = infer_channel_dimension_format(image)
image = self._preprocess(
@@ -328,7 +334,8 @@ def preprocess(
Args:
images (`ImageInput`):
Image to preprocess.
Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
passing in images with pixel values between 0 and 1, set `do_rescale=False`.
do_resize (`bool`, *optional*, defaults to `self.do_resize`):
Whether to resize the image.
size (`Dict[str, int]`, *optional*, defaults to `self.size`):
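For context, a hedged usage sketch of the situation this warning targets; the checkpoint name is an illustrative assumption and any BEiT image-processor checkpoint would do:

import numpy as np
from transformers import BeitImageProcessor

processor = BeitImageProcessor.from_pretrained("microsoft/beit-base-patch16-224")

# An image that has already been rescaled to [0, 1].
image = np.random.rand(224, 224, 3).astype(np.float32)

# With the default do_rescale=True the values would be divided by 255 a second time,
# which now triggers the warning above. Passing do_rescale=False avoids that.
inputs = processor(images=image, do_rescale=False, return_tensors="np")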
10 changes: 9 additions & 1 deletion src/transformers/models/bit/image_processing_bit.py
@@ -32,6 +32,7 @@
ImageInput,
PILImageResampling,
infer_channel_dimension_format,
is_scaled_image,
make_list_of_images,
to_numpy_array,
valid_images,
@@ -184,7 +185,8 @@ def preprocess(
Args:
images (`ImageInput`):
Image to preprocess.
Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
passing in images with pixel values between 0 and 1, set `do_rescale=False`.
do_resize (`bool`, *optional*, defaults to `self.do_resize`):
Whether to resize the image.
size (`Dict[str, int]`, *optional*, defaults to `self.size`):
@@ -270,6 +272,12 @@ def preprocess(
# All transformations expect numpy arrays.
images = [to_numpy_array(image) for image in images]

if is_scaled_image(images[0]) and do_rescale:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)

if input_data_format is None:
# We assume that all images have the same channel dimension format.
input_data_format = infer_channel_dimension_format(images[0])
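The guard logs through logger.warning_once, which in transformers emits a given message only the first time it is seen, so repeated preprocess calls do not flood the logs. A simplified sketch of the idea, not the library's actual implementation:

import functools
import logging

logger = logging.getLogger(__name__)

@functools.lru_cache(None)
def warning_once(message: str) -> None:
    # Caching on the message text means identical warnings are emitted only once
    # per process; later calls with the same message hit the cache and do nothing.
    logger.warning(message)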
10 changes: 9 additions & 1 deletion src/transformers/models/blip/image_processing_blip.py
@@ -27,6 +27,7 @@
ImageInput,
PILImageResampling,
infer_channel_dimension_format,
is_scaled_image,
make_list_of_images,
to_numpy_array,
valid_images,
@@ -176,7 +177,8 @@ def preprocess(
Args:
images (`ImageInput`):
Image to preprocess.
Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
passing in images with pixel values between 0 and 1, set `do_rescale=False`.
do_resize (`bool`, *optional*, defaults to `self.do_resize`):
Whether to resize the image.
size (`Dict[str, int]`, *optional*, defaults to `self.size`):
@@ -253,6 +255,12 @@ def preprocess(
# All transformations expect numpy arrays.
images = [to_numpy_array(image) for image in images]

if is_scaled_image(images[0]) and do_rescale:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)

if input_data_format is None:
# We assume that all images have the same channel dimension format.
input_data_format = infer_channel_dimension_format(images[0])
Additional changed file (filename not shown)
@@ -29,6 +29,7 @@
get_image_size,
infer_channel_dimension_format,
is_batched,
is_scaled_image,
to_numpy_array,
valid_images,
)
@@ -387,7 +388,8 @@ def preprocess(
Args:
images (`ImageInput`):
Image to preprocess.
Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
passing in images with pixel values between 0 and 1, set `do_rescale=False`.
do_resize (`bool`, *optional*, defaults to `self.do_resize`):
Whether to resize the image.
size (`Dict[str, int]`, *optional*, defaults to `self.size`):
@@ -469,6 +471,12 @@ def preprocess(
# All transformations expect numpy arrays.
images = [to_numpy_array(image) for image in images]

if is_scaled_image(images[0]) and do_rescale:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)

if do_resize:
images = [
self.resize(
Additional changed file (filename not shown)
@@ -32,6 +32,7 @@
ImageInput,
PILImageResampling,
infer_channel_dimension_format,
is_scaled_image,
make_list_of_images,
to_numpy_array,
valid_images,
@@ -182,7 +183,8 @@ def preprocess(
Args:
images (`ImageInput`):
Image to preprocess.
Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
passing in images with pixel values between 0 and 1, set `do_rescale=False`.
do_resize (`bool`, *optional*, defaults to `self.do_resize`):
Whether to resize the image.
size (`Dict[str, int]`, *optional*, defaults to `self.size`):
@@ -268,6 +270,12 @@ def preprocess(
# All transformations expect numpy arrays.
images = [to_numpy_array(image) for image in images]

if is_scaled_image(images[0]) and do_rescale:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)

if input_data_format is None:
# We assume that all images have the same channel dimension format.
input_data_format = infer_channel_dimension_format(images[0])
10 changes: 9 additions & 1 deletion src/transformers/models/clip/image_processing_clip.py
@@ -32,6 +32,7 @@
ImageInput,
PILImageResampling,
infer_channel_dimension_format,
is_scaled_image,
make_list_of_images,
to_numpy_array,
valid_images,
@@ -183,7 +184,8 @@ def preprocess(
Args:
images (`ImageInput`):
Image to preprocess.
Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
passing in images with pixel values between 0 and 1, set `do_rescale=False`.
do_resize (`bool`, *optional*, defaults to `self.do_resize`):
Whether to resize the image.
size (`Dict[str, int]`, *optional*, defaults to `self.size`):
@@ -269,6 +271,12 @@ def preprocess(
# All transformations expect numpy arrays.
images = [to_numpy_array(image) for image in images]

if is_scaled_image(images[0]) and do_rescale:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)

if input_data_format is None:
# We assume that all images have the same channel dimension format.
input_data_format = infer_channel_dimension_format(images[0])
Additional changed file (filename not shown)
@@ -42,6 +42,7 @@
PILImageResampling,
get_image_size,
infer_channel_dimension_format,
is_scaled_image,
make_list_of_images,
to_numpy_array,
valid_coco_detection_annotations,
@@ -1126,7 +1127,8 @@ def preprocess(
Args:
images (`ImageInput`):
Image or batch of images to preprocess.
Image or batch of images to preprocess. Expects a single or batch of images with pixel values ranging
from 0 to 255. If passing in images with pixel values between 0 and 1, set `do_rescale=False`.
annotations (`AnnotationType` or `List[AnnotationType]`, *optional*):
List of annotations associated with the image or batch of images. If annotation is for object
detection, the annotations should be a dictionary with the following keys:
@@ -1259,6 +1261,12 @@ def preprocess(
# All transformations expect numpy arrays
images = [to_numpy_array(image) for image in images]

if is_scaled_image(images[0]) and do_rescale:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)

if input_data_format is None:
# We assume that all images have the same channel dimension format.
input_data_format = infer_channel_dimension_format(images[0])
10 changes: 9 additions & 1 deletion src/transformers/models/convnext/image_processing_convnext.py
@@ -32,6 +32,7 @@
ImageInput,
PILImageResampling,
infer_channel_dimension_format,
is_scaled_image,
make_list_of_images,
to_numpy_array,
valid_images,
@@ -203,7 +204,8 @@ def preprocess(
Args:
images (`ImageInput`):
Image to preprocess.
Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
passing in images with pixel values between 0 and 1, set `do_rescale=False`.
do_resize (`bool`, *optional*, defaults to `self.do_resize`):
Whether to resize the image.
size (`Dict[str, int]`, *optional*, defaults to `self.size`):
@@ -280,6 +282,12 @@ def preprocess(
# All transformations expect numpy arrays.
images = [to_numpy_array(image) for image in images]

if is_scaled_image(images[0]) and do_rescale:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)

if input_data_format is None:
# We assume that all images have the same channel dimension format.
input_data_format = infer_channel_dimension_format(images[0])
Additional changed file (filename not shown)
@@ -42,6 +42,7 @@
PILImageResampling,
get_image_size,
infer_channel_dimension_format,
is_scaled_image,
make_list_of_images,
to_numpy_array,
valid_coco_detection_annotations,
@@ -1124,7 +1125,8 @@ def preprocess(
Args:
images (`ImageInput`):
Image or batch of images to preprocess.
Image or batch of images to preprocess. Expects a single or batch of images with pixel values ranging
from 0 to 255. If passing in images with pixel values between 0 and 1, set `do_rescale=False`.
annotations (`AnnotationType` or `List[AnnotationType]`, *optional*):
List of annotations associated with the image or batch of images. If annotation is for object
detection, the annotations should be a dictionary with the following keys:
@@ -1257,6 +1259,12 @@ def preprocess(
# All transformations expect numpy arrays
images = [to_numpy_array(image) for image in images]

if is_scaled_image(images[0]) and do_rescale:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)

if input_data_format is None:
# We assume that all images have the same channel dimension format.
input_data_format = infer_channel_dimension_format(images[0])
10 changes: 9 additions & 1 deletion src/transformers/models/deit/image_processing_deit.py
@@ -27,6 +27,7 @@
ImageInput,
PILImageResampling,
infer_channel_dimension_format,
is_scaled_image,
make_list_of_images,
to_numpy_array,
valid_images,
@@ -180,7 +181,8 @@ def preprocess(
Args:
images (`ImageInput`):
Image to preprocess.
Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
passing in images with pixel values between 0 and 1, set `do_rescale=False`.
do_resize (`bool`, *optional*, defaults to `self.do_resize`):
Whether to resize the image.
size (`Dict[str, int]`, *optional*, defaults to `self.size`):
@@ -258,6 +260,12 @@ def preprocess(
# All transformations expect numpy arrays.
images = [to_numpy_array(image) for image in images]

if is_scaled_image(images[0]) and do_rescale:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)

if input_data_format is None:
# We assume that all images have the same channel dimension format.
input_data_format = infer_channel_dimension_format(images[0])
10 changes: 9 additions & 1 deletion src/transformers/models/deta/image_processing_deta.py
@@ -40,6 +40,7 @@
get_image_size,
infer_channel_dimension_format,
is_batched,
is_scaled_image,
to_numpy_array,
valid_coco_detection_annotations,
valid_coco_panoptic_annotations,
@@ -796,7 +797,8 @@ def preprocess(
Args:
images (`ImageInput`):
Image or batch of images to preprocess.
Image or batch of images to preprocess. Expects a single or batch of images with pixel values ranging
from 0 to 255. If passing in images with pixel values between 0 and 1, set `do_rescale=False`.
annotations (`List[Dict]` or `List[List[Dict]]`, *optional*):
List of annotations associated with the image or batch of images. If annotation is for object
detection, the annotations should be a dictionary with the following keys:
@@ -921,6 +923,12 @@ def preprocess(
# All transformations expect numpy arrays
images = [to_numpy_array(image) for image in images]

if is_scaled_image(images[0]) and do_rescale:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)

if input_data_format is None:
# We assume that all images have the same channel dimension format.
input_data_format = infer_channel_dimension_format(images[0])
10 changes: 9 additions & 1 deletion src/transformers/models/detr/image_processing_detr.py
@@ -41,6 +41,7 @@
PILImageResampling,
get_image_size,
infer_channel_dimension_format,
is_scaled_image,
make_list_of_images,
to_numpy_array,
valid_coco_detection_annotations,
@@ -1096,7 +1097,8 @@ def preprocess(
Args:
images (`ImageInput`):
Image or batch of images to preprocess.
Image or batch of images to preprocess. Expects a single or batch of images with pixel values ranging
from 0 to 255. If passing in images with pixel values between 0 and 1, set `do_rescale=False`.
annotations (`AnnotationType` or `List[AnnotationType]`, *optional*):
List of annotations associated with the image or batch of images. If annotation is for object
detection, the annotations should be a dictionary with the following keys:
@@ -1229,6 +1231,12 @@ def preprocess(
# All transformations expect numpy arrays
images = [to_numpy_array(image) for image in images]

if is_scaled_image(images[0]) and do_rescale:
logger.warning_once(
"It looks like you are trying to rescale already rescaled images. If the input"
" images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
)

if input_data_format is None:
# We assume that all images have the same channel dimension format.
input_data_format = infer_channel_dimension_format(images[0])
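To see why the check matters, a small standalone numpy sketch (not library code) of what accidental double rescaling does to pixel values, assuming the processors' default rescale factor of 1/255:

import numpy as np

image = np.full((2, 2, 3), 128, dtype=np.uint8)

# First rescale: what the image processors do by default.
once = image.astype(np.float32) * (1 / 255)
print(once.max())   # ~0.502

# Rescaling again squashes every value toward zero; this is the case the new warning flags.
twice = once * (1 / 255)
print(twice.max())  # ~0.00197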