Commit 43f4a22
Update CLIP tokenizer (#2599)
The self.processor call would pass the padding argument on to the image_processor, which then emits a warning.
tomaarsen authored Apr 17, 2024
1 parent d50a04a commit 43f4a22
Showing 1 changed file with 10 additions and 8 deletions.
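
For context, a minimal sketch of the behaviour the commit message describes, using transformers' CLIPProcessor directly. This is not part of the commit: the checkpoint name and example inputs are illustrative, and the warning depends on a transformers version from around the time of this change, where the combined processor call forwards extra keyword arguments to both sub-processors.

```python
# Illustrative sketch, not part of this commit. Assumes a transformers version
# from around the time of this change and the openai/clip-vit-base-patch32 checkpoint.
from PIL import Image
from transformers import CLIPProcessor

processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
image = Image.new("RGB", (224, 224))  # dummy image stands in for real input

# Before this commit: one combined call, which also forwards padding=True to the
# image processor and makes it log a warning about the argument it does not use.
inputs = processor(text=["a photo of a cat"], images=[image], return_tensors="pt", padding=True)

# After this commit: text and images go through the dedicated sub-processors,
# so padding only ever reaches the tokenizer.
text_encoding = processor.tokenizer(["a photo of a cat"], return_tensors="pt", padding=True)
image_encoding = processor.image_processor([image], return_tensors="pt")
```
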
sentence_transformers/models/CLIPModel.py
@@ -63,14 +63,16 @@ def tokenize(self, texts, padding: Union[str, bool] = True):
                 texts_values.append(data)
                 image_text_info.append(1)
 
-        if len(texts_values) == 0:
-            texts_values = None
-        if len(images) == 0:
-            images = None
-
-        inputs = self.processor(text=texts_values, images=images, return_tensors="pt", padding=padding)
-        inputs["image_text_info"] = image_text_info
-        return inputs
+        encoding = {}
+        if len(texts_values):
+            encoding = self.processor.tokenizer(texts_values, return_tensors="pt", padding=padding)
+
+        if len(images):
+            image_features = self.processor.image_processor(images, return_tensors="pt")
+            encoding["pixel_values"] = image_features.pixel_values
+
+        encoding["image_text_info"] = image_text_info
+        return encoding
 
     def save(self, output_path: str):
         self.model.save_pretrained(output_path)
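
With the split in place, a text-only batch only exercises the tokenizer and an image-only batch only exercises the image processor, so the padding argument never reaches a component that does not understand it. A usage sketch of the module through SentenceTransformer; the clip-ViT-B-32 checkpoint and the image file name are assumptions for illustration, not part of the commit.

```python
# Usage sketch, not part of this commit. Assumes the clip-ViT-B-32 checkpoint
# is available; "cat.jpg" is a placeholder for any local image file.
from PIL import Image
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("clip-ViT-B-32")

# Text-only batch: tokenize() now calls processor.tokenizer only, so the
# image processor never sees the padding argument and no warning is logged.
text_emb = model.encode(["two dogs playing in the snow", "a cat on a sofa"])

# Image-only batch: tokenize() calls processor.image_processor only.
img_emb = model.encode([Image.open("cat.jpg")])

print(text_emb.shape, img_emb.shape)  # e.g. (2, 512) and (1, 512) for ViT-B/32
```
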
