Commit 7faf143: Change sdpa

aymeric-roucher committed Oct 22, 2024
Parent: 886237a

Showing 3 changed files with 8 additions and 3 deletions.
src/transformers/models/aria/configuration_aria.py (1 addition, 3 deletions)

@@ -91,8 +91,8 @@ def __init__(
         self.image_size = image_size
         self.attention_dropout = attention_dropout
         self.layer_norm_eps = layer_norm_eps
-        self._attn_implementation = "eager"
         self.hidden_act = hidden_act
+        self._supports_sdpa = False

     @classmethod
     def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
@@ -198,7 +198,6 @@ def __init__(
         self.moe_z_loss_coeff = moe_z_loss_coeff
         self.moe_aux_loss_coeff = moe_aux_loss_coeff
         self.moe_num_shared_experts = moe_num_shared_experts
-        self._attn_implementation = "eager"

         super().__init__(
             pad_token_id=pad_token_id,
@@ -249,7 +248,6 @@ def __init__(
         super().__init__(**kwargs)
         self.ignore_index = ignore_index
         self.image_token_index = image_token_index
-        self._attn_implementation = "eager"

         # Convert the keys and values of projector_patch_to_query_dict to integers
         # This ensures consistency even if they were provided as strings
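The configuration change above replaces a hard-coded `self._attn_implementation = "eager"` in three config constructors with a single `_supports_sdpa = False` capability flag. A minimal sketch of how such a flag can drive backend selection (an assumed mechanism for illustration, not the actual transformers internals):

```python
# Resolve an attention backend from a requested implementation plus a
# capability flag, instead of overwriting the choice in every __init__.
def resolve_attn_implementation(requested: str, supports_sdpa: bool) -> str:
    """Pick an attention backend given what the model supports."""
    if requested == "sdpa" and not supports_sdpa:
        return "eager"  # fall back to the reference implementation
    return requested

assert resolve_attn_implementation("sdpa", supports_sdpa=False) == "eager"
assert resolve_attn_implementation("sdpa", supports_sdpa=True) == "sdpa"
```

The advantage is that the fallback happens in one place, so configs no longer need to stomp on whatever implementation the user requested.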
src/transformers/models/aria/modeling_aria.py (3 additions, 0 deletions)

@@ -878,6 +878,7 @@ class AriaVisionTransformer(AriaPreTrainedModel):
     """

     config_class = AriaVisionConfig
+    _supports_sdpa = False

     def __init__(self, config: AriaVisionConfig):
         super().__init__(config)
@@ -992,6 +993,7 @@ class AriaVisionModel(AriaPreTrainedModel):

     config_class = AriaVisionConfig
     main_input_name = "pixel_values"
+    _supports_sdpa = False

     def __init__(self, config: AriaVisionConfig):
         super().__init__(config)
@@ -2836,6 +2838,7 @@ class AriaForConditionalGeneration(AriaPreTrainedModel, GenerationMixin):
     Args:
         config (AriaConfig): Configuration object for the model.
     """
+    _supports_sdpa = False

     def __init__(self, config: AriaConfig):
         super().__init__(config)
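Here the same flag is set on the model classes themselves, which is where transformers' capability checks look for it when an attention implementation is requested. A hypothetical usage sketch (the checkpoint id and the top-level export of the class are assumptions, since this commit predates the model's release):

```python
# Assumed usage once the Aria port lands; "rhymes-ai/Aria" is an assumed
# checkpoint id. With _supports_sdpa = False on the vision classes, asking
# for "sdpa" there should be refused or downgraded, so "eager" is the safe
# explicit choice.
from transformers import AriaForConditionalGeneration

model = AriaForConditionalGeneration.from_pretrained(
    "rhymes-ai/Aria",
    attn_implementation="eager",  # the vision tower has no SDPA path
)
print(model.config._attn_implementation)  # expected: "eager"
```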
src/transformers/models/aria/modular_aria.py (4 additions, 0 deletions)

@@ -85,6 +85,7 @@ class AriaVisionTransformer(Idefics3VisionTransformer):
     This class extends the original Idefics3VisionTransformer by removing the post-layernorm operation.
     """
+    _supports_sdpa = False

     def __init__(self, config: AriaVisionConfig):
         super().__init__(config)
@@ -110,6 +111,7 @@ class AriaVisionModel(SiglipVisionModel):

     config_class = AriaVisionConfig
     main_input_name = "pixel_values"
+    _supports_sdpa = False

     def __init__(self, config: AriaVisionConfig):
         super().__init__(config)
@@ -1130,6 +1132,7 @@ class AriaForConditionalGeneration(AriaPreTrainedModel, GenerationMixin):
     Args:
         config (AriaConfig): Configuration object for the model.
     """
+    _supports_sdpa = False

     def __init__(self, config: AriaConfig):
         super().__init__(config)
@@ -1150,6 +1153,7 @@ def __init__(self, config: AriaConfig):
             config.text_config, attn_implementation=config._attn_implementation
         )
         self.pad_token_id = self.config.pad_token_id if self.config.pad_token_id is not None else -1
+        self._use_flash_attention_2 = config.text_config._attn_implementation == "flash_attention_2"
         self.post_init()

     def get_input_embeddings(self):
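The last hunk relies on composite-model configs where each sub-config carries its own `_attn_implementation`, so the text backbone can run flash-attention-2 while the SDPA-less vision tower stays on eager. A self-contained sketch of that pattern (illustrative dataclasses, not the transformers config classes):

```python
# Each sub-config carries its own attention implementation; the composite
# model inspects the text sub-config, mirroring the added line in the diff.
from dataclasses import dataclass

@dataclass
class SubConfig:
    _attn_implementation: str

@dataclass
class CompositeConfig:
    text_config: SubConfig
    vision_config: SubConfig

config = CompositeConfig(
    text_config=SubConfig("flash_attention_2"),
    vision_config=SubConfig("eager"),
)

use_flash_attention_2 = config.text_config._attn_implementation == "flash_attention_2"
assert use_flash_attention_2 and config.vision_config._attn_implementation == "eager"
```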
