-
Notifications
You must be signed in to change notification settings - Fork 3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support Export of Openai Whisper [Batched decoding ver] #18815
Support Export of Openai Whisper [Batched decoding ver] #18815
Conversation
onnxruntime/python/tools/transformers/models/whisper/whisper_openai_helper.py
Fixed
Show fixed
Hide fixed
onnxruntime/python/tools/transformers/models/whisper/whisper_openai_helper.py
Fixed
Show fixed
Hide fixed
onnxruntime/python/tools/transformers/models/whisper/whisper_openai_helper.py
Fixed
Show fixed
Hide fixed
onnxruntime/python/tools/transformers/models/whisper/whisper_openai_helper.py
Fixed
Show fixed
Hide fixed
onnxruntime/python/tools/transformers/models/whisper/whisper_openai_helper.py
Fixed
Show fixed
Hide fixed
@@ -0,0 +1,87 @@ | |||
# ------------------------------------------------------------------------- |
Check warning
Code scanning / lintrunner
BLACK-ISORT/format Warning
onnxruntime/python/tools/transformers/models/whisper/whisper_openai_helper.py
Fixed
Show fixed
Hide fixed
onnxruntime/python/tools/transformers/models/whisper/whisper_openai_helper.py
Fixed
Show fixed
Hide fixed
@@ -0,0 +1,486 @@ | |||
# ------------------------------------------------------------------------- |
Check warning
Code scanning / lintrunner
BLACK-ISORT/format Warning
@@ -312,8 +378,15 @@ | |||
"tensor(uint8)": np.uint8, | |||
} | |||
|
|||
# Generate prompts | |||
prompt_text = "Christians" | |||
prompt_ids = processor.get_prompt_ids(prompt_text) |
Check warning
Code scanning / lintrunner
RUFF/F841 Warning
See https://docs.astral.sh/ruff/rules/unused-variable
onnxruntime/python/tools/transformers/models/whisper/whisper_openai_helper.py
Fixed
Show fixed
Hide fixed
onnxruntime/python/tools/transformers/models/whisper/whisper_openai_helper.py
Fixed
Show fixed
Hide fixed
onnxruntime/python/tools/transformers/models/whisper/whisper_openai_helper.py
Fixed
Show fixed
Hide fixed
onnxruntime/python/tools/transformers/models/whisper/whisper_openai_helper.py
Fixed
Show fixed
Hide fixed
onnxruntime/python/tools/transformers/models/whisper/whisper_openai_helper.py
Fixed
Show fixed
Hide fixed
onnxruntime/python/tools/transformers/models/whisper/whisper_openai_helper.py
Fixed
Show fixed
Hide fixed
self.attention_fusion = FusionBartAttentionOpenai( | ||
self, | ||
self.hidden_size, | ||
self.num_heads, | ||
self.attention_mask | ||
) |
Check warning
Code scanning / CodeQL
Overwriting attribute in super-class or sub-class Warning
BertOnnxModel
self.attention_mask | ||
) | ||
else: | ||
self.attention_fusion = FusionBartAttention(self, self.hidden_size, self.num_heads, self.attention_mask) |
Check warning
Code scanning / CodeQL
Overwriting attribute in super-class or sub-class Warning
BertOnnxModel
@@ -343,21 +420,24 @@ | |||
diff = pt_outputs - ort_outputs | |||
max_diff = max(diff.min(), diff.max(), key=abs) | |||
|
|||
if max_diff > 0: | |||
if True: |
Check warning
Code scanning / CodeQL
Constant in conditional expression or statement Warning
@@ -312,8 +378,15 @@ | |||
"tensor(uint8)": np.uint8, | |||
} | |||
|
|||
# Generate prompts | |||
prompt_text = "Christians" | |||
prompt_ids = processor.get_prompt_ids(prompt_text) |
Check notice
Code scanning / CodeQL
Unused local variable Note
1c2d388
to
12f0ff1
Compare
@@ -8,6 +8,7 @@ | |||
import logging | |||
import os | |||
import tempfile | |||
import copy |
Check warning
Code scanning / lintrunner
RUFF/F811 Warning
See https://docs.astral.sh/ruff/rules/redefined-while-unused
@@ -6,6 +6,7 @@ | |||
|
|||
import logging | |||
import os | |||
import io |
Check warning
Code scanning / lintrunner
RUFF/F401 Warning
See https://docs.astral.sh/ruff/rules/unused-import
@@ -19,6 +20,10 @@ | |||
from whisper_encoder import WhisperEncoder, WhisperEncoderHelper | |||
from whisper_encoder_decoder_init import WhisperEncoderDecoderInit, WhisperEncoderDecoderInitHelper | |||
|
|||
from whisper.model import Whisper, ModelDimensions |
Check warning
Code scanning / lintrunner
RUFF/F401 Warning
See https://docs.astral.sh/ruff/rules/unused-import
@@ -19,6 +20,10 @@ | |||
from whisper_encoder import WhisperEncoder, WhisperEncoderHelper | |||
from whisper_encoder_decoder_init import WhisperEncoderDecoderInit, WhisperEncoderDecoderInitHelper | |||
|
|||
from whisper.model import Whisper, ModelDimensions |
Check warning
Code scanning / lintrunner
RUFF/F401 Warning
See https://docs.astral.sh/ruff/rules/unused-import
@@ -19,6 +20,10 @@ | |||
from whisper_encoder import WhisperEncoder, WhisperEncoderHelper | |||
from whisper_encoder_decoder_init import WhisperEncoderDecoderInit, WhisperEncoderDecoderInitHelper | |||
|
|||
from whisper.model import Whisper, ModelDimensions | |||
from whisper import _MODELS, _ALIGNMENT_HEADS |
Check warning
Code scanning / lintrunner
RUFF/F401 Warning
See https://docs.astral.sh/ruff/rules/unused-import
@@ -19,6 +20,10 @@ | |||
from whisper_encoder import WhisperEncoder, WhisperEncoderHelper | |||
from whisper_encoder_decoder_init import WhisperEncoderDecoderInit, WhisperEncoderDecoderInitHelper | |||
|
|||
from whisper.model import Whisper, ModelDimensions | |||
from whisper import _MODELS, _ALIGNMENT_HEADS |
Check warning
Code scanning / lintrunner
RUFF/F401 Warning
See https://docs.astral.sh/ruff/rules/unused-import
@@ -19,6 +20,10 @@ | |||
from whisper_encoder import WhisperEncoder, WhisperEncoderHelper | |||
from whisper_encoder_decoder_init import WhisperEncoderDecoderInit, WhisperEncoderDecoderInitHelper | |||
|
|||
from whisper.model import Whisper, ModelDimensions | |||
from whisper import _MODELS, _ALIGNMENT_HEADS | |||
from whisper import _download |
Check warning
Code scanning / lintrunner
RUFF/F401 Warning
See https://docs.astral.sh/ruff/rules/unused-import
@@ -8,6 +8,7 @@ | |||
import logging | |||
import os | |||
import tempfile | |||
import copy |
Check notice
Code scanning / CodeQL
Module is imported more than once Note
on line 7
@@ -6,6 +6,7 @@ | |||
|
|||
import logging | |||
import os | |||
import io |
Check notice
Code scanning / CodeQL
Unused import Note
@@ -19,6 +20,10 @@ | |||
from whisper_encoder import WhisperEncoder, WhisperEncoderHelper | |||
from whisper_encoder_decoder_init import WhisperEncoderDecoderInit, WhisperEncoderDecoderInitHelper | |||
|
|||
from whisper.model import Whisper, ModelDimensions |
Check notice
Code scanning / CodeQL
Unused import Note
Import of 'ModelDimensions' is not used.
@@ -19,6 +20,10 @@ | |||
from whisper_encoder import WhisperEncoder, WhisperEncoderHelper | |||
from whisper_encoder_decoder_init import WhisperEncoderDecoderInit, WhisperEncoderDecoderInitHelper | |||
|
|||
from whisper.model import Whisper, ModelDimensions | |||
from whisper import _MODELS, _ALIGNMENT_HEADS |
Check notice
Code scanning / CodeQL
Unused import Note
Import of '_ALIGNMENT_HEADS' is not used.
@@ -19,6 +20,10 @@ | |||
from whisper_encoder import WhisperEncoder, WhisperEncoderHelper | |||
from whisper_encoder_decoder_init import WhisperEncoderDecoderInit, WhisperEncoderDecoderInitHelper | |||
|
|||
from whisper.model import Whisper, ModelDimensions | |||
from whisper import _MODELS, _ALIGNMENT_HEADS | |||
from whisper import _download |
Check notice
Code scanning / CodeQL
Unused import Note
onnxruntime/contrib_ops/cpu/transformers/beam_search_parameters.cc
Outdated
Show resolved
Hide resolved
@@ -50,6 +50,8 @@ ONNX_OPERATOR_KERNEL_EX( | |||
.InputMemoryType(OrtMemTypeCPUInput, 10) // 'decoder_input_ids' needs to be on CPU | |||
.InputMemoryType(OrtMemTypeCPUInput, 11) // 'logits_processor' needs to be on CPU | |||
.InputMemoryType(OrtMemTypeCPUInput, 14) // 'temperature' needs to be on CPU | |||
.InputMemoryType(OrtMemTypeCPUInput, 15) // 'left_pad_mask' needs to be on CPU |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Update new input descriptions
@@ -374,8 +381,15 @@ def verify_onnx( | |||
"tensor(uint8)": np.uint8, | |||
} | |||
|
|||
# Generate prompts | |||
prompt_text = "Christians" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Clean up testing module
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Try running lintrunner -a
to fix the lint issues.
@@ -124,7 +125,12 @@ class BartOnnxModel(BertOnnxModel): | |||
def __init__(self, model, num_heads, hidden_size, model_impl="hf"): | |||
super().__init__(model, num_heads, hidden_size) | |||
self.attention_mask = AttentionMask(self) | |||
self.attention_fusion = FusionBartAttention(self, self.hidden_size, self.num_heads, self.attention_mask) | |||
if model_impl == "openai": |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we do not need model_impl. We could instead do the following:
fuse attention with FusionBartAttentionOpenai
fuse attention with FusionBartAttention
That way it can handle the graph patterns of both OpenAI and HF.
@@ -59,6 +59,7 @@ def __init__(self, model_type): | |||
|
|||
if model_type == "clip": | |||
self.enable_embed_layer_norm = False | |||
self.model_impl = "hf" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is no need for this option. See another of my comment.
@shubhambhokare1 Any update on this PR?
#19854 has the same functionality without the requirement of adding new inputs to the subgraphs.
This depends on the merge of #17316.
Two additional inputs are added to the encoder-decoder-init subgraph (for the first decoder run):
x = self.token_embedding(x) + self.positional_embedding[position_ids, :]
]