forked from ggerganov/whisper.cpp
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
whisper : add OpenVINO support (ggerganov#1037)
* openvino: use OpenVINO encoder inference
* openvino: add python script for OpenVINO model generation
* whisper: Fix 'unused' warnings when OpenVINO isn't enabled in build
* Apply suggestions from code review

  Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* whisper: Fix compilation error
* whisper: revert whisper_get_openvino_path_encoder & whisper_get_openvino_path_cache to non-const func signatures
* cmake: Add openvino-encoder as separate object target
* whisper : minor style fixes
* minor : indentation fixes

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
- Loading branch information
1 parent
7303033
commit 4a11426
Showing 8 changed files with 367 additions and 3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
import argparse | ||
import torch | ||
from whisper import load_model | ||
import os | ||
from openvino.tools import mo | ||
from openvino.runtime import serialize | ||
import shutil | ||
|
||
def convert_encoder(hparams, encoder, mname):
    """Export a Whisper encoder to OpenVINO IR via an intermediate ONNX model.

    The encoder is exported to ONNX inside a scratch ``onnx_encoder`` folder
    located next to this script, converted to OpenVINO IR with FP16
    compression, and serialized to ``ggml-<mname>-encoder-openvino.xml``
    (a path relative to the current working directory). The scratch folder is
    removed afterwards, whether or not the conversion succeeded.

    Args:
        hparams: model dimensions; not used by the conversion itself, kept so
            existing callers keep working.
        encoder: the encoder module to export.
        mname: model name, used to build the output file name.
    """
    encoder.eval()

    # dummy input fed to the encoder during the ONNX export
    mel = torch.zeros((1, 80, 3000))

    onnx_folder = os.path.join(os.path.dirname(__file__), "onnx_encoder")

    # directory that holds the onnx model and any other collateral saved
    # during the onnx export procedure
    os.makedirs(onnx_folder, exist_ok=True)

    try:
        onnx_path = os.path.join(onnx_folder, "whisper_encoder.onnx")

        torch.onnx.export(
            encoder,
            mel,
            onnx_path,
            input_names=["mel"],
            output_names=["output_features"]
        )

        # use model optimizer to convert onnx to OpenVINO IR format
        encoder_model = mo.convert_model(onnx_path, compress_to_fp16=True)
        serialize(encoder_model, xml_path='ggml-' + mname + '-encoder-openvino.xml')
    finally:
        # cleanup runs even when export/conversion fails, so a failed run
        # does not leave the scratch folder behind
        if os.path.isdir(onnx_folder):
            shutil.rmtree(onnx_folder)
|
||
|
||
if __name__ == "__main__":
    # Model names accepted by --model.
    supported_models = [
        "tiny", "tiny.en",
        "base", "base.en",
        "small", "small.en",
        "medium", "medium.en",
        "large", "large-v1",
    ]

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--model",
        type=str,
        required=True,
        help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large, large-v1)",
    )
    model_name = arg_parser.parse_args().model

    # Reject unknown names before attempting to load anything.
    if model_name not in supported_models:
        raise ValueError("Invalid model name")

    # Load the model on the CPU; only its encoder is converted.
    whisper_model = load_model(model_name).cpu()

    # Convert encoder to onnx (and from there to OpenVINO IR)
    convert_encoder(whisper_model.dims, whisper_model.encoder, model_name)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
openvino-dev[pytorch,onnx] | ||
openai-whisper |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
#include "openvino/whisper-openvino-encoder.h" | ||
#include "ggml.h" | ||
#include <openvino/openvino.hpp> | ||
#include <iostream> | ||
|
||
struct whisper_openvino_context { | ||
ov::InferRequest inferRequest; | ||
}; | ||
|
||
struct whisper_openvino_context * whisper_openvino_init(const char* path_model, | ||
const char* device, | ||
const char* cache_dir) | ||
{ | ||
if (!path_model || !device) { | ||
fprintf(stderr, "%s: path_model and/or device is null\n", __func__); | ||
return nullptr; | ||
} | ||
|
||
fprintf(stderr, "%s: path_model = %s, device = %s, cache_dir = %s\n", | ||
__func__, path_model, device, cache_dir ? cache_dir : "(not set)"); | ||
|
||
whisper_openvino_context *context = new whisper_openvino_context; | ||
try { | ||
ov::Core core; | ||
|
||
if (cache_dir) { | ||
// enables caching of device-specific 'blobs' during core.compile_model | ||
// routine. This speeds up calls to compile_model for successive runs. | ||
core.set_property(ov::cache_dir(cache_dir)); | ||
} | ||
|
||
//Read the OpenVINO encoder IR (.xml/.bin) from disk, producing an ov::Model object. | ||
std::shared_ptr<ov::Model> model = core.read_model(path_model); | ||
|
||
// Produce a compiled-model object, given the device ("CPU", "GPU", etc.) | ||
auto compiledModel = core.compile_model(model, device); | ||
|
||
// From the compiled model object, create an infer request. This is the thing that we | ||
// we will use later on to trigger inference execution. | ||
context->inferRequest = compiledModel.create_infer_request(); | ||
} | ||
catch (const std::exception& error) { | ||
std::cout << "in openvino encoder compile routine: exception: " << error.what() << std::endl; | ||
delete context; | ||
context = nullptr; | ||
} | ||
|
||
return context; | ||
} | ||
|
||
// Release a context previously returned from whisper_openvino_init().
// Passing a null pointer is allowed and does nothing.
void whisper_openvino_free(struct whisper_openvino_context * ctx) {
    // deleting a null pointer is a no-op, so no explicit guard is needed
    delete ctx;
}
|
||
int whisper_openvino_encode( | ||
whisper_openvino_context* ctx, | ||
ggml_tensor* mel, | ||
ggml_tensor* out) { | ||
|
||
if (!ctx || !mel || !out) { | ||
fprintf(stderr, "%s: Error! ctx / mel / out is null\n", __func__); | ||
return 0; | ||
} | ||
|
||
if (mel->n_dims != 2) { | ||
fprintf(stderr, "%s: Error! mel ggml_tensor expected to have n_dims=2, but it has n_dims=%d\n", | ||
__func__, mel->n_dims); | ||
return 0; | ||
} | ||
|
||
if (out->n_dims != 2) { | ||
fprintf(stderr, "%s: Error! out ggml_tensor expected to have n_dims=2, but it has n_dims=%d\n", | ||
__func__, out->n_dims); | ||
return 0; | ||
} | ||
|
||
try { | ||
|
||
//wrap the passed-in mel ggml_tensor as an OpenVINO Tensor object, and set as input tensor to infer request | ||
{ | ||
// note, we populate shape & stride dimensions in opposite order from how they are listed in ne / nb arrays | ||
ov::Shape input_shape = { 1, (unsigned long long)mel->ne[1], (unsigned long long)mel->ne[0] }; | ||
ov::Strides input_strides = { mel->nb[2], mel->nb[1], mel->nb[0] }; | ||
ov::Tensor input_tensor(ov::element::f32, input_shape, mel->data, input_strides); | ||
ctx->inferRequest.set_input_tensor(input_tensor); | ||
} | ||
|
||
//wrap the passed-in out ggml_tensor as an OpenVINO Tensor object, and set as output tensor to infer request | ||
{ | ||
// note, we populate shape & stride dimensions in opposite order from how they are listed in ne / nb arrays | ||
ov::Shape output_shape = { 1, (unsigned long long)out->ne[1], (unsigned long long)out->ne[0] }; | ||
ov::Strides output_strides = { out->nb[2], out->nb[1], out->nb[0] }; | ||
ov::Tensor out_tensor(ov::element::f32, output_shape, out->data, output_strides); | ||
ctx->inferRequest.set_output_tensor(out_tensor); | ||
} | ||
|
||
//run inference | ||
ctx->inferRequest.infer(); | ||
} | ||
catch (const std::exception& error) { | ||
std::cout << "in openvino encode inference execution routine: exception: " << error.what() << std::endl; | ||
return 0; | ||
} | ||
|
||
return 1; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
// Wrapper of the OpenVINO Whisper Encoder model
//
// C-compatible interface: every struct type is spelled with the `struct`
// keyword so that the header can be included from both C and C++.

#ifndef WHISPER_OPENVINO_ENCODER_H
#define WHISPER_OPENVINO_ENCODER_H

#ifdef __cplusplus
extern "C" {
#endif

// opaque types; only pointers to them are used by this interface
struct whisper_openvino_context;
struct ggml_tensor;

// initialize openvino encoder, given path to model xml, device ("CPU", "GPU", etc.), and
// path to cache_dir. Returns null upon failure.
struct whisper_openvino_context * whisper_openvino_init(const char * path_model,
                                                        const char * device,
                                                        const char * cache_dir);

// clean up a ctx previously returned from whisper_openvino_init()
void whisper_openvino_free(struct whisper_openvino_context * ctx);

// Perform encode using OpenVINO.
// Returns 1 on success
// Returns 0 on failure
int whisper_openvino_encode(
    struct whisper_openvino_context * ctx,
    struct ggml_tensor * mel,
    struct ggml_tensor * out);

#ifdef __cplusplus
}
#endif

#endif // WHISPER_OPENVINO_ENCODER_H
Oops, something went wrong.