diff --git a/faster_whisper/transcribe.py b/faster_whisper/transcribe.py index e0525b9..c082546 100644 --- a/faster_whisper/transcribe.py +++ b/faster_whisper/transcribe.py @@ -908,6 +908,13 @@ def find_alignment( words, word_tokens = tokenizer.split_to_word_tokens( text_tokens + [tokenizer.eot] ) + if len(word_tokens) <= 1: + # Return early when there is nothing but the EOT token: + # >>> np.pad([], (1, 0)) + # array([0.]) + # That float array would later be used to index jump_times, which raises + # IndexError: arrays used as indices must be of integer (or boolean) type + return [] word_boundaries = np.pad(np.cumsum([len(t) for t in word_tokens[:-1]]), (1, 0)) if len(word_boundaries) <= 1: return []