-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathextractor.py
118 lines (96 loc) · 4.13 KB
/
extractor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/usr/bin/env python3
from moviepy.editor import *
from moviepy.video.tools.subtitles import SubtitlesClip
from vosk import Model, KaldiRecognizer, SetLogLevel
import datetime
import json
import os
import srt
import subprocess
MODEL_PATH = "data/model"
SAMPLE_RATE = 16000
WORDS_PER_LINE = 7
class VOSK_Extractor:
    """Speech-to-text extractor built on the VOSK API.

    Decodes an audio/video file with ffmpeg, runs VOSK recognition over the
    raw PCM stream, writes the transcript to ``output/output.txt``, builds an
    SRT subtitle file at ``output/output.srt``, and can embed those subtitles
    back into the source video via moviepy.
    """

    def __init__(self, file, sampleRate=None, model=None):
        """Set up the recognizer.

        Args:
            file: Path to the input audio/video file.
            sampleRate: PCM sample rate for ffmpeg/VOSK; defaults to SAMPLE_RATE.
            model: Path to a VOSK model directory; defaults to MODEL_PATH.
        """
        self.check_for_model()
        self.dataFile = file
        # Fall back to module-level defaults when arguments are omitted.
        self.sampleRate = SAMPLE_RATE if sampleRate is None else sampleRate
        self.model = Model(MODEL_PATH if model is None else model)
        # VOSK general model setup.
        SetLogLevel(0)
        self.rec = KaldiRecognizer(self.model, self.sampleRate)
        # Word-level timestamps are required for SRT file construction.
        self.rec.SetWords(True)
        # Recognition state accumulated by process_input_file()/built_srt().
        self.output = ""
        self.results = []
        self.subs = []

    def check_for_model(self):
        """Exit with status 1 when no VOSK model is present at MODEL_PATH."""
        # Use the MODEL_PATH constant instead of a duplicated literal so the
        # check stays in sync with the default used in __init__.
        if not os.path.exists(MODEL_PATH):
            print(
                "Please download a valid model from https://alphacephei.com/vosk/models and "
                "unpack as 'any_name' in your working directory. This is the training model "
                "needed to work with the VOSK API.")
            raise SystemExit(1)

    def initialize_process(self):
        """Start ffmpeg decoding the input to 16-bit mono PCM on stdout."""
        self.process = subprocess.Popen(
            ['ffmpeg', '-loglevel', 'quiet', '-i', self.dataFile,
             '-ar', str(self.sampleRate), '-ac', '1', '-f', 's16le', '-'],
            stdout=subprocess.PIPE)

    def prepare_result(self):
        """Print the final recognized text and save it to output/output.txt."""
        # self.output is the JSON string produced by KaldiRecognizer.FinalResult().
        output = json.loads(self.output)
        print("Result from [" + self.dataFile + "]: " + output["text"] + "\n")
        # Ensure the destination directory exists before writing.
        os.makedirs("output", exist_ok=True)
        # Context manager guarantees the handle is closed (the original leaked it).
        with open("output/output.txt", "w+") as out_file:
            out_file.write(output["text"])

    def process_input_file(self):
        """Run the full recognition pass over the input file."""
        # Spawn the ffmpeg decode subprocess.
        self.initialize_process()
        # Feed raw PCM to the recognizer in 4000-byte chunks until EOF.
        while True:
            data = self.process.stdout.read(4000)
            if len(data) == 0:
                break
            # AcceptWaveform returns true when a full utterance is ready.
            if self.rec.AcceptWaveform(data):
                self.results.append(self.rec.Result())
        # Flush the recognizer and keep the final chunk with the rest.
        self.output = self.rec.FinalResult()
        self.results.append(self.output)
        self.prepare_result()

    def built_srt(self):
        """Build SRT subtitle entries (WORDS_PER_LINE words per cue) and write them.

        NOTE(review): name kept as-is ("built" vs "build") to preserve the
        public interface for existing callers.
        """
        for res in self.results:
            jres = json.loads(res)
            if 'result' not in jres:
                # Chunk carried no recognized words (e.g. silence).
                continue
            words = jres['result']
            for j in range(0, len(words), WORDS_PER_LINE):
                line = words[j: j + WORDS_PER_LINE]
                # Cue spans from the first word's start to the last word's end.
                self.subs.append(srt.Subtitle(
                    index=len(self.subs),
                    content=" ".join(w['word'] for w in line),
                    start=datetime.timedelta(seconds=line[0]['start']),
                    end=datetime.timedelta(seconds=line[-1]['end'])))
        self.write_to_srt_file()

    def write_to_srt_file(self):
        """Compose self.subs into SRT format and write output/output.srt."""
        os.makedirs("output", exist_ok=True)
        # Context manager guarantees the handle is closed (the original leaked it).
        with open("output/output.srt", "w+") as srt_file:
            srt_file.write(srt.compose(self.subs))

    def embed_subtitles(self):
        """Burn output/output.srt into the source video as output/edited_output.mp4."""
        # Subtitle rendering attributes.
        generator = lambda txt: TextClip(txt, font='Times New Roman', fontsize=50, color='white')
        subtitles = SubtitlesClip("output/output.srt", generator)
        # Load the original video and account for videos shot in landscape mode
        # (a 270-degree rotation flag means width/height must be swapped).
        video = VideoFileClip(self.dataFile)
        if video.rotation == 270:
            video = video.resize(video.size[::-1])
            video.rotation = 0
        # Embed subtitles into the original video and write to a new file.
        os.makedirs("output", exist_ok=True)
        result = CompositeVideoClip([video, subtitles.set_pos(('center', 'bottom'))])
        result.write_videofile("output/edited_output.mp4", fps=video.fps,
                               temp_audiofile="temp-audio.m4a", remove_temp=True,
                               codec="libx264", audio_codec="aac")