Skip to content

Commit

Permalink
Auto-submit the whisper extension transcription
Browse files Browse the repository at this point in the history
  • Loading branch information
oobabooga committed Apr 7, 2023
1 parent 102b00f commit 5573874
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 24 deletions.
5 changes: 5 additions & 0 deletions css/chat.css
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,8 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
.wrap.svelte-6roggh.svelte-6roggh {
max-height: 92.5%;
}

/* Make the microphone button in the whisper extension span the full width of its container */
.sm.svelte-1ipelgc {
width: 100%;
}
36 changes: 13 additions & 23 deletions extensions/whisper_stt/script.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import gradio as gr
import speech_recognition as sr
from modules import shared

input_hijack = {
'state': False,
'value': ["", ""]
}


def do_stt(audio, text_state=""):
def do_stt(audio):
transcription = ""
r = sr.Recognizer()

Expand All @@ -21,34 +22,23 @@ def do_stt(audio, text_state=""):
except sr.RequestError as e:
print("Could not request results from Whisper", e)

input_hijack.update({"state": True, "value": [transcription, transcription]})
return transcription

text_state += transcription + " "
return text_state, text_state


def update_hijack(val):
input_hijack.update({"state": True, "value": [val, val]})
return val


def auto_transcribe(audio, audio_auto, text_state=""):
def auto_transcribe(audio, auto_submit):
if audio is None:
return "", ""
if audio_auto:
return do_stt(audio, text_state)
return "", ""

transcription = do_stt(audio)
if auto_submit:
input_hijack.update({"state": True, "value": [transcription, transcription]})

return transcription, None


def ui():
tr_state = gr.State(value="")
output_transcription = gr.Textbox(label="STT-Input",
placeholder="Speech Preview. Click \"Generate\" to send",
interactive=True)
output_transcription.change(fn=update_hijack, inputs=[output_transcription], outputs=[tr_state])
audio_auto = gr.Checkbox(label="Auto-Transcribe", value=True)
with gr.Row():
audio = gr.Audio(source="microphone")
audio.change(fn=auto_transcribe, inputs=[audio, audio_auto, tr_state], outputs=[output_transcription, tr_state])
transcribe_button = gr.Button(value="Transcribe")
transcribe_button.click(do_stt, inputs=[audio, tr_state], outputs=[output_transcription, tr_state])
auto_submit = gr.Checkbox(label='Submit the transcribed audio automatically', value=True)
audio.change(fn=auto_transcribe, inputs=[audio, auto_submit], outputs=[shared.gradio['textbox'], audio])
audio.change(None, auto_submit, None, _js="(check) => {if (check) { document.getElementById('Generate').click() }}")
2 changes: 1 addition & 1 deletion server.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ def create_interface():
shared.gradio['display'] = gr.HTML(value=chat_html_wrapper(shared.history['visible'], shared.settings['name1'], shared.settings['name2'], 'cai-chat'))
shared.gradio['textbox'] = gr.Textbox(label='Input')
with gr.Row():
shared.gradio['Generate'] = gr.Button('Generate')
shared.gradio['Generate'] = gr.Button('Generate', elem_id='Generate')
shared.gradio['Stop'] = gr.Button('Stop', elem_id="stop")
with gr.Row():
shared.gradio['Impersonate'] = gr.Button('Impersonate')
Expand Down

0 comments on commit 5573874

Please sign in to comment.