- Filter onset detection by voice frequency
Brain
should just be various speach to text contrib classes
https://github.com/cmusphinx/pocketsphinx/blob/master/swig/python/test/kws_test.py
http://suendermann.com/su/pdf/essv2014.pdf
#!/usr/bin/python
import sys, os
from pocketsphinx import *
stream = open(sys.argv[1])
modeldir = "/home/shmyrev/local/share/pocketsphinx/model"
# Create a decoder with certain model
config = Decoder.default_config()
config.set_string('-hmm', os.path.join(modeldir, 'hmm/en_US/hub4wsj_sc_8k'))
config.set_string('-dict', os.path.join(modeldir, 'lm/en_US/cmu07a.dic'))
config.set_string('-keyphrase', 'about')
config.set_float('-kws_threshold', 1e-40)
decoder = Decoder(config)
decoder.start_utt('spotting')
while True:
buf = stream.read(1024)
decoder.process_raw(buf, False, False)
if decoder.hyp() != None and decoder.hyp().hypstr == 'about':
print "Detected keyword, restarting search"
decoder.end_utt()
decoder.start_utt('spotting')
# ====================================================================
# Copyright (c) 2013 Carnegie Mellon University. All rights
# reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# This work was supported in part by funding from the Defense Advanced
# Research Projects Agency and the National Science Foundation of the
# United States of America, and the CMU Sphinx Speech Consortium.
#
# THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
# ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
# NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# ====================================================================
import sys, os
from pocketsphinx import *
modeldir = "../../../model"
datadir = "../../../test/data"
# Create a decoder with certain model
config = Decoder.default_config()
config.set_string('-hmm', os.path.join(modeldir, 'hmm/en_US/hub4wsj_sc_8k'))
config.set_string('-dict', os.path.join(modeldir, 'lm/en_US/cmu07a.dic'))
config.set_string('-keyphrase', 'forward')
config.set_float('-kws_threshold', 1e-20)
# Open file to read the data
stream = open(os.path.join(datadir, "goforward.raw"))
# Alternatively you can read from microphone
# import pyaudio
#
# p = pyaudio.PyAudio()
# stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=1024)
# stream.start_stream()
# Process audio chunk by chunk. On keyword detected perform action and restart search
decoder = Decoder(config)
decoder.start_utt('spotting')
while True:
buf = stream.read(1024)
if not buf:
break
decoder.process_raw(buf, False, False)
if decoder.hyp() != None and decoder.hyp().hypstr == 'forward':
print "Detected keyword, restarting search"
decoder.end_utt()
decoder.start_utt('spotting')