diff --git a/ios/VoskApiTest.xcodeproj/project.pbxproj b/ios/VoskApiTest.xcodeproj/project.pbxproj index a82fd1d5..f38d4199 100644 --- a/ios/VoskApiTest.xcodeproj/project.pbxproj +++ b/ios/VoskApiTest.xcodeproj/project.pbxproj @@ -17,7 +17,8 @@ 92375244240C6DAF00DD6076 /* Accelerate.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 92375243240C6DAF00DD6076 /* Accelerate.framework */; }; 92375246240C6DC900DD6076 /* libstdc++.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = 92375245240C6DC900DD6076 /* libstdc++.tbd */; }; 92375274240C6F1E00DD6076 /* 10001-90210-01803.wav in Resources */ = {isa = PBXBuildFile; fileRef = 92375256240C6E3D00DD6076 /* 10001-90210-01803.wav */; }; - 92D86BD5253F823F0040D53F /* vosk-model-small-en-us-0.4 in Resources */ = {isa = PBXBuildFile; fileRef = 92D86BD3253F823E0040D53F /* vosk-model-small-en-us-0.4 */; }; + 92BACED125BE125A00B5CC93 /* vosk-model-small-en-us-0.15 in Resources */ = {isa = PBXBuildFile; fileRef = 928CC50C25BE124400490481 /* vosk-model-small-en-us-0.15 */; }; + 92D6B8D325BDFEAC007FF08D /* VoskModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 92D6B8D225BDFEAC007FF08D /* VoskModel.swift */; }; 92D86BD6253F823F0040D53F /* vosk-model-spk-0.4 in Resources */ = {isa = PBXBuildFile; fileRef = 92D86BD4253F823F0040D53F /* vosk-model-spk-0.4 */; }; /* End PBXBuildFile section */ @@ -34,9 +35,10 @@ 92375243240C6DAF00DD6076 /* Accelerate.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Accelerate.framework; path = System/Library/Frameworks/Accelerate.framework; sourceTree = SDKROOT; }; 92375245240C6DC900DD6076 /* libstdc++.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = "libstdc++.tbd"; path = "usr/lib/libstdc++.tbd"; sourceTree = SDKROOT; }; 92375256240C6E3D00DD6076 /* 10001-90210-01803.wav */ = {isa = PBXFileReference; lastKnownFileType = audio.wav; path = "10001-90210-01803.wav"; sourceTree = ""; }; + 
928CC50C25BE124400490481 /* vosk-model-small-en-us-0.15 */ = {isa = PBXFileReference; lastKnownFileType = folder; name = "vosk-model-small-en-us-0.15"; path = "/Users/shmyrev/Documents/IOS/VoskApiTest/VoskApiTest/Vosk/vosk-model-small-en-us-0.15"; sourceTree = ""; }; 92AA22AD244CDD1200DA464B /* vosk_api.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = vosk_api.h; sourceTree = ""; }; 92AA22AE244CDD5200DA464B /* bridging.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = bridging.h; sourceTree = ""; }; - 92D86BD3253F823E0040D53F /* vosk-model-small-en-us-0.4 */ = {isa = PBXFileReference; lastKnownFileType = folder; path = "vosk-model-small-en-us-0.4"; sourceTree = ""; }; + 92D6B8D225BDFEAC007FF08D /* VoskModel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = VoskModel.swift; sourceTree = ""; }; 92D86BD4253F823F0040D53F /* vosk-model-spk-0.4 */ = {isa = PBXFileReference; lastKnownFileType = folder; path = "vosk-model-spk-0.4"; sourceTree = ""; }; /* End PBXFileReference section */ @@ -83,6 +85,7 @@ 9237522A240C550B00DD6076 /* LaunchScreen.storyboard */, 9237522D240C550B00DD6076 /* Info.plist */, 92375233240C558900DD6076 /* Vosk.swift */, + 92D6B8D225BDFEAC007FF08D /* VoskModel.swift */, ); path = VoskApiTest; sourceTree = ""; @@ -90,7 +93,7 @@ 92375239240C642000DD6076 /* Vosk */ = { isa = PBXGroup; children = ( - 92D86BD3253F823E0040D53F /* vosk-model-small-en-us-0.4 */, + 928CC50C25BE124400490481 /* vosk-model-small-en-us-0.15 */, 92D86BD4253F823F0040D53F /* vosk-model-spk-0.4 */, 92375256240C6E3D00DD6076 /* 10001-90210-01803.wav */, 92AA22AD244CDD1200DA464B /* vosk_api.h */, @@ -169,12 +172,12 @@ isa = PBXResourcesBuildPhase; buildActionMask = 2147483647; files = ( + 92BACED125BE125A00B5CC93 /* vosk-model-small-en-us-0.15 in Resources */, 92375274240C6F1E00DD6076 /* 10001-90210-01803.wav in Resources */, 9237522C240C550B00DD6076 /* 
LaunchScreen.storyboard in Resources */, 92375229240C550B00DD6076 /* Assets.xcassets in Resources */, 92D86BD6253F823F0040D53F /* vosk-model-spk-0.4 in Resources */, 92375227240C550B00DD6076 /* Main.storyboard in Resources */, - 92D86BD5253F823F0040D53F /* vosk-model-small-en-us-0.4 in Resources */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -187,6 +190,7 @@ files = ( 92375224240C550B00DD6076 /* ViewController.swift in Sources */, 92375222240C550B00DD6076 /* AppDelegate.swift in Sources */, + 92D6B8D325BDFEAC007FF08D /* VoskModel.swift in Sources */, 92375234240C558900DD6076 /* Vosk.swift in Sources */, ); runOnlyForDeploymentPostprocessing = 0; diff --git a/ios/VoskApiTest.xcodeproj/project.xcworkspace/contents.xcworkspacedata b/ios/VoskApiTest.xcodeproj/project.xcworkspace/contents.xcworkspacedata index 8dc11b73..3609e01e 100644 --- a/ios/VoskApiTest.xcodeproj/project.xcworkspace/contents.xcworkspacedata +++ b/ios/VoskApiTest.xcodeproj/project.xcworkspace/contents.xcworkspacedata @@ -2,10 +2,7 @@ - - + location = "group:/Users/shmyrev/Documents/IOS/VoskApiTest/VoskApiTest/Vosk/vosk-model-small-en-us-0.15"> diff --git a/ios/VoskApiTest/Base.lproj/Main.storyboard b/ios/VoskApiTest/Base.lproj/Main.storyboard index 978554c1..59c6203f 100644 --- a/ios/VoskApiTest/Base.lproj/Main.storyboard +++ b/ios/VoskApiTest/Base.lproj/Main.storyboard @@ -1,6 +1,6 @@ - - + + @@ -10,23 +10,52 @@ - + - - - - + + + + + + + + + + + + + + + + + + + - - - + - + + + - + - + diff --git a/ios/VoskApiTest/ViewController.swift b/ios/VoskApiTest/ViewController.swift index 495490fc..44fd513a 100644 --- a/ios/VoskApiTest/ViewController.swift +++ b/ios/VoskApiTest/ViewController.swift @@ -3,30 +3,113 @@ // VoskApiTest // // Created by Niсkolay Shmyrev on 01.03.20. -// Copyright © 2020 Alpha Cephei. All rights reserved. +// Copyright © 2020-2021 Alpha Cephei. All rights reserved. 
//

import UIKit
import AVFoundation

/// The recognition source the screen is currently driving.
enum WorkMode {
    case stopped
    case microphone
    case file
}

/// Demo screen with two buttons: recognize the bundled WAV file, or recognize
/// live microphone audio. Results are written to `mainText`.
class ViewController: UIViewController {

    /// Current work mode; assigned by `setMode(mode:)`, first from `viewDidLoad()`.
    var mode: WorkMode!

    @IBOutlet weak var recognizeFile: UIButton!
    @IBOutlet weak var mainText: UITextView!
    @IBOutlet weak var recognizeMicrophone: UIButton!

    /// Engine used for microphone capture; a fresh one is created on every start.
    var audioEngine: AVAudioEngine!
    /// Serial queue on which all recognizer calls run, keeping them off the main thread.
    var processingQueue: DispatchQueue!
    /// Models shared by every recognizer created from this screen.
    var model: VoskModel!

    /// Switches the UI (button enablement, button title, text view) to `mode`
    /// and records it in `self.mode`.
    ///
    /// - Parameter mode: The mode to enter.
    func setMode(mode: WorkMode) {
        switch mode {
        case .stopped:
            recognizeFile.isEnabled = true
            recognizeMicrophone.isEnabled = true
            recognizeMicrophone.setTitle("Recognize Microphone", for: .normal)
        case .microphone:
            recognizeFile.isEnabled = false
            recognizeMicrophone.isEnabled = true
            recognizeMicrophone.setTitle("Stop Microphone", for: .normal)
            mainText.text = ""
        case .file:
            recognizeFile.isEnabled = false
            recognizeMicrophone.isEnabled = false
            mainText.text = "Processing file..."
        }
        self.mode = mode
    }

    /// Creates a new audio engine, taps its input node as mono 16-bit PCM and
    /// feeds every captured buffer to a fresh recognizer on `processingQueue`.
    ///
    /// Each recognition result is prepended to `mainText` on the main queue.
    /// If the input format is unusable or the engine fails to start, the error
    /// is logged, the tap is removed and the UI returns to `.stopped`.
    func startAudioEngine() {
        // Create a new audio engine.
        let engine = AVAudioEngine()
        audioEngine = engine

        let inputNode = engine.inputNode
        let formatInput = inputNode.inputFormat(forBus: 0)

        // NOTE(review): a zero sample rate / channel count is presumably what the
        // input node reports when no usable microphone is available; installing
        // a tap with such a format is not expected to succeed — confirm on device.
        guard formatInput.sampleRate > 0, formatInput.channelCount > 0 else {
            print("Unable to start AVAudioEngine: no usable audio input format")
            setMode(mode: .stopped)
            return
        }

        let formatPcm = AVAudioFormat(commonFormat: .pcmFormatInt16,
                                      sampleRate: formatInput.sampleRate,
                                      channels: 1,
                                      interleaved: true)

        let recognizer = Vosk(model: model, sampleRate: Float(formatInput.sampleRate))

        // `self` is captured weakly: the engine (owned by self) retains the tap
        // block, so a strong capture would form a retain cycle.
        // The buffer size requests roughly a tenth of a second of audio per callback.
        inputNode.installTap(onBus: 0,
                             bufferSize: UInt32(formatInput.sampleRate / 10),
                             format: formatPcm) { [weak self] buffer, _ in
            self?.processingQueue.async {
                let res = recognizer.recognizeData(buffer: buffer)
                DispatchQueue.main.async {
                    guard let self = self else { return }
                    self.mainText.text = res + "\n" + self.mainText.text
                }
            }
        }

        do {
            // Start the stream of audio data.
            engine.prepare()
            try engine.start()
        } catch {
            print("Unable to start AVAudioEngine: \(error.localizedDescription)")
            // Do not leave the UI claiming that the microphone is being recognized.
            inputNode.removeTap(onBus: 0)
            setMode(mode: .stopped)
        }
    }

    /// Removes the input tap and stops the audio engine, if one was created.
    func stopAudioEngine() {
        guard let engine = audioEngine else { return }
        engine.inputNode.removeTap(onBus: 0)
        engine.stop()
    }

    /// Toggles microphone recognition on and off.
    ///
    /// The method name (including its spelling) is kept as is because the action
    /// is presumably wired by selector name in Main.storyboard — confirm before renaming.
    @IBAction func runRecognizeMicrohpone(_ sender: Any) {
        if mode == .stopped {
            setMode(mode: .microphone)
            startAudioEngine()
        } else {
            stopAudioEngine()
            setMode(mode: .stopped)
        }
    }

    /// Recognizes the bundled audio file on `processingQueue`, then shows the
    /// result and returns the UI to `.stopped` on the main queue.
    @IBAction func runRecognizeFile(_ sender: Any) {
        setMode(mode: .file)
        processingQueue.async {
            let recognizer = Vosk(model: self.model, sampleRate: 16000.0)
            let res = recognizer.recognizeFile()
            DispatchQueue.main.async {
                self.mainText.text = res
                self.setMode(mode: .stopped)
            }
        }
    }

    override func viewDidLoad() {
        super.viewDidLoad()
        setMode(mode: .stopped)
        processingQueue = DispatchQueue(label: "recognizerQueue")
        // The models are loaded synchronously here, before any button can be used.
        model = VoskModel()
    }

    override func didReceiveMemoryWarning() {
        super.didReceiveMemoryWarning()
    }
}
// Copyright © 2020-2021 Alpha Cephei. All rights reserved.
//

import Foundation
import AVFoundation

/// Swift wrapper around a single Vosk recognizer handle.
///
/// The handle is created from a `VoskModel` in `init` and released in `deinit`.
public final class Vosk {

    /// Handle returned by `vosk_recognizer_new_spk`.
    var recognizer: OpaquePointer!

    /// Creates a recognizer for audio sampled at `sampleRate`.
    ///
    /// - Parameters:
    ///   - model: Provides the model and speaker-model handles.
    ///   - sampleRate: Sample rate, in Hz, of the audio that will be fed in.
    init(model: VoskModel, sampleRate: Float) {
        recognizer = vosk_recognizer_new_spk(model.model, model.spkModel, sampleRate)
    }

    deinit {
        // Nothing to free if recognizer creation returned a null handle.
        if let recognizer = recognizer {
            vosk_recognizer_free(recognizer)
        }
    }

    /// Recognizes the bundled `10001-90210-01803.wav` file in one pass.
    ///
    /// The file's bytes are passed to the recognizer exactly as read from disk
    /// (the whole file, not just the sample data).
    ///
    /// - Returns: The final result string reported by the recognizer (also
    ///   printed), or `""` when the file cannot be located or read.
    func recognizeFile() -> String {
        guard let audioFile = Bundle.main.resourceURL?.appendingPathComponent("10001-90210-01803.wav"),
              let data = try? Data(contentsOf: audioFile) else {
            return ""
        }

        data.withUnsafeBytes { (raw: UnsafeRawBufferPointer) -> Void in
            // An empty file has no base address; there is nothing to feed then.
            guard let base = raw.bindMemory(to: CChar.self).baseAddress else { return }
            _ = vosk_recognizer_accept_waveform(recognizer, base, Int32(raw.count))
        }

        let sres = Vosk.string(fromCResult: vosk_recognizer_final_result(recognizer))
        print(sres)
        return sres
    }

    /// Feeds one buffer of 16-bit PCM audio to the recognizer.
    ///
    /// - Parameter buffer: Captured audio; only the first channel is read.
    /// - Returns: The full result when `vosk_recognizer_accept_waveform` returns 1,
    ///   otherwise the partial result; `""` when the buffer carries no 16-bit
    ///   channel data.
    func recognizeData(buffer: AVAudioPCMBuffer) -> String {
        guard let channels = buffer.int16ChannelData else { return "" }

        // Length in bytes; widened to Int before multiplying so it cannot overflow UInt32.
        let dataLen = Int(buffer.frameLength) * MemoryLayout<Int16>.size
        let endOfSpeech = channels[0].withMemoryRebound(to: CChar.self, capacity: dataLen) {
            vosk_recognizer_accept_waveform(recognizer, $0, Int32(dataLen))
        }

        let res = endOfSpeech == 1
            ? vosk_recognizer_result(recognizer)
            : vosk_recognizer_partial_result(recognizer)
        return Vosk.string(fromCResult: res)
    }

    /// Copies a C string returned by the recognizer into a Swift `String`.
    /// A null pointer yields `""`; invalid UTF-8 is repaired instead of crashing.
    private static func string(fromCResult cString: UnsafePointer<CChar>?) -> String {
        guard let cString = cString else { return "" }
        return String(cString: cString)
    }
}
//
//  VoskModel.swift
//  VoskApiTest
//
//  Created by Niсkolay Shmyrev on 01.03.20.
//  Copyright © 2020-2021 Alpha Cephei. All rights reserved.
//

import Foundation

/// Owns the two Vosk model handles loaded from the app bundle: the recognition
/// model and the speaker model. Both are released in `deinit`.
public final class VoskModel {

    /// Handle returned by `vosk_model_new`; stays nil if the bundle has no resource directory.
    var model: OpaquePointer!
    /// Handle returned by `vosk_spk_model_new`; stays nil if the bundle has no resource directory.
    var spkModel: OpaquePointer!

    /// Sets the Vosk log level and loads both models from the main bundle's
    /// resource directory.
    init() {
        // Set to -1 to disable logs
        vosk_set_log_level(0)

        guard let resourceURL = Bundle.main.resourceURL else { return }
        let modelPath = resourceURL.appendingPathComponent("vosk-model-small-en-us-0.15").path
        let spkModelPath = resourceURL.appendingPathComponent("vosk-model-spk-0.4").path

        model = vosk_model_new(modelPath)
        spkModel = vosk_spk_model_new(spkModelPath)
    }

    deinit {
        // Either handle can still be nil when loading was skipped; free only what exists.
        if let model = model {
            vosk_model_free(model)
        }
        if let spkModel = spkModel {
            vosk_spk_model_free(spkModel)
        }
    }
}