-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #6 from dssudake/develop
Preprocess and Denoise Audio
- Loading branch information
Showing
32 changed files
with
457 additions
and
88 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# Dotted path to the AppConfig subclass Django should use for this app.
default_app_config = 'processAPI.apps.ProcessapiConfig' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,13 @@ | ||
from django.apps import AppConfig | ||
from django.conf import settings | ||
from tensorflow.keras.models import model_from_json | ||
|
||
|
||
class ProcessapiConfig(AppConfig):
    """Django app configuration for ``processAPI``.

    The Keras model is built while this class body executes (i.e. when the
    module is imported) and is exposed as the class attribute ``model``.
    """

    name = 'processAPI'

    # Read the serialized model architecture. The context manager guarantees
    # the file handle is released even if reading raises.
    with open(settings.MODEL_ARCH_PATH, 'r') as json_file:
        loaded_model_json = json_file.read()

    # Rebuild the network from its JSON description, then load its weights.
    model = model_from_json(loaded_model_json)
    model.load_weights(settings.MODEL_WEIGHT_PATH)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
import os | ||
import numpy as np | ||
import scipy | ||
import librosa | ||
import librosa.display | ||
import noisereduce as nr | ||
import soundfile as sf | ||
|
||
from django.conf import settings | ||
from processAPI.apps import ProcessapiConfig | ||
|
||
|
||
class FeatureExtractor:
    """Convert between a waveform and its STFT / mel representations.

    The STFT transforms share one periodic Hamming window, and the FFT size
    always equals the window length.
    """

    def __init__(self, audio, *, windowLength, overlap, sample_rate):
        """Store the signal and the analysis parameters.

        Args:
            audio: Waveform handed to librosa as the signal to analyse.
            windowLength: Window length in samples; also used as the FFT size.
            overlap: Value passed to librosa as ``hop_length``.
            sample_rate: Sampling rate, used by the mel transforms.
        """
        # ``scipy.signal.hamming`` was a deprecated alias that newer SciPy
        # releases removed; the function lives in ``scipy.signal.windows``.
        # Importing the submodule explicitly also avoids relying on another
        # package having imported ``scipy.signal`` beforehand.
        from scipy.signal.windows import hamming

        self.audio = audio
        self.ffT_length = windowLength
        self.window_length = windowLength
        self.overlap = overlap
        self.sample_rate = sample_rate
        # sym=False yields the periodic window variant.
        self.window = hamming(self.window_length, sym=False)

    def get_stft_spectrogram(self):
        """Return the STFT of the stored audio as computed by librosa."""
        return librosa.stft(
            self.audio,
            n_fft=self.ffT_length,
            win_length=self.window_length,
            hop_length=self.overlap,
            window=self.window,
            center=True,
        )

    def get_audio_from_stft_spectrogram(self, stft_features):
        """Invert ``stft_features`` back to a waveform with the same window."""
        return librosa.istft(
            stft_features,
            win_length=self.window_length,
            hop_length=self.overlap,
            window=self.window,
            center=True,
        )

    def get_mel_spectrogram(self):
        """Return the power (``power=2.0``) mel spectrogram of the audio."""
        # The signal is passed as ``y=`` because recent librosa releases
        # accept it by keyword only; older releases accept the keyword too.
        return librosa.feature.melspectrogram(
            y=self.audio,
            sr=self.sample_rate,
            power=2.0,
            pad_mode='reflect',
            n_fft=self.ffT_length,
            hop_length=self.overlap,
            center=True,
        )

    def get_audio_from_mel_spectrogram(self, M):
        """Reconstruct a waveform from the mel spectrogram ``M``.

        Uses ``librosa.feature.inverse.mel_to_audio`` with 32 iterations
        (``n_iter=32``) and no forced output length.
        """
        return librosa.feature.inverse.mel_to_audio(
            M,
            sr=self.sample_rate,
            n_fft=self.ffT_length,
            hop_length=self.overlap,
            win_length=self.window_length,
            window=self.window,
            center=True,
            pad_mode='reflect',
            power=2.0,
            n_iter=32,
            length=None,
        )
|
||
|
||
def dl_noise_reduce(audio_id, folder_path, file_path, progress_recorder):
    """Denoise an audio file with the preloaded model and write the result.

    The file is loaded at 16 kHz, its STFT magnitude is standardised and fed
    to ``ProcessapiConfig.model`` in sliding groups of 8 frames. The predicted
    magnitude is recombined with the noisy phase and inverted to a waveform,
    and ``noisereduce`` is applied before the waveform is written to
    ``audio_processed_denoised.wav`` inside ``folder_path``.

    Args:
        audio_id: Identifier used as a path component of the returned path.
        folder_path: Directory that receives the denoised wav file.
        file_path: Path of the audio file to denoise.
        progress_recorder: Object whose ``set_progress(current, total)`` is
            called at several stages of the pipeline.

    Returns:
        ``settings.AUDIO_PROCESSING_ROOT/<audio_id>/audio_processed_denoised.wav``
        joined with ``os.path.join``.
    """
    import logging

    logger = logging.getLogger(__name__)

    windowLength = 256
    overlap = round(0.25 * windowLength)  # hop of 25% => 75% overlap
    ffTLength = windowLength
    # One integer sample rate for both loading and writing, so the two can
    # never drift apart.
    fs = 16000
    numFeatures = ffTLength // 2 + 1
    numSegments = 8
    # Leading samples handed to noisereduce as the noise profile.
    # NOTE(review): presumably the start of the clip is assumed to be
    # noise-only -- confirm.
    noiseProfileSamples = 5250

    def prepare_input_features(stft_features):
        """Stack each frame with its ``numSegments - 1`` predecessors."""
        # Prepending the first numSegments-1 frames keeps the number of
        # output segments equal to the number of input frames.
        noisySTFT = np.concatenate(
            [stft_features[:, 0:numSegments - 1], stft_features], axis=1)
        segmentCount = noisySTFT.shape[1] - numSegments + 1
        stftSegments = np.zeros((numFeatures, numSegments, segmentCount))
        for index in range(segmentCount):
            stftSegments[:, :, index] = noisySTFT[:, index:index + numSegments]
        return stftSegments

    def read_audio(filepath, sample_rate, normalize=True):
        """Load ``filepath`` and optionally scale its peak to 1/3."""
        audio, sr = librosa.load(filepath, sr=sample_rate)
        if normalize:
            # Empty or all-zero audio has no peak to normalise against;
            # dividing by it would raise or fill the signal with NaN.
            peak = np.max(np.abs(audio)) if audio.size else 0.0
            if peak > 0:
                div_fac = 1 / peak / 3.0
                audio = audio * div_fac
        return audio, sr

    def revert_features_to_audio(features, phase, cleanMean=None, cleanStd=None):
        """Undo the standardisation, re-attach ``phase`` and invert the STFT."""
        # Compare against None explicitly: a legitimate mean of 0.0 is falsy
        # and must not disable the de-normalisation.
        if cleanMean is not None and cleanStd is not None:
            features = cleanStd * features + cleanMean
        phase = np.transpose(phase, (1, 0))
        features = np.squeeze(features)
        features = features * np.exp(1j * phase)
        features = np.transpose(features, (1, 0))
        return noiseAudioFeatureExtractor.get_audio_from_stft_spectrogram(features)

    model = ProcessapiConfig.model
    progress_recorder.set_progress(10, 100)

    noisyAudio, sr = read_audio(file_path, sample_rate=fs)

    noiseAudioFeatureExtractor = FeatureExtractor(
        noisyAudio, windowLength=windowLength, overlap=overlap, sample_rate=sr)
    noise_stft_features = noiseAudioFeatureExtractor.get_stft_spectrogram()
    progress_recorder.set_progress(20, 100)

    # Keep the phase of the noisy signal; only the magnitude goes to the model.
    noisyPhase = np.angle(noise_stft_features)
    logger.debug('noisyPhase.shape: %s', noisyPhase.shape)
    noise_stft_features = np.abs(noise_stft_features)

    mean = np.mean(noise_stft_features)
    std = np.std(noise_stft_features)
    if std == 0:
        # Constant (e.g. silent) spectrogram: avoid dividing by zero.
        std = 1.0
    noise_stft_features = (noise_stft_features - mean) / std

    predictors = prepare_input_features(noise_stft_features)
    progress_recorder.set_progress(35, 100)

    # (features, segments, frames) -> (frames, features, segments, 1)
    predictors = np.reshape(
        predictors, (predictors.shape[0], predictors.shape[1], 1, predictors.shape[2]))
    predictors = np.transpose(predictors, (3, 0, 1, 2)).astype(np.float32)
    logger.debug('predictors.shape: %s', predictors.shape)
    progress_recorder.set_progress(45, 100)

    STFTFullyConvolutional = model.predict(predictors)
    logger.debug('model output shape: %s', STFTFullyConvolutional.shape)
    progress_recorder.set_progress(70, 100)

    denoisedAudioFullyConvolutional = revert_features_to_audio(
        STFTFullyConvolutional, noisyPhase, mean, std)
    logger.debug(
        'Min: %s Max: %s',
        np.min(denoisedAudioFullyConvolutional),
        np.max(denoisedAudioFullyConvolutional))

    noisy_part = denoisedAudioFullyConvolutional[0:noiseProfileSamples]

    # Perform noise reduction
    reduced_noise = nr.reduce_noise(
        audio_clip=denoisedAudioFullyConvolutional, noise_clip=noisy_part, verbose=False)
    logger.debug('reduced_noise type: %s', type(reduced_noise))
    progress_recorder.set_progress(85, 100)

    # Save Audio
    op_file_path = os.path.join(folder_path, 'audio_processed_denoised.wav')
    sf.write(op_file_path, reduced_noise, fs)

    op_file_path_rel = os.path.join(
        settings.AUDIO_PROCESSING_ROOT, audio_id, 'audio_processed_denoised.wav')

    return op_file_path_rel
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
import os | ||
|
||
from celery import shared_task | ||
from celery_progress.backend import ProgressRecorder | ||
from django.conf import settings | ||
|
||
from processAPI.models import AudioFile | ||
from processAPI.helper import dl_noise_reduce | ||
|
||
# File-name pieces and ffmpeg settings used by the audio tasks below.
# Base name (without extension) of the uploaded audio file.
AUDIO_FILE_NAME = 'audio' | ||
# Default extension assumed for the uploaded (incoming) audio file.
INCOMING_AUDIO_EXTENSION = '.mp3' | ||
# Base name and extension of the converted file produced by ffmpeg.
PROCESSED_AUDIO_PREFIX = 'processed_audio' | ||
PROCESSED_AUDIO_EXTENSION = '.wav' | ||
# Values passed to ffmpeg's -ar and -ab options; kept as strings because
# they are placed directly on the command line.
AUDIO_SAMPLE_RATE = '44100' | ||
AUDIO_BIT_RATE = '64k' | ||
|
||
|
||
@shared_task()
def preprocess_audio(audio_id, audio_file_ext):
    """
    Initialize processed_audio with 'wav' format of original audio

    Runs ffmpeg inside the audio's processing folder to convert
    ``audio<audio_file_ext>`` to ``processed_audio.wav`` and, when the
    conversion succeeds, stores the relative path of the converted file in
    ``AudioFile.processed_audio``.

    Args:
        audio_id: Primary key of the ``AudioFile``; also the folder name.
        audio_file_ext: Extension (including the dot) of the uploaded file.
    """
    # Function-scope imports keep this task self-contained.
    import logging
    import subprocess

    logger = logging.getLogger(__name__)

    audio_dir = str(audio_id)
    output_name = PROCESSED_AUDIO_PREFIX + PROCESSED_AUDIO_EXTENSION

    # An argument list (no shell) means the caller-supplied extension can
    # never be interpreted as shell syntax. The extension is used as a local
    # value instead of being written to a module global shared by all tasks
    # running in the same worker process.
    command = [
        'ffmpeg', '-hide_banner', '-y',
        '-i', AUDIO_FILE_NAME + audio_file_ext,
        '-ab', AUDIO_BIT_RATE,
        '-ar', AUDIO_SAMPLE_RATE,
        '-vn',
        output_name,
    ]

    try:
        folder_path = os.path.join(
            settings.MEDIA_ROOT, settings.AUDIO_PROCESSING_ROOT, audio_dir)
        # ``cwd`` scopes the working directory to the child process rather
        # than changing it for the whole worker via os.chdir().
        completed = subprocess.run(command, cwd=folder_path, check=False)
    except FileNotFoundError:
        # Raised when the folder (or the ffmpeg executable) does not exist.
        logger.exception(
            'Could not convert audio %s: folder or ffmpeg not found', audio_id)
        # Kept from the original handler; it sets no fields.
        AudioFile.objects.filter(pk=audio_id).update()
        return

    if completed.returncode != 0:
        # Do not point processed_audio at a file ffmpeg failed to produce.
        logger.error(
            'ffmpeg exited with status %s for audio %s',
            completed.returncode, audio_id)
        return

    AudioFile.objects.filter(pk=audio_id).update(
        processed_audio=os.path.join(
            settings.AUDIO_PROCESSING_ROOT, audio_dir, output_name),
    )
|
||
|
||
@shared_task(bind=True)
def denoise_audio(self, audio_id):
    """
    Process audio through deep learning to reduce noise

    Looks up the ``AudioFile``, runs ``dl_noise_reduce`` on its current
    ``processed_audio`` file, stores the returned path back on the record and
    reports progress through ``ProgressRecorder``.

    Args:
        audio_id: Primary key of the ``AudioFile`` to denoise.

    Returns:
        ``'AUDIO_DENOISED'`` on success; ``None`` when a
        ``FileNotFoundError`` was handled.
    """
    import logging

    logger = logging.getLogger(__name__)

    try:
        progress_recorder = ProgressRecorder(self)

        audio_file = AudioFile.objects.get(pk=audio_id)
        audio_file_path = audio_file.processed_audio.path

        audio_id = str(audio_id)
        folder_path = os.path.join(
            settings.MEDIA_ROOT, settings.AUDIO_PROCESSING_ROOT, audio_id)
        # Fail early on a missing folder without changing the worker's
        # process-wide working directory (the helper receives full paths).
        if not os.path.isdir(folder_path):
            raise FileNotFoundError(folder_path)
        progress_recorder.set_progress(5, 100)

        op_file_path = dl_noise_reduce(
            audio_id, folder_path, audio_file_path, progress_recorder)
        progress_recorder.set_progress(95, 100)

        audio_file.processed_audio = op_file_path
        audio_file.save()
        progress_recorder.set_progress(100, 100)

        return 'AUDIO_DENOISED'
    except FileNotFoundError:
        logger.exception('Could not denoise audio %s: file not found', audio_id)
        # Kept from the original handler; it sets no fields.
        AudioFile.objects.filter(pk=audio_id).update()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.