Skip to content

Commit

Permalink
Merge pull request #6 from dssudake/develop
Browse files Browse the repository at this point in the history
Preprocess and Denoise Audio
  • Loading branch information
dssudake authored May 20, 2021
2 parents f29c344 + 35f6619 commit c8610ed
Show file tree
Hide file tree
Showing 32 changed files with 457 additions and 88 deletions.
26 changes: 26 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,29 @@
<h2 align="center">
Audio Processing on Cloud using Deep Learning
</h2>

## Installation

### Basic Setup

- [docker](https://docs.docker.com/get-docker/) and [docker-compose](https://docs.docker.com/compose/install/) are required to run the application.
- Make sure you install the builds of both that match your OS and its version (Linux, Windows, Mac).
- `git clone https://github.com/dssudake/AudMIX.git`
- `cd AudMIX`

### Development Environment

```bash
# Build the latest images
docker-compose build

# Start the application
docker-compose up

# or

# Add -d flag to run everything in the background
docker-compose up -d
```

Then open http://localhost:3000 in your browser to view the frontend; the backend is available at http://localhost:8000.
5 changes: 3 additions & 2 deletions backend/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# pull official base image
FROM python:3.8.3-alpine
FROM tensorflow/tensorflow:2.5.0

# install ffmpeg with linux static build
ARG FFMPEGV=4.2.2
RUN wget -q https://www.johnvansickle.com/ffmpeg/old-releases/ffmpeg-${FFMPEGV}-amd64-static.tar.xz \
RUN curl -O https://www.johnvansickle.com/ffmpeg/old-releases/ffmpeg-${FFMPEGV}-amd64-static.tar.xz \
&& tar xJf ffmpeg-${FFMPEGV}-amd64-static.tar.xz \
&& mv ffmpeg-${FFMPEGV}-amd64-static/ffmpeg /usr/local/bin/ \
&& rm -rf ffmpeg-${FFMPEGV}-amd64-static \
Expand All @@ -20,6 +20,7 @@ ENV PYTHONUNBUFFERED 1
RUN pip install --upgrade pip
COPY ./requirements.txt .
RUN pip install -r requirements.txt
RUN apt-get -y update && apt-get install -y libsndfile1

# copy entrypoint.sh
COPY ./entrypoint.sh .
Expand Down
8 changes: 6 additions & 2 deletions backend/backend/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@
'rest_framework',
'corsheaders',
'drf_yasg',
'file_upload',
'processAPI',
'celery_progress',
'processAPI.apps.ProcessapiConfig',

]

Expand Down Expand Up @@ -144,3 +144,7 @@
CELERY_ACCEPT_CONTENT = ['application/json']
CELERY_RESULT_SERIALIZER = 'json'
CELERY_TASK_SERIALIZER = 'json'

# DL Model Paths
MODEL_ARCH_PATH = os.path.join(BASE_DIR, 'model', 'model_arch.json')
MODEL_WEIGHT_PATH = os.path.join(BASE_DIR, 'model', 'model_weights.h5')
7 changes: 4 additions & 3 deletions backend/backend/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,14 @@
)

urlpatterns = [
path('admin/', admin.site.urls),

# path('api/', include('file_upload.urls')),
path('api/admin/', admin.site.urls),

# Audio Processing API Routes
path('api/', include('processAPI.urls')),

# Endpoint to check celery worker task progress
path('api/task_status/', include('celery_progress.urls')),

# Swagger API Documentation
path('api/docs/', schema_view.with_ui('swagger',
cache_timeout=0), name='schema-swagger-ui'),
Expand Down
1 change: 0 additions & 1 deletion backend/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

# python manage.py flush --no-input
python manage.py makemigrations
python manage.py makemigrations file_upload
python manage.py makemigrations processAPI
python manage.py migrate

Expand Down
Empty file removed backend/file_upload/__init__.py
Empty file.
6 changes: 0 additions & 6 deletions backend/file_upload/admin.py

This file was deleted.

5 changes: 0 additions & 5 deletions backend/file_upload/apps.py

This file was deleted.

9 changes: 0 additions & 9 deletions backend/file_upload/models.py

This file was deleted.

8 changes: 0 additions & 8 deletions backend/file_upload/serializers.py

This file was deleted.

8 changes: 0 additions & 8 deletions backend/file_upload/tasks.py

This file was deleted.

3 changes: 0 additions & 3 deletions backend/file_upload/tests.py

This file was deleted.

10 changes: 0 additions & 10 deletions backend/file_upload/urls.py

This file was deleted.

9 changes: 0 additions & 9 deletions backend/file_upload/views.py

This file was deleted.

1 change: 1 addition & 0 deletions backend/model/model_arch.json

Large diffs are not rendered by default.

Binary file added backend/model/model_weights.h5
Binary file not shown.
1 change: 1 addition & 0 deletions backend/processAPI/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Dotted path to the AppConfig subclass for this app (ProcessapiConfig,
# defined in processAPI/apps.py).
# NOTE(review): newer Django releases deprecate ``default_app_config``;
# confirm against the Django version pinned in requirements.txt.
default_app_config = 'processAPI.apps.ProcessapiConfig'
8 changes: 8 additions & 0 deletions backend/processAPI/apps.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
from django.apps import AppConfig
from django.conf import settings
from tensorflow.keras.models import model_from_json


class ProcessapiConfig(AppConfig):
    """
    App configuration for ``processAPI`` that also holds the denoising model.

    The Keras model is built in the class body, i.e. once when this module is
    first imported, and is exposed as the class attribute ``model`` (read by
    ``processAPI.helper`` as ``ProcessapiConfig.model``).
    """

    name = 'processAPI'

    # Read the serialized architecture. The context manager guarantees the
    # file handle is closed even if reading raises.
    with open(settings.MODEL_ARCH_PATH, 'r') as json_file:
        loaded_model_json = json_file.read()

    # Rebuild the network from its JSON description, then load the trained
    # weights into it.
    model = model_from_json(loaded_model_json)
    model.load_weights(settings.MODEL_WEIGHT_PATH)
125 changes: 125 additions & 0 deletions backend/processAPI/helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
import os
import numpy as np
import scipy
import librosa
import librosa.display
import noisereduce as nr
import soundfile as sf

from django.conf import settings
from processAPI.apps import ProcessapiConfig


class FeatureExtractor:
    """
    Thin wrapper around librosa's STFT / mel-spectrogram transforms.

    The same window length is used as the FFT size, and ``overlap`` is handed
    to librosa as ``hop_length`` in every transform.

    Args:
        audio: Audio samples to analyse.
        windowLength: Window length in samples; also used as ``n_fft``.
        overlap: Value passed to librosa as ``hop_length``.
        sample_rate: Sample rate forwarded to the mel transforms as ``sr``.
    """

    def __init__(self, audio, *, windowLength, overlap, sample_rate):
        # Imported explicitly: a bare ``import scipy`` does not guarantee that
        # the ``scipy.signal`` subpackage is loaded, and the window functions
        # live in ``scipy.signal.windows`` (the ``scipy.signal.hamming`` alias
        # is not available in newer SciPy releases).
        from scipy.signal.windows import hamming

        self.audio = audio
        self.ffT_length = windowLength
        self.window_length = windowLength
        self.overlap = overlap
        self.sample_rate = sample_rate
        # sym=False yields the periodic form of the window.
        self.window = hamming(self.window_length, sym=False)

    def get_stft_spectrogram(self):
        """Return the STFT of ``self.audio``."""
        return librosa.stft(
            self.audio,
            n_fft=self.ffT_length,
            win_length=self.window_length,
            hop_length=self.overlap,
            window=self.window,
            center=True,
        )

    def get_audio_from_stft_spectrogram(self, stft_features):
        """Invert ``stft_features`` back to audio with the same window/hop."""
        return librosa.istft(
            stft_features,
            win_length=self.window_length,
            hop_length=self.overlap,
            window=self.window,
            center=True,
        )

    def get_mel_spectrogram(self):
        """Return the power (``power=2.0``) mel spectrogram of ``self.audio``."""
        # ``y=`` is passed by keyword: newer librosa releases no longer accept
        # the audio as a positional argument here.
        return librosa.feature.melspectrogram(
            y=self.audio,
            sr=self.sample_rate,
            power=2.0,
            pad_mode='reflect',
            n_fft=self.ffT_length,
            hop_length=self.overlap,
            center=True,
        )

    def get_audio_from_mel_spectrogram(self, M):
        """Approximate audio from the mel spectrogram ``M`` (32 iterations)."""
        return librosa.feature.inverse.mel_to_audio(
            M,
            sr=self.sample_rate,
            n_fft=self.ffT_length,
            hop_length=self.overlap,
            win_length=self.window_length,
            window=self.window,
            center=True,
            pad_mode='reflect',
            power=2.0,
            n_iter=32,
            length=None,
        )


def dl_noise_reduce(audio_id, folder_path, file_path, progress_recorder):
    """
    Denoise an audio file with the pre-loaded model and write the result as WAV.

    The audio is loaded at 16 kHz, converted to a normalised STFT magnitude,
    cut into sliding groups of ``numSegments`` frames, passed through
    ``ProcessapiConfig.model``, recombined with the phase of the noisy input,
    cleaned once more with ``noisereduce`` and finally saved as
    ``audio_processed_denoised.wav`` inside ``folder_path``.

    Args:
        audio_id: Identifier of the audio record (string); used as a path
            component of the returned path.
        folder_path: Directory in which the denoised WAV file is written.
        file_path: Path of the audio file to denoise.
        progress_recorder: Object exposing ``set_progress(current, total)``;
            called with increasing values out of 100 as stages complete.

    Returns:
        ``os.path.join(settings.AUDIO_PROCESSING_ROOT, audio_id,
        'audio_processed_denoised.wav')``.
    """
    windowLength = 256
    overlap = round(0.25 * windowLength)  # hop of 25% => overlap of 75%
    ffTLength = windowLength
    # Integer sample rate, used both for loading and for writing the output so
    # the two can never drift apart.
    fs = 16000
    numFeatures = ffTLength // 2 + 1
    numSegments = 8

    def prepare_input_features(stft_features):
        # Prepend the first numSegments-1 frames so that every original frame
        # is the last frame of one full group of numSegments frames.
        noisySTFT = np.concatenate(
            [stft_features[:, 0:numSegments - 1], stft_features], axis=1)
        numGroups = noisySTFT.shape[1] - numSegments + 1
        stftSegments = np.zeros((numFeatures, numSegments, numGroups))
        for index in range(numGroups):
            stftSegments[:, :, index] = noisySTFT[:, index:index + numSegments]
        return stftSegments

    def read_audio(filepath, sample_rate, normalize=True):
        audio, sr = librosa.load(filepath, sr=sample_rate)
        if normalize:
            peak = np.max(np.abs(audio))
            # Skip scaling for an all-zero signal; dividing by a zero peak
            # would turn the whole signal into NaN/inf.
            if peak > 0:
                div_fac = 1 / peak / 3.0
                audio = audio * div_fac
        return audio, sr

    def revert_features_to_audio(features, phase, cleanMean=None, cleanStd=None):
        # Compare against None explicitly: a statistic that is exactly 0.0 is
        # falsy and must not disable the de-normalisation.
        if cleanMean is not None and cleanStd is not None:
            features = cleanStd * features + cleanMean
        phase = np.transpose(phase, (1, 0))
        features = np.squeeze(features)
        # Re-attach the phase of the noisy input to the predicted magnitudes.
        features = features * np.exp(1j * phase)
        features = np.transpose(features, (1, 0))
        return noiseAudioFeatureExtractor.get_audio_from_stft_spectrogram(features)

    model = ProcessapiConfig.model
    progress_recorder.set_progress(10, 100)

    noisyAudio, sr = read_audio(file_path, sample_rate=fs)

    noiseAudioFeatureExtractor = FeatureExtractor(
        noisyAudio, windowLength=windowLength, overlap=overlap, sample_rate=sr)
    noise_stft_features = noiseAudioFeatureExtractor.get_stft_spectrogram()
    progress_recorder.set_progress(20, 100)

    # Keep the phase aside; only the magnitude is fed to the model.
    noisyPhase = np.angle(noise_stft_features)
    print(noisyPhase.shape)
    noise_stft_features = np.abs(noise_stft_features)

    mean = np.mean(noise_stft_features)
    std = np.std(noise_stft_features)
    # A constant magnitude (e.g. silence) has zero spread; fall back to 1 so
    # the standardisation below does not divide by zero.
    if std == 0:
        std = 1.0
    noise_stft_features = (noise_stft_features - mean) / std

    predictors = prepare_input_features(noise_stft_features)
    progress_recorder.set_progress(35, 100)

    # (features, segments, groups) -> (groups, features, segments, 1)
    predictors = np.reshape(
        predictors, (predictors.shape[0], predictors.shape[1], 1, predictors.shape[2]))
    predictors = np.transpose(predictors, (3, 0, 1, 2)).astype(np.float32)
    print('predictors.shape:', predictors.shape)
    progress_recorder.set_progress(45, 100)

    STFTFullyConvolutional = model.predict(predictors)
    print(STFTFullyConvolutional.shape)
    progress_recorder.set_progress(70, 100)

    denoisedAudioFullyConvolutional = revert_features_to_audio(
        STFTFullyConvolutional, noisyPhase, mean, std)
    print("Min:", np.min(denoisedAudioFullyConvolutional),
          "Max:", np.max(denoisedAudioFullyConvolutional))

    # The first 5250 samples (about 0.33 s at 16 kHz) serve as the noise
    # profile. NOTE(review): presumably assumes the clip starts with noise
    # only - confirm.
    noisy_part = denoisedAudioFullyConvolutional[0:5250]

    # Perform noise reduction
    reduced_noise = nr.reduce_noise(
        audio_clip=denoisedAudioFullyConvolutional, noise_clip=noisy_part, verbose=False)
    print(type(reduced_noise))
    progress_recorder.set_progress(85, 100)

    # Save Audio
    op_file_name = 'audio_processed_denoised.wav'
    op_file_path = os.path.join(folder_path, op_file_name)
    sf.write(op_file_path, reduced_noise, fs)

    op_file_path_rel = os.path.join(
        settings.AUDIO_PROCESSING_ROOT, audio_id, op_file_name)

    return op_file_path_rel
75 changes: 75 additions & 0 deletions backend/processAPI/tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import os

from celery import shared_task
from celery_progress.backend import ProgressRecorder
from django.conf import settings

from processAPI.models import AudioFile
from processAPI.helper import dl_noise_reduce

# Base name (without extension) of the uploaded audio file given to ffmpeg.
AUDIO_FILE_NAME = 'audio'
# File extension initially assumed for the uploaded audio.
INCOMING_AUDIO_EXTENSION = '.mp3'
# Base name and extension of the converted file produced by ffmpeg.
PROCESSED_AUDIO_PREFIX = 'processed_audio'
PROCESSED_AUDIO_EXTENSION = '.wav'
# Values for ffmpeg's -ar (sample rate) and -ab (bit rate) options; kept as
# strings because they are passed to ffmpeg as command-line arguments.
AUDIO_SAMPLE_RATE = '44100'
AUDIO_BIT_RATE = '64k'


@shared_task()
def preprocess_audio(audio_id, audio_file_ext):
    """
    Initialize processed_audio with 'wav' format of original audio

    Runs ffmpeg inside the audio's processing folder to convert
    ``audio<audio_file_ext>`` to ``processed_audio.wav`` and stores the
    resulting path in ``AudioFile.processed_audio``.

    Args:
        audio_id: Primary key of the ``AudioFile`` as a string; also the name
            of its folder under ``MEDIA_ROOT/AUDIO_PROCESSING_ROOT``.
        audio_file_ext: Extension of the uploaded file, including the dot.

    Notes:
        * ffmpeg is invoked with an argument list rather than a shell string,
          so ``audio_file_ext`` cannot inject shell syntax.
        * The working directory is set for the ffmpeg child only; the worker
          process's own cwd and module globals are left untouched.
        * ffmpeg's exit status is not inspected (as before); the database is
          updated whenever ffmpeg could be started.
        * ``FileNotFoundError`` (missing folder or missing ffmpeg executable)
          is swallowed and the record is left unchanged.
    """
    # Local import: only this task needs subprocess.
    import subprocess

    try:
        folder_path = os.path.join(
            settings.MEDIA_ROOT, settings.AUDIO_PROCESSING_ROOT, audio_id)
        output_name = PROCESSED_AUDIO_PREFIX + PROCESSED_AUDIO_EXTENSION

        # convert original audio to wav
        subprocess.run(
            [
                'ffmpeg', '-hide_banner', '-y',
                '-i', AUDIO_FILE_NAME + audio_file_ext,
                '-ab', AUDIO_BIT_RATE,
                '-ar', AUDIO_SAMPLE_RATE,
                '-vn',
                output_name,
            ],
            cwd=folder_path,
            check=False,
        )

        AudioFile.objects.filter(pk=audio_id).update(
            processed_audio=os.path.join(
                settings.AUDIO_PROCESSING_ROOT, audio_id, output_name),
        )
    except FileNotFoundError:
        # NOTE(review): update() without fields appears to change nothing;
        # kept as-is pending a decision on how failures should be recorded.
        AudioFile.objects.filter(pk=audio_id).update()


@shared_task(bind=True)
def denoise_audio(self, audio_id):
    """
    Run the deep-learning denoiser on an audio record's processed file.

    Progress is reported through a ``ProgressRecorder`` bound to this task
    (5 -> 95 -> 100 out of 100, with intermediate steps set by
    ``dl_noise_reduce``). On success the record's ``processed_audio`` is
    pointed at the denoised file and ``'AUDIO_DENOISED'`` is returned.
    A ``FileNotFoundError`` is swallowed and the task returns ``None``.
    """
    try:
        tracker = ProgressRecorder(self)

        record = AudioFile.objects.get(pk=audio_id)
        source_path = record.processed_audio.path

        # Paths are built from the string form of the id.
        audio_key = str(audio_id)
        work_dir = os.path.join(
            settings.MEDIA_ROOT,
            settings.AUDIO_PROCESSING_ROOT,
            audio_key,
        )
        os.chdir(work_dir)
        tracker.set_progress(5, 100)

        denoised_path = dl_noise_reduce(
            audio_key, work_dir, source_path, tracker)
        tracker.set_progress(95, 100)

        record.processed_audio = denoised_path
        record.save()
        tracker.set_progress(100, 100)
    except FileNotFoundError:
        AudioFile.objects.filter(pk=audio_id).update()
    else:
        return 'AUDIO_DENOISED'
2 changes: 2 additions & 0 deletions backend/processAPI/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,6 @@
name='audio-list-create'),
path('process_audio/<uuid:pk>/', views.AudioFileDetail.as_view(),
name='audio-detail'),
path('process_audio/<uuid:pk>/reduce_noise/', views.AudioReduceNoise.as_view(),
name='audio-reduce-noise'),
]
Loading

0 comments on commit c8610ed

Please sign in to comment.