Merge pull request #6 from dssudake/develop

Preprocess and Denoise Audio
dssudake · May 20, 2021 · c8610ed · c8610ed
2 parents f29c344 + 35f6619
commit c8610ed
Show file tree

Hide file tree

Showing 32 changed files with 457 additions and 88 deletions.
diff --git a/README.md b/README.md
@@ -5,3 +5,29 @@
 <h2 align="center"> 
   Audio Processing on Cloud using Deep Learning
 </h2>
+
+## Installation
+
+### Basic Setup
+
+- [docker](https://docs.docker.com/get-docker/) and [docker-compose](https://docs.docker.com/compose/install/) are required to run the application.
+- Make sure you install the versions of both that match your OS (Linux, Windows, Mac).
+- `git clone https://github.com/dssudake/AudMIX.git`
+- `cd AudMIX`
+
+### Development Environment
+
+```bash
+# Build the latest images
+docker-compose build
+
+# Start the application
+docker-compose up
+
+# or
+
+# Add -d flag to run everything in the background
+docker-compose up -d
+```
+
+Then open http://localhost:3000 in your browser to view the frontend; the backend is available at http://localhost:8000.
diff --git a/backend/Dockerfile b/backend/Dockerfile
@@ -1,9 +1,9 @@
 # pull official base image
-FROM python:3.8.3-alpine
+FROM tensorflow/tensorflow:2.5.0
 
 # install ffmpeg with linux static build
 ARG FFMPEGV=4.2.2
-RUN wget -q https://www.johnvansickle.com/ffmpeg/old-releases/ffmpeg-${FFMPEGV}-amd64-static.tar.xz \
+RUN curl -O https://www.johnvansickle.com/ffmpeg/old-releases/ffmpeg-${FFMPEGV}-amd64-static.tar.xz \
     && tar xJf ffmpeg-${FFMPEGV}-amd64-static.tar.xz \
     && mv ffmpeg-${FFMPEGV}-amd64-static/ffmpeg /usr/local/bin/ \
     && rm -rf ffmpeg-${FFMPEGV}-amd64-static \
@@ -20,6 +20,7 @@ ENV PYTHONUNBUFFERED 1
 RUN pip install --upgrade pip
 COPY ./requirements.txt .
 RUN pip install -r requirements.txt
+# libsndfile1 is the native library used by the `soundfile` Python package.
+# Skip recommended packages and drop the apt lists in the same layer to keep
+# the image small.
+RUN apt-get -y update \
+    && apt-get install -y --no-install-recommends libsndfile1 \
+    && rm -rf /var/lib/apt/lists/*
 
 # copy entrypoint.sh
 COPY ./entrypoint.sh .

diff --git a/backend/backend/settings.py b/backend/backend/settings.py
@@ -43,8 +43,8 @@
     'rest_framework',
     'corsheaders',
     'drf_yasg',
-    'file_upload',
-    'processAPI',
+    'celery_progress',
+    'processAPI.apps.ProcessapiConfig',
 
 ]
 
@@ -144,3 +144,7 @@
 CELERY_ACCEPT_CONTENT = ['application/json']
 CELERY_RESULT_SERIALIZER = 'json'
 CELERY_TASK_SERIALIZER = 'json'
+
+# DL Model Paths
+MODEL_ARCH_PATH = os.path.join(BASE_DIR, 'model', 'model_arch.json')
+MODEL_WEIGHT_PATH = os.path.join(BASE_DIR, 'model', 'model_weights.h5')
diff --git a/backend/backend/urls.py b/backend/backend/urls.py
@@ -25,13 +25,14 @@
 )
 
 urlpatterns = [
-    path('admin/', admin.site.urls),
-
-    # path('api/', include('file_upload.urls')),
+    path('api/admin/', admin.site.urls),
 
     # Audio Processing API Routes
     path('api/', include('processAPI.urls')),
 
+    # Endpoint to check celery worker task progress
+    path('api/task_status/', include('celery_progress.urls')),
+
     # Swagger API Documentation
     path('api/docs/', schema_view.with_ui('swagger',
          cache_timeout=0), name='schema-swagger-ui'),

diff --git a/backend/entrypoint.sh b/backend/entrypoint.sh
@@ -2,7 +2,6 @@
 
 # python manage.py flush --no-input
 python manage.py makemigrations
-python manage.py makemigrations file_upload
 python manage.py makemigrations processAPI
 python manage.py migrate
 

diff --git a/backend/file_upload/__init__.py b/backend/file_upload/__init__.py
diff --git a/backend/file_upload/admin.py b/backend/file_upload/admin.py
diff --git a/backend/file_upload/apps.py b/backend/file_upload/apps.py
diff --git a/backend/file_upload/models.py b/backend/file_upload/models.py
diff --git a/backend/file_upload/serializers.py b/backend/file_upload/serializers.py
diff --git a/backend/file_upload/tasks.py b/backend/file_upload/tasks.py
diff --git a/backend/file_upload/tests.py b/backend/file_upload/tests.py
diff --git a/backend/file_upload/urls.py b/backend/file_upload/urls.py
diff --git a/backend/file_upload/views.py b/backend/file_upload/views.py
diff --git a/backend/model/model_arch.json b/backend/model/model_arch.json
diff --git a/backend/model/model_weights.h5 b/backend/model/model_weights.h5
diff --git a/backend/processAPI/__init__.py b/backend/processAPI/__init__.py
@@ -0,0 +1 @@
+# Points Django at the AppConfig subclass defined in processAPI/apps.py.
+default_app_config = 'processAPI.apps.ProcessapiConfig'
diff --git a/backend/processAPI/apps.py b/backend/processAPI/apps.py
@@ -1,5 +1,13 @@
 from django.apps import AppConfig
+from django.conf import settings
+from tensorflow.keras.models import model_from_json
 
 
 class ProcessapiConfig(AppConfig):
     name = 'processAPI'
+
+    json_file = open(settings.MODEL_ARCH_PATH, 'r')
+    loaded_model_json = json_file.read()
+    json_file.close()
+    model = model_from_json(loaded_model_json)
+    model.load_weights(settings.MODEL_WEIGHT_PATH)
diff --git a/backend/processAPI/helper.py b/backend/processAPI/helper.py
@@ -0,0 +1,125 @@
+import os
+import numpy as np
+import scipy
+import librosa
+import librosa.display
+import noisereduce as nr
+import soundfile as sf
+
+from django.conf import settings
+from processAPI.apps import ProcessapiConfig
+
+
+class FeatureExtractor:
+    def __init__(self, audio, *, windowLength, overlap, sample_rate):
+        self.audio = audio
+        self.ffT_length = windowLength
+        self.window_length = windowLength
+        self.overlap = overlap
+        self.sample_rate = sample_rate
+        self.window = scipy.signal.hamming(self.window_length, sym=False)
+
+    def get_stft_spectrogram(self):
+        return librosa.stft(self.audio, n_fft=self.ffT_length, win_length=self.window_length, hop_length=self.overlap,
+                            window=self.window, center=True)
+
+    def get_audio_from_stft_spectrogram(self, stft_features):
+        return librosa.istft(stft_features, win_length=self.window_length, hop_length=self.overlap,
+                             window=self.window, center=True)
+
+    def get_mel_spectrogram(self):
+        return librosa.feature.melspectrogram(self.audio, sr=self.sample_rate, power=2.0, pad_mode='reflect',
+                                              n_fft=self.ffT_length, hop_length=self.overlap, center=True)
+
+    def get_audio_from_mel_spectrogram(self, M):
+        return librosa.feature.inverse.mel_to_audio(M, sr=self.sample_rate, n_fft=self.ffT_length, hop_length=self.overlap,
+                                                    win_length=self.window_length, window=self.window,
+                                                    center=True, pad_mode='reflect', power=2.0, n_iter=32, length=None)
+
+
+def dl_noise_reduce(audio_id, folder_path, file_path, progress_recorder):
+    windowLength = 256
+    overlap = round(0.25 * windowLength)  # overlap of 75%
+    ffTLength = windowLength
+    # inputFs = 48e3
+    fs = 16e3
+    numFeatures = ffTLength//2 + 1
+    numSegments = 8
+
+    def prepare_input_features(stft_features):
+        noisySTFT = np.concatenate(
+            [stft_features[:, 0:numSegments-1], stft_features], axis=1)
+        stftSegments = np.zeros(
+            (numFeatures, numSegments, noisySTFT.shape[1] - numSegments + 1))
+        for index in range(noisySTFT.shape[1] - numSegments + 1):
+            stftSegments[:, :, index] = noisySTFT[:, index:index + numSegments]
+        return stftSegments
+
+    def read_audio(filepath, sample_rate, normalize=True):
+        audio, sr = librosa.load(filepath, sr=sample_rate)
+        if normalize:
+            div_fac = 1 / np.max(np.abs(audio)) / 3.0
+            audio = audio * div_fac
+        return audio, sr
+
+    def revert_features_to_audio(features, phase, cleanMean=None, cleanStd=None):
+        if cleanMean and cleanStd:
+            features = cleanStd * features + cleanMean
+        phase = np.transpose(phase, (1, 0))
+        features = np.squeeze(features)
+        features = features * np.exp(1j * phase)
+        features = np.transpose(features, (1, 0))
+        return noiseAudioFeatureExtractor.get_audio_from_stft_spectrogram(features)
+
+    model = ProcessapiConfig.model
+    progress_recorder.set_progress(10, 100)
+
+    noisyAudio, sr = read_audio(file_path, sample_rate=fs)
+
+    noiseAudioFeatureExtractor = FeatureExtractor(
+        noisyAudio, windowLength=windowLength, overlap=overlap, sample_rate=sr)
+    noise_stft_features = noiseAudioFeatureExtractor.get_stft_spectrogram()
+    progress_recorder.set_progress(20, 100)
+
+    noisyPhase = np.angle(noise_stft_features)
+    print(noisyPhase.shape)
+    noise_stft_features = np.abs(noise_stft_features)
+
+    mean = np.mean(noise_stft_features)
+    std = np.std(noise_stft_features)
+    noise_stft_features = (noise_stft_features - mean) / std
+
+    predictors = prepare_input_features(noise_stft_features)
+    progress_recorder.set_progress(35, 100)
+
+    predictors = np.reshape(
+        predictors, (predictors.shape[0], predictors.shape[1], 1, predictors.shape[2]))
+    predictors = np.transpose(predictors, (3, 0, 1, 2)).astype(np.float32)
+    print('predictors.shape:', predictors.shape)
+    progress_recorder.set_progress(45, 100)
+
+    STFTFullyConvolutional = model.predict(predictors)
+    print(STFTFullyConvolutional.shape)
+    progress_recorder.set_progress(70, 100)
+
+    denoisedAudioFullyConvolutional = revert_features_to_audio(
+        STFTFullyConvolutional, noisyPhase, mean, std)
+    print("Min:", np.min(denoisedAudioFullyConvolutional),
+          "Max:", np.max(denoisedAudioFullyConvolutional))
+
+    noisy_part = denoisedAudioFullyConvolutional[0:5250]
+
+    # Perform noise reduction
+    reduced_noise = nr.reduce_noise(
+        audio_clip=denoisedAudioFullyConvolutional, noise_clip=noisy_part, verbose=False)
+    print(type(reduced_noise))
+    progress_recorder.set_progress(85, 100)
+
+    # Save Audio
+    op_file_path = os.path.join(folder_path, 'audio_processed_denoised.wav')
+    sf.write(op_file_path, reduced_noise, 16000)
+
+    op_file_path_rel = os.path.join(
+        settings.AUDIO_PROCESSING_ROOT, audio_id, 'audio_processed_denoised.wav')
+
+    return op_file_path_rel
diff --git a/backend/processAPI/tasks.py b/backend/processAPI/tasks.py
@@ -0,0 +1,75 @@
+import os
+
+from celery import shared_task
+from celery_progress.backend import ProgressRecorder
+from django.conf import settings
+
+from processAPI.models import AudioFile
+from processAPI.helper import dl_noise_reduce
+
+# Base name (without extension) of the uploaded audio file inside its folder.
+AUDIO_FILE_NAME = 'audio'
+# Default extension of the incoming audio file.
+INCOMING_AUDIO_EXTENSION = '.mp3'
+# Base name and extension of the converted file produced by ffmpeg.
+PROCESSED_AUDIO_PREFIX = 'processed_audio'
+PROCESSED_AUDIO_EXTENSION = '.wav'
+# Kept as strings because they are passed verbatim to ffmpeg's -ar and -ab
+# options.
+AUDIO_SAMPLE_RATE = '44100'
+AUDIO_BIT_RATE = '64k'
+
+
+@shared_task()
+def preprocess_audio(audio_id, audio_file_ext):
+    """
+    Initialize processed_audio with 'wav' format of original audio
+    """
+    try:
+        global INCOMING_AUDIO_EXTENSION
+        INCOMING_AUDIO_EXTENSION = audio_file_ext
+
+        folder_path = os.path.join(
+            settings.MEDIA_ROOT, settings.AUDIO_PROCESSING_ROOT, audio_id)
+        os.chdir(folder_path)
+
+        # convert original audio to wav
+        os.system(
+            'ffmpeg -hide_banner -y -i ' +
+            AUDIO_FILE_NAME + INCOMING_AUDIO_EXTENSION +
+            ' -ab ' + AUDIO_BIT_RATE +
+            ' -ar ' + AUDIO_SAMPLE_RATE +
+            ' -vn ' +
+            PROCESSED_AUDIO_PREFIX + PROCESSED_AUDIO_EXTENSION)
+
+        AudioFile.objects.filter(pk=audio_id).update(
+            processed_audio=os.path.join(
+                settings.AUDIO_PROCESSING_ROOT, audio_id, PROCESSED_AUDIO_PREFIX + PROCESSED_AUDIO_EXTENSION),
+        )
+    except FileNotFoundError:
+        AudioFile.objects.filter(pk=audio_id).update()
+
+
+@shared_task(bind=True)
+def denoise_audio(self, audio_id):
+    """
+    Process audio through deep learning to reduce noise
+    """
+    try:
+        progress_recorder = ProgressRecorder(self)
+
+        audio_file = AudioFile.objects.get(pk=audio_id)
+        audio_file_path = audio_file.processed_audio.path
+
+        audio_id = str(audio_id)
+        folder_path = os.path.join(
+            settings.MEDIA_ROOT, settings.AUDIO_PROCESSING_ROOT, audio_id)
+        os.chdir(folder_path)
+        progress_recorder.set_progress(5, 100)
+
+        op_file_path = dl_noise_reduce(
+            audio_id, folder_path, audio_file_path, progress_recorder)
+        progress_recorder.set_progress(95, 100)
+
+        audio_file.processed_audio = op_file_path
+        audio_file.save()
+        progress_recorder.set_progress(100, 100)
+
+        return 'AUDIO_DENOISED'
+    except FileNotFoundError:
+        AudioFile.objects.filter(pk=audio_id).update()
diff --git a/backend/processAPI/urls.py b/backend/processAPI/urls.py
@@ -8,4 +8,6 @@
          name='audio-list-create'),
     path('process_audio/<uuid:pk>/', views.AudioFileDetail.as_view(),
          name='audio-detail'),
+    path('process_audio/<uuid:pk>/reduce_noise/', views.AudioReduceNoise.as_view(),
+         name='audio-reduce-noise'),
 ]
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		default_app_config = 'processAPI.apps.ProcessapiConfig'