v1.0.1 Release

Antibas · Dec 6, 2021 · c27e0bf · c27e0bf
1 parent 1f4adce
commit c27e0bf
Show file tree

Hide file tree

Showing 10 changed files with 39 additions and 87 deletions.
diff --git a/README.md b/README.md
@@ -15,7 +15,6 @@ pip install -r requirements.txt
 ```
 
 ## Features
-* Both **aQHM** and **eaQHM** are supported.
 * Preprocessing high pass filter option.
 * [SWIPEP pitch estimator](http://www.kerwa.ucr.ac.cr:8080/bitstream/handle/10669/536/dissertation.pdf) is used for the ```f0``` estimations, implemented in Python by Disha Garg: https://github.com/dishagarg/SWIPE
 	* The user may use custom pitch limits for the estimation.
@@ -39,7 +38,7 @@ A *main.py* file is provided, which executes **eaQHMAnalysisAndSynthesis** on a
 What you have to do is:
 1. Open *main.py*.
 2. Run the code.
-3. Write the name of the *.wav* file to be processed in the console.
+3. A file dialog should open where you must select the *.wav* file of your choice.
 4. Specify the gender of the speaker ("male", "female" or other). You may also use "child" as an input.
 5. The program will print some prompts showing the **Signal-to-Reconstruction-Error Ratio (SRER)** [[1]](#Reference) of each adaptation and some plots will be generated.
 6. After the program terminates, a *\*filename\*_reconstructed.wav* file will be generated.
@@ -51,6 +50,7 @@ Here is an example of the output of the code running *SA19.wav*:
 And here are the plots produced:
 
 ![](img/freq.png)
+![](img/freq2.png)
 ![](img/timeDom.png)
 ![](img/timeDom2.png)
 

diff --git a/SWIPE.py b/SWIPE.py
@@ -63,7 +63,7 @@ def swipep(x, fs, speechFile, plim):
         # Compute spectrum
         w = hanning(ws[i])  # Hann window
         o = max(0, round(ws[i] - dn))  # Window overlap
-        [X, f, ti, im] = specgram(xk, NFFT=int(ws[i]), Fs=fs, window=w, noverlap=int(o))
+        [X, f, ti, im] = specgram(xk, NFFT=int(ws[i]), Fs=fs, window=w, noverlap=int(o), vmin=-180, vmax=-40)
 
         # Interpolate at equidistant ERBs steps
         f = asarray(f)

diff --git a/functions.py b/functions.py
@@ -34,8 +34,7 @@
 
 def eaQHMAnalysisAndSynthesis(speechFile: str, gender: str or tuple = 'other', step: int = 15,
                   maxAdpt: int = 10, pitchPeriods: int = 3, analysisWindow: int = 32, fullWaveform: bool = True,
-                  fullBand: bool = True, eaQHM: bool = True, fc: int = 0, partials: int = 0,
-                  printPrompts: bool = True, loadingScreen: bool = True):
+                  fc: int = 0, partials: int = 0, printPrompts: bool = True, loadingScreen: bool = True):
     '''
     Performs adaptive Quasi-Harmonic Analysis of Speech
     using the extended adaptive Quasi-Harmonic Model and decomposes 
@@ -61,10 +60,6 @@ def eaQHMAnalysisAndSynthesis(speechFile: str, gender: str or tuple = 'other', s
         The steps of the pitch analysis window, where the analysis starts. The default is 32.
     fullWaveform : bool, optional
         Determines if a full waveform length analysis will be performed. The default is True.
-    fullBand : bool, optional
-        Determines if a full band analysis-in-voiced-frames will be performed. The default is True.
-    eaQHM : bool, optional
-        Determines if an adaptive Quasi-Harmonic Model or an extended adaptive Quasi-Harmonic Model will be used. The default is True.
     fc : int, optional
         Applies a high pass filtering at the specified Hz before the analysis starts. If <= 0, no filter is applied. The default is 0.
     partials : int, optional
@@ -117,16 +112,12 @@ def eaQHMAnalysisAndSynthesis(speechFile: str, gender: str or tuple = 'other', s
 
     f0s = getLinear(f0s, arange(0, len(s2)-1, round(fs*5/1000))/fs)
 
-    if fullBand:
-        Fmax = int(fs/2-200)
+    Fmax = int(fs/2-200)
 
-        if partials > 0:
-            Kmax = partials
-        else:
-            Kmax = int(round(Fmax/min(f0s[:,1])) + 10)
+    if partials > 0:
+        Kmax = partials
     else:
-        Fmax = int(fs/2-2000)
-        Kmax = int(round(Fmax/min(f0s[:,1])) + 10) 
+        Kmax = int(round(Fmax/min(f0s[:,1])) + 10)
 
 
     analysisWindowSamples = analysisWindow*step
@@ -300,10 +291,8 @@ def eaQHMAnalysisAndSynthesis(speechFile: str, gender: str or tuple = 'other', s
                             fm = concatenate((-flipud(transpose1dArray(fm)), tmp_zeros, transpose1dArray(fm)), axis=1)
                             am = concatenate((flipud(transpose1dArray(am)), tmp_zeros, transpose1dArray(am)), axis=1)
 
-                        if not eaQHM:
-                            amplitudes_tmp, slopes_tmp = aqhmLS_complexamps(s[window_range + tith], fm, window, fs)
-                        else:
-                            amplitudes_tmp, slopes_tmp = eaqhmLS_complexamps(s[window_range + tith], am, fm, window, fs)
+
+                        amplitudes_tmp, slopes_tmp = eaqhmLS_complexamps(s[window_range + tith], am, fm, window, fs)
 
                         fmismatch_tmp = fs/(2*pi)*divide(multiply(real(amplitudes_tmp), imag(slopes_tmp)) - multiply(imag(amplitudes_tmp), real(slopes_tmp)), abs(amplitudes_tmp)**2)
 
@@ -479,67 +468,6 @@ def iqhmLS_complexamps(s, f0range, window, fs: int):
     slopes = ampsl[K:2*K+1]
 
     return amplitudes, slopes
-
-def aqhmLS_complexamps(s, fm, window, fs):
-    '''
-    Computes the parameters of first order complex polynomial
-    model using Least Squares and a FM model for the frequency. 
-
-    Parameters
-    ----------
-    s : array_like
-        The part of the signal to be computed.
-    fm : array_like
-        The estimated instantaneous frequencies.
-    window : array_like
-        The window of the signal to be computed.
-    fs : int
-        The sampling frequency.
-
-    Returns
-    -------
-    amplitudes : array_like
-        Amplitude of harmonics.
-    slopes : array_like
-        Slope of harmonics.
-
-    '''
-    windowT = transpose1dArray(window)
-
-    length = len(fm)
-    K = len(fm[0])
-
-    midlen = int((length-1)/2)
-
-    window_range = arange(-midlen,midlen+1)
-    window_rangeT = transpose1dArray(window_range)
-
-    f_an = zeros((K, length), float)
-    for k in range(K):
-        f_an[k] = lfilter([1], [1, -1], fm[:, k])
-        f_an[k] -= f_an[k][midlen]
-
-    t = (2*pi*f_an)/fs
-    tT = transpose(t)
-
-    E1 = cos(tT) + 1j* sin(tT)
-    E = concatenate((E1, tile(window_rangeT, (1, K))*E1), axis=1)
-
-    Ewindow = multiply(E, tile(windowT, (1, 2*K))) 
-    EwindowT = conjugate(transpose(Ewindow))
-
-    R = dot(EwindowT, Ewindow)
-
-    #assert(cond(R) < 10**(10)),'CAUTION!!! Bad condition of matrix.'
-
-    windowSignal = multiply(windowT, s)
-    arr = dot(EwindowT, windowSignal)
-    ampsl = dot(inv(R), arr)
-
-    amplitudes = ampsl[0:K]
-    slopes = ampsl[K:2*K+1]
-
-    return amplitudes, slopes
 
 def eaqhmLS_complexamps(s, am, fm, window, fs):
     '''

diff --git a/img/SA19out.JPG b/img/SA19out.JPG
diff --git a/img/freq.png b/img/freq.png
diff --git a/img/freq2.png b/img/freq2.png
diff --git a/img/timeDom.png b/img/timeDom.png
diff --git a/img/timeDom2.png b/img/timeDom2.png
diff --git a/main.py b/main.py
@@ -8,16 +8,27 @@
 from functions import eaQHMAnalysisAndSynthesis
 from numpy import arange, float32
 from scipy.io.wavfile import write
-from matplotlib.pyplot import plot, show, xlabel, ylabel, title
+from matplotlib.pyplot import plot, show, xlabel, ylabel, title, specgram, colorbar
 from scipy.io.wavfile import read
 from misc import normalize
+from tkinter import Tk
+from tkinter.filedialog import askopenfilename
 
-def plotGraphs(t, t_reconst, signal, signal_reconst, name):
+
+def plotGraphs(t, t_reconst, signal, signal_reconst, name, fs):
+    colorbar(label="Intensity (dB)")
     title("Spectrogram of " + name)
     xlabel('Time (s)')
     ylabel('Frequency (Hz)')
     show()
 
+    specgram(signal_reconst, Fs=fs, vmin=-180, vmax=-40)
+    colorbar(label="Intensity (dB)")
+    title("Spectrogram of " + name + " reconstructed")
+    xlabel('Time (s)')
+    ylabel('Frequency (Hz)')
+    show()
+
     plot(t, signal)
     title(name)
     xlabel('Time (s)')
@@ -31,7 +42,19 @@ def plotGraphs(t, t_reconst, signal, signal_reconst, name):
     show()
 
 def main():
-    filename = input("Write the name of the file to be processed: ")
+    root = Tk()
+    root.withdraw()
+    root.attributes("-topmost", True)
+    filename = askopenfilename(
+        parent=root,
+        title="Write the name of the file to be processed", 
+        initialdir="/", 
+        filetypes=("WAVFILES *.wav",))
+
+    if filename == "":
+        raise ValueError("No File Selected")
+
+    print("File Selected: " + filename)
     gender = input("You may include a gender (male, female, child or other): ")
 
     print()
@@ -44,7 +67,7 @@ def main():
     t = arange(0, len(signal)/fs, dt)
 
     t_reconstructed = arange(0, len(signal_reconstructed)/fs, dt)
-    plotGraphs(t, t_reconstructed, signal, signal_reconstructed, filename)
+    plotGraphs(t, t_reconstructed, signal, signal_reconstructed, filename, fs)
 
     write(filename[0:len(filename)-4]+"_reconstructed.wav", fs, float32(signal_reconstructed))
 

diff --git a/requirements.txt b/requirements.txt
@@ -50,6 +50,7 @@ diff-match-patch @ file:///tmp/build/80754af9/diff-match-patch_1594828741838/wor
 distributed @ file:///C:/ci/distributed_1594742844291/work
 docopt==0.6.2
 docutils==0.16
+easygui==0.98.2
 entrypoints==0.3
 et-xmlfile==1.0.1
 fastcache==1.1.0
@@ -252,4 +253,4 @@ yarg==0.1.9
 zict==2.0.0
 zipp==3.1.0
 zope.event==4.4
-zope.interface==4.7.1
+zope.interface==4.7.1