diff --git a/README.md b/README.md
index 8696a79..1771d91 100644
--- a/README.md
+++ b/README.md
@@ -15,7 +15,6 @@
 pip install -r requirements.txt
 ```
 ## Features
-* Both **aQHM** and **eaQHM** are supported.
 * Preprocessing high pass filter option.
 * [SWIPEP pitch estimator](http://www.kerwa.ucr.ac.cr:8080/bitstream/handle/10669/536/dissertation.pdf) is used for the ```f0``` estimations, implemented in Python by Disha Garg: https://github.com/dishagarg/SWIPE
 * The user may use custom pitch limits for the estimation.
@@ -39,7 +38,7 @@ A *main.py* file is provided, which executes **eaQHMAnalysisAndSynthesis** on a
 What you have to do is:
 1. Open *main.py*.
 2. Run the code.
-3. Write the name of the *.wav* file to be processed in the console.
+3. A file dialog will open; select the *.wav* file you want to process.
 4. Specify the gender of the speaker ("male", "female" or other). You may also use "child" as an input.
 5. The program will print some prompts showing the **Signal-to-Reconstruction-Ratio (SRER)** [[1]](#Reference) of each adaptation and some plots will be generated.
 6. After the program terminates, a *\*filename\*_reconstructed.wav* file will be generated.
@@ -51,6 +50,7 @@ Here is an example of the output of the code running *SA19.wav*:
 And here are the plots produced:
 
 ![](img/freq.png)
+![](img/freq2.png)
 ![](img/timeDom.png)
 ![](img/timeDom2.png)
 
diff --git a/SWIPE.py b/SWIPE.py
index 9332e22..1cdf76d 100644
--- a/SWIPE.py
+++ b/SWIPE.py
@@ -63,7 +63,7 @@ def swipep(x, fs, speechFile, plim):
         # Compute spectrum
         w = hanning(ws[i])  # Hann window
         o = max(0, round(ws[i] - dn))  # Window overlap
-        [X, f, ti, im] = specgram(xk, NFFT=int(ws[i]), Fs=fs, window=w, noverlap=int(o))
+        [X, f, ti, im] = specgram(xk, NFFT=int(ws[i]), Fs=fs, window=w, noverlap=int(o), vmin=-180, vmax=-40)
 
         # Interpolate at equidistant ERBs steps
         f = asarray(f)
diff --git a/functions.py b/functions.py
index b7b7a24..b9ba880 100644
--- a/functions.py
+++ b/functions.py
@@ -34,8 +34,7 @@
 def eaQHMAnalysisAndSynthesis(speechFile: str, gender: str or tuple = 'other',
                               step: int = 15, maxAdpt: int = 10, pitchPeriods: int = 3,
                               analysisWindow: int = 32, fullWaveform: bool = True,
-                              fullBand: bool = True, eaQHM: bool = True, fc: int = 0, partials: int = 0,
-                              printPrompts: bool = True, loadingScreen: bool = True):
+                              fc: int = 0, partials: int = 0, printPrompts: bool = True, loadingScreen: bool = True):
     '''
     Performs adaptive Quasi-Harmonic Analysis of Speech
     using the extended adaptive Quasi-Harmonic Model and decomposes
@@ -61,10 +60,6 @@ def eaQHMAnalysisAndSynthesis(speechFile: str, gender: str or tuple = 'other', s
        The steps of the pitch analysis window, where the analysis starts. The default is 32.
     fullWaveform : bool, optional
        Determines if a full waveform length analysis will be performed. The default is True.
-    fullBand : bool, optional
-       Determines if a full band analysis-in-voiced-frames will be performed. The default is True.
-    eaQHM : bool, optional
-       Determines if an adaptive Quasi-Harmonic Model or an extended adaptive Quasi-Harmonic Model will be used. The default is True.
     fc : int, optional
        Applies a high pass filtering at the specified Hz before the analysis starts. If <= 0, no filter is applied. The default is 0.
     partials : int, optional
@@ -117,16 +112,12 @@ def eaQHMAnalysisAndSynthesis(speechFile: str, gender: str or tuple = 'other', s
 
     f0s = getLinear(f0s, arange(0, len(s2)-1, round(fs*5/1000))/fs)
 
-    if fullBand:
-        Fmax = int(fs/2-200)
+    Fmax = int(fs/2-200)
 
-        if partials > 0:
-            Kmax = partials
-        else:
-            Kmax = int(round(Fmax/min(f0s[:,1])) + 10)
+    if partials > 0:
+        Kmax = partials
     else:
-        Fmax = int(fs/2-2000)
-        Kmax = int(round(Fmax/min(f0s[:,1])) + 10)
+        Kmax = int(round(Fmax/min(f0s[:,1])) + 10)
 
     analysisWindowSamples = analysisWindow*step
 
@@ -300,10 +291,8 @@ def eaQHMAnalysisAndSynthesis(speechFile: str, gender: str or tuple = 'other', s
                 fm = concatenate((-flipud(transpose1dArray(fm)), tmp_zeros, transpose1dArray(fm)), axis=1)
                 am = concatenate((flipud(transpose1dArray(am)), tmp_zeros, transpose1dArray(am)), axis=1)
 
-                if not eaQHM:
-                    amplitudes_tmp, slopes_tmp = aqhmLS_complexamps(s[window_range + tith], fm, window, fs)
-                else:
-                    amplitudes_tmp, slopes_tmp = eaqhmLS_complexamps(s[window_range + tith], am, fm, window, fs)
+
+                amplitudes_tmp, slopes_tmp = eaqhmLS_complexamps(s[window_range + tith], am, fm, window, fs)
 
                 fmismatch_tmp = fs/(2*pi)*divide(multiply(real(amplitudes_tmp), imag(slopes_tmp)) - multiply(imag(amplitudes_tmp), real(slopes_tmp)), abs(amplitudes_tmp)**2)
 
@@ -479,67 +468,6 @@ def iqhmLS_complexamps(s, f0range, window, fs: int):
     slopes = ampsl[K:2*K+1]
     return amplitudes, slopes
 
-
-def aqhmLS_complexamps(s, fm, window, fs):
-    '''
-    Computes the parameters of first order complex polynomial
-    model using Least Squares and a FM model for the frequency.
-
-    Parameters
-    ----------
-    s : array_like
-        The part of the signal to be computed.
-    fm : array_like
-        The estimated instantaneous frequencies.
-    window : array_like
-        The window of the signal to be computed.
-    fs : int
-        The sampling frequency.
-
-    Returns
-    -------
-    amplitudes : array_like
-        Amplitude of harmonics.
-    slopes : array_like
-        Slope of harmonics.
-
-    '''
-    windowT = transpose1dArray(window)
-
-    length = len(fm)
-    K = len(fm[0])
-
-    midlen = int((length-1)/2)
-
-    window_range = arange(-midlen,midlen+1)
-    window_rangeT = transpose1dArray(window_range)
-
-    f_an = zeros((K, length), float)
-    for k in range(K):
-        f_an[k] = lfilter([1], [1, -1], fm[:, k])
-        f_an[k] -= f_an[k][midlen]
-
-    t = (2*pi*f_an)/fs
-    tT = transpose(t)
-
-    E1 = cos(tT) + 1j* sin(tT)
-    E = concatenate((E1, tile(window_rangeT, (1, K))*E1), axis=1)
-
-    Ewindow = multiply(E, tile(windowT, (1, 2*K)))
-    EwindowT = conjugate(transpose(Ewindow))
-
-    R = dot(EwindowT, Ewindow)
-
-    #assert(cond(R) < 10**(10)),'CAUTION!!! Bad condition of matrix.'
-
-    windowSignal = multiply(windowT, s)
-    arr = dot(EwindowT, windowSignal)
-    ampsl = dot(inv(R), arr)
-
-    amplitudes = ampsl[0:K]
-    slopes = ampsl[K:2*K+1]
-
-    return amplitudes, slopes
 
 def eaqhmLS_complexamps(s, am, fm, window, fs):
     '''
diff --git a/img/SA19out.JPG b/img/SA19out.JPG
index bed868f..b744baa 100644
Binary files a/img/SA19out.JPG and b/img/SA19out.JPG differ
diff --git a/img/freq.png b/img/freq.png
index 22cdbe8..d0fdf1b 100644
Binary files a/img/freq.png and b/img/freq.png differ
diff --git a/img/freq2.png b/img/freq2.png
new file mode 100644
index 0000000..19dbd61
Binary files /dev/null and b/img/freq2.png differ
diff --git a/img/timeDom.png b/img/timeDom.png
index 8ba105a..af7d5d2 100644
Binary files a/img/timeDom.png and b/img/timeDom.png differ
diff --git a/img/timeDom2.png b/img/timeDom2.png
index 939f48f..5cf9fe6 100644
Binary files a/img/timeDom2.png and b/img/timeDom2.png differ
diff --git a/main.py b/main.py
index 0a95b4b..01833c9 100644
--- a/main.py
+++ b/main.py
@@ -8,16 +8,30 @@ from functions import eaQHMAnalysisAndSynthesis
 
 from numpy import arange, float32
 from scipy.io.wavfile import write
-from matplotlib.pyplot import plot, show, xlabel, ylabel, title
+from matplotlib.pyplot import plot, show, xlabel, ylabel, title, specgram, colorbar
 from scipy.io.wavfile import read
 from misc import normalize
+from tkinter import Tk
+from tkinter.filedialog import askopenfilename
 
-def plotGraphs(t, t_reconst, signal, signal_reconst, name):
+
+def plotGraphs(t, t_reconst, signal, signal_reconst, name, fs):
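+    # Note: the analysis stage (swipep in SWIPE.py) appears to have already
+    # drawn a spectrogram of the input via pyplot.specgram, so the colorbar
+    # and labels below decorate that figure rather than plotting a new one.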
+    colorbar(label="Intensity (dB)")
     title("Spectrogram of " + name)
     xlabel('Time (s)')
     ylabel('Frequency (Hz)')
     show()
 
+    specgram(signal_reconst, Fs=fs, vmin=-180, vmax=-40)
+    colorbar(label="Intensity (dB)")
+    title("Spectrogram of " + name + " reconstructed")
+    xlabel('Time (s)')
+    ylabel('Frequency (Hz)')
+    show()
+
     plot(t, signal)
     title(name)
     xlabel('Time (s)')
@@ -31,7 +45,21 @@ def plotGraphs(t, t_reconst, signal, signal_reconst, name):
     show()
 
 def main():
-    filename = input("Write the name of the file to be processed: ")
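+    # A hidden Tk root window backs the native file dialog; "-topmost" keeps
+    # the dialog in front so it cannot open behind other windows.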
+    root = Tk()
+    root.withdraw()
+    root.attributes("-topmost", True)
+    filename = askopenfilename(
+        parent=root,
+        title="Select the .wav file to be processed",
+        initialdir="/",
+        filetypes=[("WAV files", "*.wav")])
+
+    if filename == "":
+        raise ValueError("No File Selected")
+
+    print("File Selected: " + filename)
     gender = input("You may include a gender (male, female, child or other): ")
     print()
 
@@ -44,7 +72,7 @@ def main():
 
     t = arange(0, len(signal)/fs, dt)
     t_reconstructed = arange(0, len(signal_reconstructed)/fs, dt)
 
-    plotGraphs(t, t_reconstructed, signal, signal_reconstructed, filename)
+    plotGraphs(t, t_reconstructed, signal, signal_reconstructed, filename, fs)
 
     write(filename[0:len(filename)-4]+"_reconstructed.wav", fs, float32(signal_reconstructed))
diff --git a/requirements.txt b/requirements.txt
index b47cee1..92927bc 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -50,6 +50,7 @@ diff-match-patch @ file:///tmp/build/80754af9/diff-match-patch_1594828741838/wor
 distributed @ file:///C:/ci/distributed_1594742844291/work
 docopt==0.6.2
 docutils==0.16
+easygui==0.98.2
 entrypoints==0.3
 et-xmlfile==1.0.1
 fastcache==1.1.0
@@ -252,4 +253,4 @@ yarg==0.1.9
 zict==2.0.0
 zipp==3.1.0
 zope.event==4.4
-zope.interface==4.7.1
+zope.interface==4.7.1
\ No newline at end of file