Skip to content

Commit

Permalink
v1.0.1 Release
Browse files Browse the repository at this point in the history
  • Loading branch information
Antibasis committed Dec 6, 2021
1 parent 1f4adce commit c27e0bf
Show file tree
Hide file tree
Showing 10 changed files with 39 additions and 87 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ pip install -r requirements.txt
```

## Features
* Both **aQHM** and **eaQHM** are supported.
* Preprocessing high pass filter option.
* [SWIPEP pitch estimator](http://www.kerwa.ucr.ac.cr:8080/bitstream/handle/10669/536/dissertation.pdf) is used for the ```f0``` estimations, implemented in Python by Disha Garg: https://github.com/dishagarg/SWIPE
* The user may use custom pitch limits for the estimation.
Expand All @@ -39,7 +38,7 @@ A *main.py* file is provided, which executes **eaQHMAnalysisAndSynthesis** on a
What you have to do is:
1. Open *main.py*.
2. Run the code.
3. Write the name of the *.wav* file to be processed in the console.
3. A file dialog will open; select the *.wav* file you want to process.
4. Specify the gender of the speaker ("male", "female" or "other"). You may also use "child" as an input.
5. The program will print messages showing the **Signal-to-Reconstruction-Error Ratio (SRER)** [[1]](#Reference) of each adaptation, and some plots will be generated.
6. After the program terminates, a *\*filename\*_reconstructed.wav* file will be generated.
Expand All @@ -51,6 +50,7 @@ Here is an example of the output of the code running *SA19.wav*:
And here are the plots produced:

![](img/freq.png)
![](img/freq2.png)
![](img/timeDom.png)
![](img/timeDom2.png)

Expand Down
2 changes: 1 addition & 1 deletion SWIPE.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def swipep(x, fs, speechFile, plim):
# Compute spectrum
w = hanning(ws[i]) # Hann window
o = max(0, round(ws[i] - dn)) # Window overlap
[X, f, ti, im] = specgram(xk, NFFT=int(ws[i]), Fs=fs, window=w, noverlap=int(o))
[X, f, ti, im] = specgram(xk, NFFT=int(ws[i]), Fs=fs, window=w, noverlap=int(o), vmin=-180, vmax=-40)

# Interpolate at equidistant ERBs steps
f = asarray(f)
Expand Down
86 changes: 7 additions & 79 deletions functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,7 @@

def eaQHMAnalysisAndSynthesis(speechFile: str, gender: str or tuple = 'other', step: int = 15,
maxAdpt: int = 10, pitchPeriods: int = 3, analysisWindow: int = 32, fullWaveform: bool = True,
fullBand: bool = True, eaQHM: bool = True, fc: int = 0, partials: int = 0,
printPrompts: bool = True, loadingScreen: bool = True):
fc: int = 0, partials: int = 0, printPrompts: bool = True, loadingScreen: bool = True):
'''
Performs adaptive Quasi-Harmonic Analysis of Speech
using the extended adaptive Quasi-Harmonic Model and decomposes
Expand All @@ -61,10 +60,6 @@ def eaQHMAnalysisAndSynthesis(speechFile: str, gender: str or tuple = 'other', s
The steps of the pitch analysis window, where the analysis starts. The default is 32.
fullWaveform : bool, optional
Determines if a full waveform length analysis will be performed. The default is True.
fullBand : bool, optional
Determines if a full band analysis-in-voiced-frames will be performed. The default is True.
eaQHM : bool, optional
Determines if an adaptive Quasi-Harmonic Model or an extended adaptive Quasi-Harmonic Model will be used. The default is True.
fc : int, optional
Applies a high pass filtering at the specified Hz before the analysis starts. If <= 0, no filter is applied. The default is 0.
partials : int, optional
Expand Down Expand Up @@ -117,16 +112,12 @@ def eaQHMAnalysisAndSynthesis(speechFile: str, gender: str or tuple = 'other', s

f0s = getLinear(f0s, arange(0, len(s2)-1, round(fs*5/1000))/fs)

if fullBand:
Fmax = int(fs/2-200)
Fmax = int(fs/2-200)

if partials > 0:
Kmax = partials
else:
Kmax = int(round(Fmax/min(f0s[:,1])) + 10)
if partials > 0:
Kmax = partials
else:
Fmax = int(fs/2-2000)
Kmax = int(round(Fmax/min(f0s[:,1])) + 10)
Kmax = int(round(Fmax/min(f0s[:,1])) + 10)


analysisWindowSamples = analysisWindow*step
Expand Down Expand Up @@ -300,10 +291,8 @@ def eaQHMAnalysisAndSynthesis(speechFile: str, gender: str or tuple = 'other', s
fm = concatenate((-flipud(transpose1dArray(fm)), tmp_zeros, transpose1dArray(fm)), axis=1)
am = concatenate((flipud(transpose1dArray(am)), tmp_zeros, transpose1dArray(am)), axis=1)

if not eaQHM:
amplitudes_tmp, slopes_tmp = aqhmLS_complexamps(s[window_range + tith], fm, window, fs)
else:
amplitudes_tmp, slopes_tmp = eaqhmLS_complexamps(s[window_range + tith], am, fm, window, fs)

amplitudes_tmp, slopes_tmp = eaqhmLS_complexamps(s[window_range + tith], am, fm, window, fs)

fmismatch_tmp = fs/(2*pi)*divide(multiply(real(amplitudes_tmp), imag(slopes_tmp)) - multiply(imag(amplitudes_tmp), real(slopes_tmp)), abs(amplitudes_tmp)**2)

Expand Down Expand Up @@ -479,67 +468,6 @@ def iqhmLS_complexamps(s, f0range, window, fs: int):
slopes = ampsl[K:2*K+1]

return amplitudes, slopes

def aqhmLS_complexamps(s, fm, window, fs):
    '''
    Least-squares fit of a first order complex polynomial model whose
    exponential basis follows the supplied frequency tracks (FM model).

    Parameters
    ----------
    s : array_like
        The part of the signal to be analysed.
    fm : array_like
        The estimated instantaneous frequencies, indexed as fm[sample, component].
    window : array_like
        The analysis window applied to the signal part.
    fs : int
        The sampling frequency.

    Returns
    -------
    amplitudes : array_like
        The first K entries of the least-squares solution (amplitude of harmonics).
    slopes : array_like
        The remaining entries of the least-squares solution (slope of harmonics).
    '''
    n_samples = len(fm)
    K = len(fm[0])
    centre = int((n_samples-1)/2)

    # transpose1dArray is defined elsewhere in the project; presumably it
    # returns the column form of a 1-D array -- confirm against misc.
    win_col = transpose1dArray(window)
    idx_col = transpose1dArray(arange(-centre, centre+1))

    # Running sum of every frequency track (the [1] / [1, -1] filter is an
    # accumulator), re-referenced so the centre sample is zero.
    f_an = zeros((K, n_samples), float)
    for k in range(K):
        track = f_an[k]
        track[:] = lfilter([1], [1, -1], fm[:, k])
        track -= track[centre]

    phase = transpose((2*pi*f_an)/fs)

    # Basis columns: the complex exponentials, followed by the same
    # exponentials weighted by the centred sample index.
    E1 = cos(phase) + 1j* sin(phase)
    basis = concatenate((E1, tile(idx_col, (1, K))*E1), axis=1)

    weighted = multiply(basis, tile(win_col, (1, 2*K)))
    weighted_H = conjugate(transpose(weighted))

    # Normal equations built from the windowed basis and the windowed signal.
    gram = dot(weighted_H, weighted)
    rhs = dot(weighted_H, multiply(win_col, s))
    ampsl = dot(inv(gram), rhs)

    return ampsl[0:K], ampsl[K:2*K+1]

def eaqhmLS_complexamps(s, am, fm, window, fs):
'''
Expand Down
Binary file modified img/SA19out.JPG
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified img/freq.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added img/freq2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified img/timeDom.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified img/timeDom2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
31 changes: 27 additions & 4 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,27 @@
from functions import eaQHMAnalysisAndSynthesis
from numpy import arange, float32
from scipy.io.wavfile import write
from matplotlib.pyplot import plot, show, xlabel, ylabel, title
from matplotlib.pyplot import plot, show, xlabel, ylabel, title, specgram, colorbar
from scipy.io.wavfile import read
from misc import normalize
from tkinter import Tk
from tkinter.filedialog import askopenfilename

def plotGraphs(t, t_reconst, signal, signal_reconst, name):

def plotGraphs(t, t_reconst, signal, signal_reconst, name, fs):
colorbar(label="Intensity (dB)")
title("Spectrogram of " + name)
xlabel('Time (s)')
ylabel('Frequency (Hz)')
show()

specgram(signal_reconst, Fs=fs, vmin=-180, vmax=-40)
colorbar(label="Intensity (dB)")
title("Spectrogram of " + name + " reconstructed")
xlabel('Time (s)')
ylabel('Frequency (Hz)')
show()

plot(t, signal)
title(name)
xlabel('Time (s)')
Expand All @@ -31,7 +42,19 @@ def plotGraphs(t, t_reconst, signal, signal_reconst, name):
show()

def main():
filename = input("Write the name of the file to be processed: ")
root = Tk()
root.withdraw()
root.attributes("-topmost", True)
filename = askopenfilename(
parent=root,
title="Write the name of the file to be processed",
initialdir="/",
filetypes=("WAVFILES *.wav",))

if filename == "":
raise ValueError("No File Selected")

print("File Selected: " + filename)
gender = input("You may include a gender (male, female, child or other): ")

print()
Expand All @@ -44,7 +67,7 @@ def main():
t = arange(0, len(signal)/fs, dt)

t_reconstructed = arange(0, len(signal_reconstructed)/fs, dt)
plotGraphs(t, t_reconstructed, signal, signal_reconstructed, filename)
plotGraphs(t, t_reconstructed, signal, signal_reconstructed, filename, fs)

write(filename[0:len(filename)-4]+"_reconstructed.wav", fs, float32(signal_reconstructed))

Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ diff-match-patch @ file:///tmp/build/80754af9/diff-match-patch_1594828741838/wor
distributed @ file:///C:/ci/distributed_1594742844291/work
docopt==0.6.2
docutils==0.16
easygui==0.98.2
entrypoints==0.3
et-xmlfile==1.0.1
fastcache==1.1.0
Expand Down Expand Up @@ -252,4 +253,4 @@ yarg==0.1.9
zict==2.0.0
zipp==3.1.0
zope.event==4.4
zope.interface==4.7.1
zope.interface==4.7.1

0 comments on commit c27e0bf

Please sign in to comment.