Skip to content

Commit

Permalink
fixing linter errors
Browse files Browse the repository at this point in the history
  • Loading branch information
EyubogluMerve committed Jan 4, 2024
1 parent 4e78ca8 commit a8cfe11
Show file tree
Hide file tree
Showing 5 changed files with 72 additions and 82 deletions.
2 changes: 1 addition & 1 deletion datasets/kws20.py
Original file line number Diff line number Diff line change
Expand Up @@ -791,4 +791,4 @@ def KWS_35_get_unquantized_datasets(data, load_train=True, load_test=True):
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
'loader': KWS_35_get_unquantized_datasets,
},
]
]
23 changes: 4 additions & 19 deletions datasets/mixedkws.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,11 @@
import torch
from torchvision import transforms

import ai8x

from kws20 import KWS_35_get_unquantized_datasets
from msnoise import MSnoise_get_unquantized_datasets

import ai8x


class MixedKWS:
"""
Expand Down Expand Up @@ -221,19 +221,7 @@ def quantize_audio(data, num_bits=8, compand=False, mu=255):

@staticmethod
def snr_mixer(clean, noise, snr):
<<<<<<< HEAD

=======

# Set the input clean range ~[-1,1]
max_clean = torch.max(abs(clean.reshape(clean.shape[0], -1)), 1, keepdims = True).values
clean = clean * (torch.where(max_clean != 0, 1.0 / max_clean, max_clean)).unsqueeze(1)

# Set the noise range ~[-1,1]
max_noise = torch.max(abs(noise.reshape(noise.shape[0], -1)), 1, keepdims = True).values
noise = noise * (torch.where(max_noise != 0, 1.0 / max_noise, max_noise)).unsqueeze(1)

>>>>>>> da27cd9ef9d6cf67ebcf35d401b9dd2e62ff5a75
"""Mix audio with noise at a given SNR level"""
# Normalizing to rms equal to 1
rmsclean = torch.mean(clean[:, :125]**2)**0.5
scalarclean = 1 / rmsclean
Expand All @@ -246,10 +234,7 @@ def snr_mixer(clean, noise, snr):
cleanfactor = 10**(snr/20)
noisyspeech = cleanfactor*clean + noise
noisyspeech = noisyspeech / (scalarnoise + cleanfactor * scalarclean)

# Set the output range ~[-1,1]
max_mixed = torch.max(abs(noisyspeech.reshape(noisyspeech.shape[0], 16384)), 1, keepdims = True).values
noisyspeech = noisyspeech * (torch.where(max_mixed != 0, 1.0 / max_mixed, max_mixed)).unsqueeze(1)

return noisyspeech

def __gen_datasets(self, exp_len=16384, row_len=128, overlap_ratio=0):
Expand Down
63 changes: 30 additions & 33 deletions datasets/msnoise.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,11 +63,11 @@ class MSnoise:
'CopyMachine': 6, 'Field': 7, 'Hallway': 8, 'Kitchen': 9,
'LivingRoom': 10, 'Metro': 11, 'Munching': 12, 'NeighborSpeaking': 13,
'Office': 14, 'Park': 15, 'Restaurant': 16, 'ShuttingDoor': 17,
'Square': 18, 'SqueakyChair': 19, 'Station': 20,'TradeShow' : 21, 'Traffic': 22,
'Square': 18, 'SqueakyChair': 19, 'Station': 20, 'TradeShow': 21, 'Traffic': 22,
'Typing': 23, 'VacuumCleaner': 24, 'WasherDryer': 25, 'Washing': 26}

def __init__(self, root, classes, d_type, len, exp_len = 16384, fs = 16000, noise_time_step = 0.25, remove_unknowns=False,
transform=None, quantize=False, download=False):
def __init__(self, root, classes, d_type, len, exp_len=16384, fs=16000, noise_time_step=0.25,
remove_unknowns=False, transform=None, quantize=False, download=False):
self.root = root
self.classes = classes
self.d_type = d_type
Expand All @@ -91,7 +91,7 @@ def __init__(self, root, classes, d_type, len, exp_len = 16384, fs = 16000, nois
self.__download()

self.data, self.targets, self.data_type, self.rms_val = self.__gen_datasets()

# rms values for each sample to be returned
self.rms = np.zeros(self.len)

Expand All @@ -108,14 +108,13 @@ def __download(self):

if os.path.exists(self.raw_folder):
return

self.__makedir_exist_ok(self.noise_train_folder)
self.__makedir_exist_ok(self.noise_test_folder)

self.__download_raw(self.url_train)
self.__download_raw(self.url_test)


def __download_raw(self, api_url):
opener = urllib.request.build_opener()
opener.addheaders = [('User-agent', 'Mozilla/5.0')]
Expand Down Expand Up @@ -161,7 +160,7 @@ def __filter_dtype(self):
else:
print(f'Unknown data type: {self.d_type}')
return

self.data = [self.data[i] for i in idx_to_select]
self.targets = [self.targets[i] for i in idx_to_select]
self.rms_val = [self.rms_val[i] for i in idx_to_select]
Expand Down Expand Up @@ -213,8 +212,8 @@ def __len__(self):
def __getitem__(self, index):

rec_num = len(self.data)
rnd_num = np.random.randint(0,rec_num)

rnd_num = np.random.randint(0, rec_num)
self.rms[index] = self.rms_val[rnd_num]

rec_len = len(self.data[rnd_num])
Expand All @@ -223,7 +222,7 @@ def __getitem__(self, index):
start_idx = np.random.randint(0, max_start_idx)
end_idx = start_idx + self.exp_len

inp = self.reshape_audio(self.data[rnd_num][start_idx:end_idx])
inp = self.__reshape_audio(self.data[rnd_num][start_idx:end_idx])
target = int(self.targets[rnd_num])

if self.quantize:
Expand All @@ -232,12 +231,11 @@ def __getitem__(self, index):
inp = self.transform(inp)
return inp, target

def reshape_audio(self, audio, exp_len=16384, row_len=128):
def __reshape_audio(self, audio, row_len=128):

return torch.transpose(torch.tensor(audio.reshape((-1, row_len))),1,0)

def __gen_datasets(self, exp_len=16384, row_len=128, overlap_ratio=0,
noise_time_step=0.25, train_ratio=0.6):
def __gen_datasets(self, exp_len=16384, row_len=128, overlap_ratio=0):

with warnings.catch_warnings():
warnings.simplefilter('error')
Expand Down Expand Up @@ -274,30 +272,27 @@ def __gen_datasets(self, exp_len=16384, row_len=128, overlap_ratio=0,
rms_val = []

for i, label in enumerate(labels):
train_count = 0
test_count = 0
for folder in train_test_folders:
for record_name in os.listdir(folder):
if record_name.split('_')[0] in label:
record_path = os.path.join(folder, record_name)
record, fs = librosa.load(record_path, offset=0, sr=None)
record, _ = librosa.load(record_path, offset=0, sr=None)

if self.quantize:
data_in.append(self.quantize_audio(record))
else:
data_in.append(record)

if (folder == self.noise_train_folder):
data_type.append(0) # train + val
elif (folder == self.noise_test_folder):
data_type.append(1) # test
if folder == self.noise_train_folder:
data_type.append(0) # train + val
elif folder == self.noise_test_folder:
data_type.append(1) # test

data_class.append(i)
rms_val.append(np.mean(record**2)**0.5)

noise_dataset = (data_in, data_class, data_type, rms_val)
return noise_dataset
print('Dataset created!')


def MSnoise_get_datasets(data, load_train=True, load_test=True):
Expand All @@ -312,18 +307,20 @@ def MSnoise_get_datasets(data, load_train=True, load_test=True):
(data_dir, args) = data

classes = ['AirConditioner', 'AirportAnnouncements',
'Babble', 'Bus', 'CafeTeria', 'Car',
'CopyMachine', 'Field', 'Hallway', 'Kitchen',
'LivingRoom', 'Metro', 'Munching', 'NeighborSpeaking',
'Office', 'Park', 'Restaurant', 'ShuttingDoor',
'Square', 'SqueakyChair', 'Station', 'Traffic',
'Typing', 'VacuumCleaner', 'WasherDryer', 'Washing', 'TradeShow']
'Babble', 'Bus', 'CafeTeria', 'Car',
'CopyMachine', 'Field', 'Hallway', 'Kitchen',
'LivingRoom', 'Metro', 'Munching', 'NeighborSpeaking',
'Office', 'Park', 'Restaurant', 'ShuttingDoor',
'Square', 'SqueakyChair', 'Station', 'Traffic',
'Typing', 'VacuumCleaner', 'WasherDryer', 'Washing', 'TradeShow']

#classes = ['AirConditioner', 'AirportAnnouncements',
# 'Babble', 'Bus', 'CafeTeria', 'Car',
# 'CopyMachine', 'Metro',
# 'Office', 'Restaurant', 'ShuttingDoor',
# 'Traffic', 'Typing', 'VacuumCleaner', 'Washing']
"""
classes = ['AirConditioner', 'AirportAnnouncements',
'Babble', 'Bus', 'CafeTeria', 'Car',
'CopyMachine', 'Metro',
'Office', 'Restaurant', 'ShuttingDoor',
'Traffic', 'Typing', 'VacuumCleaner', 'Washing']
"""

remove_unknowns = True
transform = transforms.Compose([
Expand Down Expand Up @@ -407,4 +404,4 @@ def MSnoise_get_unquantized_datasets(data, load_train=True, load_test=True):
16, 17, 18, 19, 20, 21, 22, 23, 24, 25),
'loader': MSnoise_get_unquantized_datasets,
},
]
]
52 changes: 30 additions & 22 deletions datasets/signalmixer.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,47 +25,50 @@
import numpy as np
import torch


class signalmixer:
"""
Signal mixer dataloader to create datasets with specified
Signal mixer dataloader to create datasets with specified
length using a noise dataset and a speech dataset and a specified SNR level.
Args:
signal_dataset(object): KWS dataset object.
snr(int): SNR level to be created in the mixed dataset.
noise_kind(string): Noise kind that will be applied to the speech dataset.
noise_dataset(object, optional): MSnoise dataset object.
"""

def __init__(self, signal_dataset, snr, noise_kind, noise_dataset = None):

self.signal_data = signal_dataset.data
self.signal_targets = signal_dataset.targets

if (noise_kind != 'WhiteNoise'):
if noise_kind != 'WhiteNoise':
self.noise_data = noise_dataset.data
self.noise_targets = noise_dataset.targets

# using getitem to reach the noise test data
self.noise_dataset_float = next(iter(torch.utils.data.DataLoader(noise_dataset, batch_size = noise_dataset.len)))[0]

# using getitem to reach the noise test data
self.noise_dataset_float = next(iter(torch.utils.data.DataLoader(
noise_dataset, batch_size = noise_dataset.len)))[0]

self.noise_rms = noise_dataset.rms

self.snr = snr
self.noise_kind = noise_kind

# using getitem to reach the speech test data
self.test_dataset_float = next(iter(torch.utils.data.DataLoader(signal_dataset, batch_size = signal_dataset.data.shape[0])))[0]
# using getitem to reach the speech test data
self.test_dataset_float = next(iter(torch.utils.data.DataLoader(
signal_dataset, batch_size = signal_dataset.data.shape[0])))[0]

if (noise_kind == 'WhiteNoise'):
if noise_kind == 'WhiteNoise':
self.mixed_signal = self.white_noise_mixer()
else:
self.mixed_signal = self.snr_mixer()

def __getitem__(self, index):

inp, target = self.mixed_signal[index].type(torch.FloatTensor), int(self.signal_targets[index])
inp, = self.mixed_signal[index].type(torch.FloatTensor)
target = int(self.signal_targets[index])
return inp, target

def __len__(self):
Expand All @@ -84,23 +87,25 @@ def snr_mixer(self):

snr = self.snr

rmsclean = (torch.mean(clean.reshape(clean.shape[0], -1)**2, 1, keepdims = True)**0.5).unsqueeze(1)
rmsclean = torch.mean(clean.reshape(clean.shape[0], -1)**2, 1, keepdims = True)**0.5.unsqueeze(1)
scalarclean = 1 / rmsclean
clean = clean * scalarclean

scalarnoise = 1 / rms_noise.reshape(-1,1,1)
scalarnoise = 1 / rms_noise.reshape(-1, 1, 1)
noise = noise * scalarnoise

cleanfactor = 10**(snr/20)
noisyspeech = cleanfactor*clean + noise
noisyspeech = cleanfactor * clean + noise
noisyspeech = noisyspeech / (torch.tensor(scalarnoise) + cleanfactor * scalarclean)

# 16384 --> (noisyspeech[0].shape[0])*(noisyspeech[0].shape[1])
max_mixed = torch.max(abs(noisyspeech.reshape(noisyspeech.shape[0], (noisyspeech[0].shape[0])*(noisyspeech[0].shape[1]))), 1, keepdims = True).values
speech_shape = noisyspeech[0].shape[0]*noisyspeech[0].shape[1]
max_mixed = torch.max(abs(noisyspeech.reshape(
noisyspeech.shape[0], speech_shape)), 1, keepdims = True).values

noisyspeech = noisyspeech * (1/max_mixed).unsqueeze(1)
noisyspeech = noisyspeech * (1 / max_mixed).unsqueeze(1)
return noisyspeech

def white_noise_mixer(self):

# creates mixed signal dataset using the SNR level and white noise
Expand All @@ -113,20 +118,23 @@ def white_noise_mixer(self):
noise = np.random.normal(mean, std, clean.shape)
noise = torch.tensor(noise, dtype = torch.float32)

rmsclean = (torch.mean(clean.reshape(clean.shape[0], -1)**2, 1, keepdims = True)**0.5).unsqueeze(1)
rmsclean = (torch.mean(clean.reshape(
clean.shape[0], -1)**2, 1, keepdims = True)**0.5).unsqueeze(1)
scalarclean = 1 / rmsclean
clean = clean * scalarclean

rmsnoise = (torch.mean(noise.reshape(noise.shape[0], -1)**2, 1, keepdims = True)**0.5).unsqueeze(1)
rmsnoise = (torch.mean(noise.reshape(
noise.shape[0], -1)**2, 1, keepdims = True)**0.5).unsqueeze(1)
scalarnoise = 1 / rmsnoise
noise = noise * scalarnoise

cleanfactor = 10**(snr/20)
noisyspeech = cleanfactor*clean + noise
noisyspeech = cleanfactor * clean + noise
noisyspeech = noisyspeech / (scalarnoise + cleanfactor * scalarclean)

# scaling to ~[-1,1]
max_mixed = torch.max(abs(noisyspeech.reshape(noisyspeech.shape[0], 16384)), 1, keepdims = True).values
noisyspeech = noisyspeech * (torch.where(max_mixed != 0, 1.0 / max_mixed, max_mixed)).unsqueeze(1)
max_mixed = torch.max(abs(noisyspeech.reshape(
noisyspeech.shape[0], 16384)), 1, keepdims = True).values
noisyspeech = noisyspeech * (1 / max_mixed).unsqueeze(1)

return noisyspeech
14 changes: 7 additions & 7 deletions notebooks/Automated_Evaluation_KWS.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,9 @@
"metadata": {},
"outputs": [],
"source": [
"trained_checkpoint_path = os.path.join(\"/home/merveeyuboglu/Github/ai8x-training/codes/checkpoints/TrueVAL_NAS_0911Speed_DynAug/\",\"qat_best.pth.tar\")\n",
"mod = importlib.import_module(\"ai85nasnet_kws20_res_1\")\n",
"model_file = \"ai85nasnet_kws20_res_1\""
"trained_checkpoint_path = os.path.join(\"../../ai8x-synthesis/trained\",\"ai85-kws20_nas-qat8.pth.tar\")\n",
"mod = importlib.import_module(\"ai85net-kws20-nas\")\n",
"model_file = \"ai85net-kws20-nas\""
]
},
{
Expand Down Expand Up @@ -112,7 +112,7 @@
"metadata": {},
"outputs": [],
"source": [
"model = mod.AI85NASNET_KWS20_RES_1(num_classes=len(classes), num_channels=128, dimensions=(128, 1), bias=True, \n",
"model = mod.AI85KWS20NetNAS(num_classes=len(classes), num_channels=128, dimensions=(128, 1), bias=True, \n",
" quantize_activation=False)\n"
]
},
Expand Down Expand Up @@ -431,7 +431,7 @@
"metadata": {},
"outputs": [],
"source": [
"trained_checkpoint_path = os.path.join(\"/home/merveeyuboglu/Github/ai8x-training/codes/checkpoints/TrueVAL_v3_0911Speed_DynAug\",\"qat_best.pth.tar\") \n",
"trained_checkpoint_path = os.path.join(\"../../ai8x-synthesis/trained\",\"ai85-kws20_v3-qat8.pth.tar\") \n",
"mod = importlib.import_module(\"ai85net-kws20-v3\")\n",
"model_file = \"ai85net-kws20-v3\"\n",
"\n",
Expand Down Expand Up @@ -470,7 +470,7 @@
"metadata": {},
"outputs": [],
"source": [
"trained_checkpoint_path = os.path.join(\"/home/merveeyuboglu/Github/ai8x-training/codes/checkpoints/TrueVAL_v2_0911Speed_DynAug\",\"qat_best.pth.tar\") \n",
"trained_checkpoint_path = os.path.join(\"../../ai8x-synthesis/trained\",\"ai85-kws20_v2-qat8.pth.tar\") \n",
"mod = importlib.import_module(\"ai85net-kws20-v2\")\n",
"model_file = \"ai85net-kws20-v2\"\n",
"\n",
Expand Down Expand Up @@ -503,7 +503,7 @@
"outputs": [],
"source": [
"accuracies = [accuracies_nas, accuracies_v2, accuracies_v3]\n",
"model_files = [\"ai85nasnet_kws20_res_1\", \"ai85net-kws20-v2\", \"ai85net-kws20-v3\"]"
"model_files = [\"ai85net-kws20-nas\", \"ai85net-kws20-v2\", \"ai85net-kws20-v3\"]"
]
},
{
Expand Down

0 comments on commit a8cfe11

Please sign in to comment.