fixing linter errors

EyubogluMerve · Jan 4, 2024 · a8cfe11 · a8cfe11
1 parent 4e78ca8
commit a8cfe11
Show file tree

Hide file tree

Showing 5 changed files with 72 additions and 82 deletions.
diff --git a/datasets/kws20.py b/datasets/kws20.py
@@ -791,4 +791,4 @@ def KWS_35_get_unquantized_datasets(data, load_train=True, load_test=True):
                    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
         'loader': KWS_35_get_unquantized_datasets,
     },
-]
+]
diff --git a/datasets/mixedkws.py b/datasets/mixedkws.py
@@ -31,11 +31,11 @@
 import torch
 from torchvision import transforms
 
-import ai8x
-
 from kws20 import KWS_35_get_unquantized_datasets
 from msnoise import MSnoise_get_unquantized_datasets
 
+import ai8x
+
 
 class MixedKWS:
     """
@@ -221,19 +221,7 @@ def quantize_audio(data, num_bits=8, compand=False, mu=255):
 
     @staticmethod
     def snr_mixer(clean, noise, snr):
-<<<<<<< HEAD
-
-=======
-
-        # Set the input clean range ~[-1,1]
-        max_clean = torch.max(abs(clean.reshape(clean.shape[0], -1)), 1, keepdims = True).values
-        clean = clean * (torch.where(max_clean != 0, 1.0 / max_clean, max_clean)).unsqueeze(1)
-
-        # Set the noise range ~[-1,1]
-        max_noise = torch.max(abs(noise.reshape(noise.shape[0], -1)), 1, keepdims = True).values
-        noise = noise * (torch.where(max_noise != 0, 1.0 / max_noise, max_noise)).unsqueeze(1)
-
->>>>>>> da27cd9ef9d6cf67ebcf35d401b9dd2e62ff5a75
+        """Mix audio with noise at a given SNR level"""
         # Normalizing to rms equal to 1
         rmsclean = torch.mean(clean[:, :125]**2)**0.5
         scalarclean = 1 / rmsclean
@@ -246,10 +234,7 @@ def snr_mixer(clean, noise, snr):
         cleanfactor = 10**(snr/20)
         noisyspeech = cleanfactor*clean + noise
         noisyspeech = noisyspeech / (scalarnoise + cleanfactor * scalarclean)
-
-        # Set the output range ~[-1,1]
-        max_mixed = torch.max(abs(noisyspeech.reshape(noisyspeech.shape[0], 16384)), 1, keepdims = True).values
-        noisyspeech = noisyspeech * (torch.where(max_mixed != 0, 1.0 / max_mixed, max_mixed)).unsqueeze(1)
+
         return noisyspeech
 
     def __gen_datasets(self, exp_len=16384, row_len=128, overlap_ratio=0):

diff --git a/datasets/msnoise.py b/datasets/msnoise.py
@@ -63,11 +63,11 @@ class MSnoise:
                   'CopyMachine': 6, 'Field': 7, 'Hallway': 8, 'Kitchen': 9,
                   'LivingRoom': 10, 'Metro': 11, 'Munching': 12, 'NeighborSpeaking': 13,
                   'Office': 14, 'Park': 15, 'Restaurant': 16, 'ShuttingDoor': 17,
-                  'Square': 18, 'SqueakyChair': 19, 'Station': 20,'TradeShow' : 21, 'Traffic': 22,
+                  'Square': 18, 'SqueakyChair': 19, 'Station': 20, 'TradeShow': 21, 'Traffic': 22,
                   'Typing': 23, 'VacuumCleaner': 24, 'WasherDryer': 25, 'Washing': 26}
 
-    def __init__(self, root, classes, d_type, len, exp_len = 16384, fs = 16000, noise_time_step = 0.25, remove_unknowns=False,
-                 transform=None, quantize=False, download=False):
+    def __init__(self, root, classes, d_type, len, exp_len=16384, fs=16000, noise_time_step=0.25,
+                 remove_unknowns=False, transform=None, quantize=False, download=False):
         self.root = root
         self.classes = classes
         self.d_type = d_type
@@ -91,7 +91,7 @@ def __init__(self, root, classes, d_type, len, exp_len = 16384, fs = 16000, nois
             self.__download()
 
         self.data, self.targets, self.data_type, self.rms_val = self.__gen_datasets()
-        
+
         # rms values for each sample to be returned
         self.rms = np.zeros(self.len)
 
@@ -108,14 +108,13 @@ def __download(self):
 
         if os.path.exists(self.raw_folder):
             return
-        
+
         self.__makedir_exist_ok(self.noise_train_folder)
         self.__makedir_exist_ok(self.noise_test_folder)
 
         self.__download_raw(self.url_train)
         self.__download_raw(self.url_test)
 
-
     def __download_raw(self, api_url):
         opener = urllib.request.build_opener()
         opener.addheaders = [('User-agent', 'Mozilla/5.0')]
@@ -161,7 +160,7 @@ def __filter_dtype(self):
         else:
             print(f'Unknown data type: {self.d_type}')
             return
-        
+
         self.data = [self.data[i] for i in idx_to_select]
         self.targets = [self.targets[i] for i in idx_to_select]
         self.rms_val = [self.rms_val[i] for i in idx_to_select]
@@ -213,8 +212,8 @@ def __len__(self):
     def __getitem__(self, index):
 
         rec_num = len(self.data)
-       
-        rnd_num = np.random.randint(0,rec_num)
+
+        rnd_num = np.random.randint(0, rec_num)
         self.rms[index] = self.rms_val[rnd_num]
 
         rec_len = len(self.data[rnd_num])
@@ -223,7 +222,7 @@ def __getitem__(self, index):
         start_idx = np.random.randint(0, max_start_idx)
         end_idx = start_idx + self.exp_len
 
-        inp = self.reshape_audio(self.data[rnd_num][start_idx:end_idx])
+        inp = self.__reshape_audio(self.data[rnd_num][start_idx:end_idx])
         target = int(self.targets[rnd_num])
 
         if self.quantize:
@@ -232,12 +231,11 @@ def __getitem__(self, index):
             inp = self.transform(inp)
         return inp, target
 
-    def reshape_audio(self, audio, exp_len=16384, row_len=128):
+    def __reshape_audio(self, audio, row_len=128):
 
         return torch.transpose(torch.tensor(audio.reshape((-1, row_len))),1,0)
 
-    def __gen_datasets(self, exp_len=16384, row_len=128, overlap_ratio=0,
-                       noise_time_step=0.25, train_ratio=0.6):
+    def __gen_datasets(self, exp_len=16384, row_len=128, overlap_ratio=0):
 
         with warnings.catch_warnings():
             warnings.simplefilter('error')
@@ -274,30 +272,27 @@ def __gen_datasets(self, exp_len=16384, row_len=128, overlap_ratio=0,
             rms_val = []
 
             for i, label in enumerate(labels):
-                train_count = 0
-                test_count = 0
                 for folder in train_test_folders:
                     for record_name in os.listdir(folder):
                         if record_name.split('_')[0] in label:
                             record_path = os.path.join(folder, record_name)
-                            record, fs = librosa.load(record_path, offset=0, sr=None)
+                            record, _ = librosa.load(record_path, offset=0, sr=None)
 
                             if self.quantize:
                                 data_in.append(self.quantize_audio(record))
                             else:
                                 data_in.append(record)
 
-                            if (folder == self.noise_train_folder):
-                                data_type.append(0) # train + val
-                            elif (folder == self.noise_test_folder):
-                                data_type.append(1) # test
+                            if folder == self.noise_train_folder:
+                                data_type.append(0)  # train + val
+                            elif folder == self.noise_test_folder:
+                                data_type.append(1)  # test
 
                             data_class.append(i)
                             rms_val.append(np.mean(record**2)**0.5)
 
             noise_dataset = (data_in, data_class, data_type, rms_val)
         return noise_dataset
-        print('Dataset created!')
 
 
 def MSnoise_get_datasets(data, load_train=True, load_test=True):
@@ -312,18 +307,20 @@ def MSnoise_get_datasets(data, load_train=True, load_test=True):
     (data_dir, args) = data
 
     classes = ['AirConditioner', 'AirportAnnouncements',
-                'Babble', 'Bus', 'CafeTeria', 'Car',
-                'CopyMachine', 'Field', 'Hallway', 'Kitchen',
-                'LivingRoom', 'Metro', 'Munching', 'NeighborSpeaking',
-                'Office', 'Park', 'Restaurant', 'ShuttingDoor',
-                'Square', 'SqueakyChair', 'Station', 'Traffic',
-                'Typing', 'VacuumCleaner', 'WasherDryer', 'Washing', 'TradeShow']
+               'Babble', 'Bus', 'CafeTeria', 'Car',
+               'CopyMachine', 'Field', 'Hallway', 'Kitchen',
+               'LivingRoom', 'Metro', 'Munching', 'NeighborSpeaking',
+               'Office', 'Park', 'Restaurant', 'ShuttingDoor',
+               'Square', 'SqueakyChair', 'Station', 'Traffic',
+               'Typing', 'VacuumCleaner', 'WasherDryer', 'Washing', 'TradeShow']
 
-    #classes = ['AirConditioner', 'AirportAnnouncements',
-    #           'Babble', 'Bus', 'CafeTeria', 'Car',
-    #           'CopyMachine', 'Metro',
-    #           'Office', 'Restaurant', 'ShuttingDoor',
-    #           'Traffic', 'Typing', 'VacuumCleaner', 'Washing']
+    # classes = ['AirConditioner', 'AirportAnnouncements',
+    #            'Babble', 'Bus', 'CafeTeria', 'Car',
+    #            'CopyMachine', 'Metro',
+    #            'Office', 'Restaurant', 'ShuttingDoor',
+    #            'Traffic', 'Typing', 'VacuumCleaner', 'Washing']
 
     remove_unknowns = True
     transform = transforms.Compose([
@@ -407,4 +404,4 @@ def MSnoise_get_unquantized_datasets(data, load_train=True, load_test=True):
                    16, 17, 18, 19, 20, 21, 22, 23, 24, 25),
         'loader': MSnoise_get_unquantized_datasets,
     },
-]
+]
diff --git a/datasets/signalmixer.py b/datasets/signalmixer.py
@@ -25,47 +25,50 @@
 import numpy as np
 import torch
 
+
 class signalmixer:
     """
-    Signal mixer dataloader to create datasets with specified 
+    Signal mixer dataloader to create datasets with specified
     length using a noise dataset and a speech dataset and a specified SNR level.
 
     Args:
     signal_dataset(object): KWS dataset object.
     snr(int): SNR level to be created in the mixed dataset.
     noise_kind(string): Noise kind that will be applied to the speech dataset.
     noise_dataset(object, optional): MSnoise dataset object.
-    
     """
 
     def __init__(self, signal_dataset, snr, noise_kind, noise_dataset = None):
 
         self.signal_data = signal_dataset.data
         self.signal_targets = signal_dataset.targets
 
-        if (noise_kind != 'WhiteNoise'):
+        if noise_kind != 'WhiteNoise':
             self.noise_data = noise_dataset.data
             self.noise_targets = noise_dataset.targets
-
-            # using getitem to reach the noise test data 
-            self.noise_dataset_float = next(iter(torch.utils.data.DataLoader(noise_dataset, batch_size = noise_dataset.len)))[0]
+
+            # using getitem to reach the noise test data
+            self.noise_dataset_float = next(iter(torch.utils.data.DataLoader(
+                noise_dataset, batch_size = noise_dataset.len)))[0]
 
             self.noise_rms = noise_dataset.rms
 
         self.snr = snr
         self.noise_kind = noise_kind
 
-        # using getitem to reach the speech test data 
-        self.test_dataset_float = next(iter(torch.utils.data.DataLoader(signal_dataset, batch_size = signal_dataset.data.shape[0])))[0]
+        # using getitem to reach the speech test data
+        self.test_dataset_float = next(iter(torch.utils.data.DataLoader(
+            signal_dataset, batch_size = signal_dataset.data.shape[0])))[0]
 
-        if (noise_kind == 'WhiteNoise'):
+        if noise_kind == 'WhiteNoise':
             self.mixed_signal = self.white_noise_mixer()
         else:
             self.mixed_signal = self.snr_mixer()
-        
+
     def __getitem__(self, index):
 
-        inp, target = self.mixed_signal[index].type(torch.FloatTensor), int(self.signal_targets[index])
+        inp = self.mixed_signal[index].type(torch.FloatTensor)
+        target = int(self.signal_targets[index])
         return inp, target
 
     def __len__(self):
@@ -84,23 +87,25 @@ def snr_mixer(self):
 
         snr = self.snr
 
-        rmsclean = (torch.mean(clean.reshape(clean.shape[0], -1)**2, 1, keepdims = True)**0.5).unsqueeze(1)
+        rmsclean = (torch.mean(clean.reshape(
+            clean.shape[0], -1)**2, 1, keepdims = True)**0.5).unsqueeze(1)
         scalarclean = 1 / rmsclean
         clean = clean * scalarclean
 
-        scalarnoise = 1 / rms_noise.reshape(-1,1,1)
+        scalarnoise = 1 / rms_noise.reshape(-1, 1, 1)
         noise = noise * scalarnoise
 
         cleanfactor = 10**(snr/20)
-        noisyspeech = cleanfactor*clean + noise
+        noisyspeech = cleanfactor * clean + noise
         noisyspeech = noisyspeech / (torch.tensor(scalarnoise) + cleanfactor * scalarclean)
 
         # 16384 --> (noisyspeech[0].shape[0])*(noisyspeech[0].shape[1])
-        max_mixed = torch.max(abs(noisyspeech.reshape(noisyspeech.shape[0], (noisyspeech[0].shape[0])*(noisyspeech[0].shape[1]))), 1, keepdims = True).values
+        speech_shape = noisyspeech[0].shape[0]*noisyspeech[0].shape[1]
+        max_mixed = torch.max(abs(noisyspeech.reshape(
+                        noisyspeech.shape[0], speech_shape)), 1, keepdims = True).values
 
-        noisyspeech = noisyspeech * (1/max_mixed).unsqueeze(1)
+        noisyspeech = noisyspeech * (1 / max_mixed).unsqueeze(1)
         return noisyspeech
-    
+
     def white_noise_mixer(self):
 
         # creates mixed signal dataset using the SNR level and white noise
@@ -113,20 +118,23 @@ def white_noise_mixer(self):
         noise = np.random.normal(mean, std, clean.shape)
         noise = torch.tensor(noise, dtype = torch.float32)
 
-        rmsclean = (torch.mean(clean.reshape(clean.shape[0], -1)**2, 1, keepdims = True)**0.5).unsqueeze(1)
+        rmsclean = (torch.mean(clean.reshape(
+            clean.shape[0], -1)**2, 1, keepdims = True)**0.5).unsqueeze(1)
         scalarclean = 1 / rmsclean
         clean = clean * scalarclean
 
-        rmsnoise = (torch.mean(noise.reshape(noise.shape[0], -1)**2, 1, keepdims = True)**0.5).unsqueeze(1)
+        rmsnoise = (torch.mean(noise.reshape(
+            noise.shape[0], -1)**2, 1, keepdims = True)**0.5).unsqueeze(1)
         scalarnoise = 1 / rmsnoise
         noise = noise * scalarnoise
 
         cleanfactor = 10**(snr/20)
-        noisyspeech = cleanfactor*clean + noise
+        noisyspeech = cleanfactor * clean + noise
         noisyspeech = noisyspeech / (scalarnoise + cleanfactor * scalarclean)
 
         # scaling to ~[-1,1]
-        max_mixed = torch.max(abs(noisyspeech.reshape(noisyspeech.shape[0], 16384)), 1, keepdims = True).values
-        noisyspeech = noisyspeech * (torch.where(max_mixed != 0, 1.0 / max_mixed, max_mixed)).unsqueeze(1)
+        max_mixed = torch.max(abs(noisyspeech.reshape(
+            noisyspeech.shape[0], 16384)), 1, keepdims = True).values
+        noisyspeech = noisyspeech * (1 / max_mixed).unsqueeze(1)
 
         return noisyspeech
diff --git a/notebooks/Automated_Evaluation_KWS.ipynb b/notebooks/Automated_Evaluation_KWS.ipynb
@@ -74,9 +74,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "trained_checkpoint_path = os.path.join(\"/home/merveeyuboglu/Github/ai8x-training/codes/checkpoints/TrueVAL_NAS_0911Speed_DynAug/\",\"qat_best.pth.tar\")\n",
-    "mod = importlib.import_module(\"ai85nasnet_kws20_res_1\")\n",
-    "model_file = \"ai85nasnet_kws20_res_1\""
+    "trained_checkpoint_path = os.path.join(\"../../ai8x-synthesis/trained\",\"ai85-kws20_nas-qat8.pth.tar\")\n",
+    "mod = importlib.import_module(\"ai85net-kws20-nas\")\n",
+    "model_file = \"ai85net-kws20-nas\""
    ]
   },
   {
@@ -112,7 +112,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "model = mod.AI85NASNET_KWS20_RES_1(num_classes=len(classes), num_channels=128, dimensions=(128, 1), bias=True, \n",
+    "model = mod.AI85KWS20NetNAS(num_classes=len(classes), num_channels=128, dimensions=(128, 1), bias=True, \n",
     "                           quantize_activation=False)\n"
    ]
   },
@@ -431,7 +431,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "trained_checkpoint_path = os.path.join(\"/home/merveeyuboglu/Github/ai8x-training/codes/checkpoints/TrueVAL_v3_0911Speed_DynAug\",\"qat_best.pth.tar\")              \n",
+    "trained_checkpoint_path = os.path.join(\"../../ai8x-synthesis/trained\",\"ai85-kws20_v3-qat8.pth.tar\")              \n",
     "mod = importlib.import_module(\"ai85net-kws20-v3\")\n",
     "model_file = \"ai85net-kws20-v3\"\n",
     "\n",
@@ -470,7 +470,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "trained_checkpoint_path = os.path.join(\"/home/merveeyuboglu/Github/ai8x-training/codes/checkpoints/TrueVAL_v2_0911Speed_DynAug\",\"qat_best.pth.tar\")              \n",
+    "trained_checkpoint_path = os.path.join(\"../../ai8x-synthesis/trained\",\"ai85-kws20_v2-qat8.pth.tar\")              \n",
     "mod = importlib.import_module(\"ai85net-kws20-v2\")\n",
     "model_file = \"ai85net-kws20-v2\"\n",
     "\n",
@@ -503,7 +503,7 @@
    "outputs": [],
    "source": [
     "accuracies = [accuracies_nas, accuracies_v2, accuracies_v3]\n",
-    "model_files = [\"ai85nasnet_kws20_res_1\", \"ai85net-kws20-v2\", \"ai85net-kws20-v3\"]"
+    "model_files = [\"ai85net-kws20-nas\", \"ai85net-kws20-v2\", \"ai85net-kws20-v3\"]"
    ]
   },
   {