Dev: refactorization and added docstring
skaliy committed Jul 12, 2023
1 parent 1fe9061 commit e47beaf
Showing 24 changed files with 1,658 additions and 1,860 deletions.
9 changes: 8 additions & 1 deletion CONTRIBUTING.md
@@ -1,2 +1,9 @@
# How to contribute
fastMONAI follows the same contribution policy as fastai: https://github.com/fastai/nbdev/blob/master/CONTRIBUTING.md
For any issues related to the source code, please open an issue in the corresponding GitHub repository. Contributions to the code or the model are welcome and should be proposed through a pull request.

## How to get started
Install the git hooks that run automatic scripts during each commit and merge to strip the notebooks of superfluous metadata (and avoid merge conflicts). After cloning the repository, run the following command inside it:
nbdev_install_hooks

1. pip install -e 'fastMONAI[dev]'
2. nbdev_install_hooks
2 changes: 1 addition & 1 deletion fastMONAI/__init__.py
@@ -1 +1 @@
__version__ = "0.3.1"
__version__ = "0.3.2"
26 changes: 6 additions & 20 deletions fastMONAI/_modidx.py
@@ -29,10 +29,10 @@
'fastMONAI/external_data.py'),
'fastMONAI.external_data._process_ixi_xls': ( 'external_data.html#_process_ixi_xls',
'fastMONAI/external_data.py'),
'fastMONAI.external_data._process_nodule_img': ( 'external_data.html#_process_nodule_img',
'fastMONAI/external_data.py'),
'fastMONAI.external_data.download_NoduleMNIST3D': ( 'external_data.html#download_nodulemnist3d',
'fastMONAI/external_data.py'),
'fastMONAI.external_data._process_medmnist_img': ( 'external_data.html#_process_medmnist_img',
'fastMONAI/external_data.py'),
'fastMONAI.external_data.download_and_process_MedMNIST3D': ( 'external_data.html#download_and_process_medmnist3d',
'fastMONAI/external_data.py'),
'fastMONAI.external_data.download_example_spine_data': ( 'external_data.html#download_example_spine_data',
'fastMONAI/external_data.py'),
'fastMONAI.external_data.download_ixi_data': ( 'external_data.html#download_ixi_data',
@@ -129,24 +129,10 @@
'fastMONAI/vision_augmentation.py'),
'fastMONAI.vision_augmentation.ZNormalization.__init__': ( 'vision_augment.html#znormalization.__init__',
'fastMONAI/vision_augmentation.py'),
'fastMONAI.vision_augmentation.ZNormalization._do_z_normalization': ( 'vision_augment.html#znormalization._do_z_normalization',
'fastMONAI/vision_augmentation.py'),
'fastMONAI.vision_augmentation.ZNormalization.encodes': ( 'vision_augment.html#znormalization.encodes',
'fastMONAI/vision_augmentation.py'),
'fastMONAI.vision_augmentation._do_rand_biasfield': ( 'vision_augment.html#_do_rand_biasfield',
'fastMONAI/vision_augmentation.py'),
'fastMONAI.vision_augmentation._do_rand_blur': ( 'vision_augment.html#_do_rand_blur',
'fastMONAI/vision_augmentation.py'),
'fastMONAI.vision_augmentation._do_rand_gamma': ( 'vision_augment.html#_do_rand_gamma',
'fastMONAI/vision_augmentation.py'),
'fastMONAI.vision_augmentation._do_rand_ghosting': ( 'vision_augment.html#_do_rand_ghosting',
'fastMONAI/vision_augmentation.py'),
'fastMONAI.vision_augmentation._do_rand_motion': ( 'vision_augment.html#_do_rand_motion',
'fastMONAI/vision_augmentation.py'),
'fastMONAI.vision_augmentation._do_rand_noise': ( 'vision_augment.html#_do_rand_noise',
'fastMONAI/vision_augmentation.py'),
'fastMONAI.vision_augmentation._do_rand_spike': ( 'vision_augment.html#_do_rand_spike',
'fastMONAI/vision_augmentation.py'),
'fastMONAI.vision_augmentation._do_z_normalization': ( 'vision_augment.html#_do_z_normalization',
'fastMONAI/vision_augmentation.py'),
'fastMONAI.vision_augmentation.do_pad_or_crop': ( 'vision_augment.html#do_pad_or_crop',
'fastMONAI/vision_augmentation.py')},
'fastMONAI.vision_core': { 'fastMONAI.vision_core.MedBase': ('vision_core.html#medbase', 'fastMONAI/vision_core.py'),
108 changes: 58 additions & 50 deletions fastMONAI/dataset_info.py
@@ -14,18 +14,23 @@
import glob

# %% ../nbs/08_dataset_info.ipynb 4
class MedDataset():
'''A class to extract and present information about the dataset.'''

def __init__(self, path=None, # Path to the image folder
postfix:str='', # Specify the file type if there are different files in the folder
img_list:list=None, # Alternatively pass in a list with image paths
reorder:bool=False, # Whether to reorder the data to be closest to canonical (RAS+) orientation
dtype:(MedImage, MedMask)=MedImage, # Load data as datatype
max_workers:int=1 # The number of worker threads
):
'''Constructs all the necessary attributes for the MedDataset object.'''
class MedDataset:
"""A class to extract and present information about the dataset."""

def __init__(self, path=None, postfix: str = '', img_list: list = None,
reorder: bool = False, dtype: (MedImage, MedMask) = MedImage,
max_workers: int = 1):
"""Constructs MedDataset object.
Args:
path (str, optional): Path to the image folder.
postfix (str, optional): Specify the file type if there are different files in the folder.
img_list (List[str], optional): Alternatively, pass in a list with image paths.
reorder (bool, optional): Whether to reorder the data to be closest to canonical (RAS+) orientation.
dtype (Union[MedImage, MedMask], optional): Load data as datatype. Default is MedImage.
max_workers (int, optional): The number of worker threads. Default is 1.
"""

self.path = path
self.postfix = postfix
self.img_list = img_list
@@ -35,48 +35,43 @@ def __init__(self, path=None, # Path to the image folder
self.df = self._create_data_frame()

def _create_data_frame(self):
'''Private method that returns a dataframe with information about the dataset
Returns:
DataFrame: A DataFrame with information about the dataset.
'''
"""Private method that returns a dataframe with information about the dataset."""

if self.path:
self.img_list = glob.glob(f'{self.path}/*{self.postfix}*')
if not self.img_list: print('Could not find images. Check the image path')

with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
data_info_dict = list(executor.map(self._get_data_info, self.img_list))

df = pd.DataFrame(data_info_dict)
if df.orientation.nunique() > 1: print('The volumes in this dataset have different orientations. Recommended to pass in the argument reorder=True when creating a MedDataset object for this dataset')

if df.orientation.nunique() > 1:
print('The volumes in this dataset have different orientations. '
'Recommended to pass in the argument reorder=True when creating a MedDataset object for this dataset')

return df

def summary(self):
'''Summary DataFrame of the dataset with example path for similar data.'''

"""Summary DataFrame of the dataset with example path for similar data."""
columns = ['dim_0', 'dim_1', 'dim_2', 'voxel_0', 'voxel_1', 'voxel_2', 'orientation']
return self.df.groupby(columns,as_index=False).agg(example_path=('path', 'min'), total=('path', 'size')).sort_values('total', ascending=False)

return self.df.groupby(columns, as_index=False).agg(
example_path=('path', 'min'), total=('path', 'size')
).sort_values('total', ascending=False)

def suggestion(self):
'''Voxel value that appears most often in dim_0, dim_1 and dim_2, and wheter the data should be reoriented.'''
"""Voxel value that appears most often in dim_0, dim_1 and dim_2, and whether the data should be reoriented."""

resample = [self.df.voxel_0.mode()[0], self.df.voxel_1.mode()[0], self.df.voxel_2.mode()[0]]

return resample, self.reorder

def _get_data_info(self, fn:str):
'''Private method to collect information about an image file.
def _get_data_info(self, fn: str):
"""Private method to collect information about an image file."""
_, o, _ = med_img_reader(fn, dtype=self.dtype, reorder=self.reorder, only_tensor=False)

Args:
fn: Image file path.
Returns:
dict: A dictionary with information about the image file
'''

_,o,_ = med_img_reader(fn, dtype=self.dtype, reorder=self.reorder, only_tensor=False)

info_dict = {'path': fn, 'dim_0': o.shape[1], 'dim_1': o.shape[2], 'dim_2' :o.shape[3],
info_dict = {'path': fn, 'dim_0': o.shape[1], 'dim_1': o.shape[2], 'dim_2': o.shape[3],
'voxel_0': round(o.spacing[0], 4), 'voxel_1': round(o.spacing[1], 4), 'voxel_2': round(o.spacing[2], 4),
'orientation': f'{"".join(o.orientation)}+'}

@@ -87,28 +87,36 @@ def _get_data_info(self, fn:str):

return info_dict

def get_largest_img_size(self,
resample:list=None # A list with voxel spacing [dim_0, dim_1, dim_2]
) -> list:
'''Get the largest image size in the dataset.'''
dims = None
def get_largest_img_size(self, resample: list = None) -> list:
"""Get the largest image size in the dataset."""

if resample is not None:

dims = None

if resample is not None:
org_voxels = self.df[["voxel_0", "voxel_1", 'voxel_2']].values
org_dims = self.df[["dim_0", "dim_1", 'dim_2']].values

ratio = org_voxels/resample
new_dims = (org_dims * ratio).T
dims = [new_dims[0].max().round(), new_dims[1].max().round(), new_dims[2].max().round()]

else: dims = [df.dim_0.max(), df.dim_1.max(), df.dim_2.max()]


else:
            dims = [self.df.dim_0.max(), self.df.dim_1.max(), self.df.dim_2.max()]

return dims
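
To make the refactored API above concrete, here is a minimal usage sketch of MedDataset as defined in this file; the folder path, file postfix value, and the import locations are illustrative assumptions rather than part of this commit.

```python
# Minimal usage sketch of the refactored MedDataset API (illustrative only).
# The folder path and the import locations below are assumptions.
from fastMONAI.dataset_info import MedDataset   # module changed in this commit
from fastMONAI.vision_core import MedMask       # assumed home of MedMask

# Inspect a (hypothetical) folder of NIfTI segmentation masks.
dataset = MedDataset(path='data/masks', postfix='.nii.gz',
                     dtype=MedMask, reorder=True, max_workers=4)

print(dataset.summary())                  # one row per (shape, spacing, orientation) group
resample, reorder = dataset.suggestion()  # most frequent voxel spacing and the reorder flag
print(dataset.get_largest_img_size(resample=resample))  # largest dimensions after resampling
```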

# %% ../nbs/08_dataset_info.ipynb 5
def get_class_weights(train_labels:(np.array, list), class_weight='balanced'):
'''calculate class weights.'''
def get_class_weights(labels: (np.array, list), class_weight: str = 'balanced') -> torch.Tensor:
"""Calculates and returns the class weights.
Args:
labels: An array or list of class labels for each instance in the dataset.
class_weight: Defaults to 'balanced'.
Returns:
A tensor of class weights.
"""

class_weights = compute_class_weight(class_weight=class_weight, classes=np.unique(labels), y=labels)

class_weights = compute_class_weight(class_weight=class_weight, classes=np.unique(train_labels), y=train_labels)
return torch.Tensor(class_weights)
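
Similarly, a short hedged sketch of the get_class_weights helper; the toy labels and the CrossEntropyLoss wiring are illustrative assumptions, not something this commit prescribes.

```python
# Hedged sketch: feeding get_class_weights into a loss function.
# The labels below are toy data; the loss wiring is one common pattern.
import torch
from fastMONAI.dataset_info import get_class_weights

train_labels = [0, 0, 0, 0, 0, 1, 1, 2]                # imbalanced toy labels
weights = get_class_weights(train_labels)              # balanced weight per class, as a tensor
loss_func = torch.nn.CrossEntropyLoss(weight=weights)  # weighted classification loss
```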