BahmanTahayori · Lestropie · Jul 31, 2023 · Nov 1, 2023 · Nov 1, 2023 · Nov 1, 2023
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
diff --git a/pyproject.toml b/pyproject.toml
@@ -25,6 +25,7 @@ dependencies = [
     "nibabel>=2.5.1",
     "nilearn>=0.7",
     "numpy>=1.16",
+    "robustica>=0.1.3",
     "pandas>=2.0",
     "scikit-learn>=0.21",
     "scipy>=1.2.0",

diff --git a/tedana/config.py b/tedana/config.py
@@ -0,0 +1,4 @@
+DEFAULT_ICA_METHOD = "robustica"
+DEFAULT_N_ROBUST_RUNS = 30
+DEFAULT_N_MAX_ITER = 500
+DEFAULT_N_MAX_RESTART = 10
diff --git a/tedana/decomposition/ica.py b/tedana/decomposition/ica.py
@@ -1,27 +1,48 @@
-"""ICA and related signal decomposition methods for tedana."""
+"""ICA and related signal decomposition methods for tedana"""
 import logging
 import warnings
 
 import numpy as np
+from robustica import RobustICA
 from scipy import stats
 from sklearn.decomposition import FastICA
 
+from tedana.config import (
+    DEFAULT_ICA_METHOD,
+    DEFAULT_N_MAX_ITER,
+    DEFAULT_N_MAX_RESTART,
+    DEFAULT_N_ROBUST_RUNS,
+)
+
 LGR = logging.getLogger("GENERAL")
 RepLGR = logging.getLogger("REPORT")
 
 
-def tedica(data, n_components, fixed_seed, maxit=500, maxrestart=10):
-    """Perform ICA on ``data`` and return mixing matrix.
+def tedica(
+    data,
+    n_components,
+    fixed_seed,
+    ica_method=DEFAULT_ICA_METHOD,
+    n_robust_runs=DEFAULT_N_ROBUST_RUNS,
+    maxit=DEFAULT_N_MAX_ITER,
+    maxrestart=DEFAULT_N_MAX_RESTART,
+):
+    """
+    Perform ICA on `data` with the user-selected ICA method and return the mixing matrix.
 
     Parameters
     ----------
     data : (S x T) :obj:`numpy.ndarray`
         Dimensionally reduced optimally combined functional data, where `S` is
         samples and `T` is time
     n_components : :obj:`int`
-        Number of components retained from PCA decomposition
+        Number of components retained from PCA decomposition.
     fixed_seed : :obj:`int`
-        Seed for ensuring reproducibility of ICA results
+        Seed for ensuring reproducibility of ICA results.
+    ica_method : :obj:`str`, optional
+        Selected ICA method, either "fastica" or "robustica". Default is "robustica".
+    n_robust_runs : :obj:`int`, optional
+        Number of robust runs when robustica is used. Default is 30.
     maxit : :obj:`int`, optional
         Maximum number of iterations for ICA. Default is 500.
     maxrestart : :obj:`int`, optional
@@ -37,16 +58,159 @@ def tedica(data, n_components, fixed_seed, maxit=500, maxrestart=10):
     fixed_seed : :obj:`int`
         Random seed from final decomposition.
 
-    Notes
-    -----
-    Uses `sklearn` implementation of FastICA for decomposition
     """
     warnings.filterwarnings(action="ignore", module="scipy", message="^internal gelsd")
     RepLGR.info(
         "Independent component analysis was then used to "
         "decompose the dimensionally reduced dataset."
     )
 
+    ica_method = ica_method.lower()
+
+    if ica_method == "robustica":
+        mmix, fixed_seed = r_ica(
+            data,
+            n_components=n_components,
+            fixed_seed=fixed_seed,
+            n_robust_runs=n_robust_runs,
+            max_it=maxit,
+        )
+    elif ica_method == "fastica":
+        mmix, fixed_seed = f_ica(
+            data,
+            n_components=n_components,
+            fixed_seed=fixed_seed,
+            maxit=maxit,
+            maxrestart=maxrestart,
+        )
+    else:
+        raise ValueError("The selected ICA method is invalid!")
+
+    return mmix, fixed_seed
+
+
+def r_ica(data, n_components, fixed_seed, n_robust_runs, max_it):
+    """
+    Perform robust ICA on `data` and return the mixing matrix.
+
+    FastICA is run `n_robust_runs` times by the `robustica` package and the
+    resulting components are clustered. DBSCAN clustering is tried first;
+    if it raises, AgglomerativeClustering is used instead.
+
+    Parameters
+    ----------
+    data : (S x T) :obj:`numpy.ndarray`
+        Dimensionally reduced optimally combined functional data, where `S` is
+        samples and `T` is time
+    n_components : :obj:`int`
+        Number of components retained from PCA decomposition.
+    fixed_seed : :obj:`int`
+        Seed for ensuring reproducibility of ICA results. If -1, a random
+        seed between 1 and 999 is drawn.
+    n_robust_runs : :obj:`int`
+        Number of robust runs to perform.
+    max_it : :obj:`int`
+        Maximum number of iterations for ICA.
+
+    Returns
+    -------
+    mmix : (T x C) :obj:`numpy.ndarray`
+        Z-scored mixing matrix for converting input data to component space,
+        where `C` is components and `T` is the same as in `data`. Components
+        assigned to the noise cluster (label -1) are dropped.
+    fixed_seed : :obj:`int`
+        Random seed used for the decomposition.
+    """
+    if n_robust_runs > 200:
+        LGR.warning(
+            "The selected n_robust_runs is a very big number! The process will take a long time!"
+        )
+
+    RepLGR.info("RobustICA package was used for ICA decomposition \\citep{Anglada2022}.")
+
+    if fixed_seed == -1:
+        fixed_seed = np.random.randint(low=1, high=1000)
+
+    def _fit(robust_method):
+        """Run RobustICA with the given clustering method."""
+        rica = RobustICA(
+            n_components=n_components,
+            robust_runs=n_robust_runs,
+            whiten="arbitrary-variance",
+            max_iter=max_it,
+            random_state=fixed_seed,
+            robust_dimreduce=False,
+            fun="logcosh",
+            robust_method=robust_method,
+        )
+        _, mixing = rica.fit_transform(data)
+        quality = rica.evaluate_clustering(
+            rica.S_all, rica.clustering.labels_, rica.signs_, rica.orientation_
+        )
+        return mixing, quality
+
+    # Try DBSCAN first and fall back to AgglomerativeClustering on failure. Catch
+    # Exception rather than everything so KeyboardInterrupt/SystemExit still propagate.
+    try:
+        mmix, q = _fit("DBSCAN")
+    except Exception as exc:
+        LGR.warning(
+            "RobustICA with DBSCAN clustering failed ({0}); "
+            "retrying with AgglomerativeClustering.".format(exc)
+        )
+        mmix, q = _fit("AgglomerativeClustering")
+
+    keep = q["cluster_id"] >= 0  # The cluster labeled -1 is noise
+    iq = np.array(np.mean(q[keep].iq))
+
+    if iq < 0.6:
+        LGR.warning(
+            "The resultant mean Index Quality is low. It  is recommended to rerun the "
+            "process with a different seed."
+        )
+
+    mmix = mmix[:, keep]
+    mmix = stats.zscore(mmix, axis=0)
+
+    LGR.info(
+        "RobustICA with {0} robust runs and seed {1} was used. "
+        "The mean Index Quality is {2}".format(n_robust_runs, fixed_seed, iq)
+    )
+    return mmix, fixed_seed
+
+
+def f_ica(data, n_components, fixed_seed, maxit, maxrestart):
+    """
+    Perform FastICA on `data` and return the mixing matrix.
+
+    Parameters
+    ----------
+    data : (S x T) :obj:`numpy.ndarray`
+        Dimensionally reduced optimally combined functional data, where `S` is
+        samples and `T` is time
+    n_components : :obj:`int`
+        Number of components retained from PCA decomposition
+    fixed_seed : :obj:`int`
+        Seed for ensuring reproducibility of ICA results
+    maxit : :obj:`int`, optional
+        Maximum number of iterations for ICA. Default is 500.
+    maxrestart : :obj:`int`, optional
+        Maximum number of attempted decompositions to perform with different
+        random seeds. ICA will stop running if there is convergence prior to
+        reaching this limit. Default is 10.
+
+    Returns
+    -------
+    mmix : (T x C) :obj:`numpy.ndarray`
+        Z-scored mixing matrix for converting input data to component space,
+        where `C` is components and `T` is the same as in `data`
+    fixed_seed : :obj:`int`
+        Random seed from final decomposition.
+
+    Notes
+    -----
+    Uses `sklearn` implementation of FastICA for decomposition
+    """
     if fixed_seed == -1:
         fixed_seed = np.random.randint(low=1, high=1000)
 
@@ -69,18 +233,19 @@ def tedica(data, n_components, fixed_seed, maxit=500, maxrestart=10):
             w = list(filter(lambda i: issubclass(i.category, UserWarning), w))
             if len(w):
                 LGR.warning(
-                    f"ICA with random seed {fixed_seed} failed to converge after {ica.n_iter_} "
-                    "iterations"
+                    "ICA with random seed {0} failed to converge after {1} "
+                    "iterations".format(fixed_seed, ica.n_iter_)
                 )
                 if i_attempt < maxrestart - 1:
                     fixed_seed += 1
-                    LGR.warning(f"Random seed updated to {fixed_seed}")
+                    LGR.warning("Random seed updated to {0}".format(fixed_seed))
             else:
                 LGR.info(
-                    f"ICA with random seed {fixed_seed} converged in {ica.n_iter_} iterations"
+                    "ICA with random seed {0} converged in {1} "
+                    "iterations".format(fixed_seed, ica.n_iter_)
                 )
                 break
 
     mmix = ica.mixing_
     mmix = stats.zscore(mmix, axis=0)
-    return mmix, fixed_seed
+    return mmix, fixed_seed
diff --git a/tedana/resources/references.bib b/tedana/resources/references.bib
@@ -313,3 +313,13 @@ @Article{Hunter:2007
   doi       = {10.1109/MCSE.2007.55},
   year      = 2007
 }
+
+@Article{Anglada2022,
+  Author    = {Anglada-Girotto, Miquel and Miravet-Verde, Samuel and Serrano, Luis and Head, Sarah},
+  Title     = {robustica: customizable robust independent component analysis},
+  Journal   = {BMC Bioinformatics},
+  Volume    = {23},
+  Number    = {519},
+  doi       = {10.1186/s12859-022-05043-9},
+  year      = 2022
+}