Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add robustica method (internal PR review) #2

Closed
wants to merge 14 commits into from
Closed
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .idea/inspectionProfiles/profiles_settings.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

52 changes: 52 additions & 0 deletions .idea/workspace.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ dependencies = [
"nibabel>=2.5.1",
"nilearn>=0.7",
"numpy>=1.16",
"robustica>=0.1.3",
"pandas>=2.0",
"scikit-learn>=0.21",
"scipy>=1.2.0",
Expand Down
4 changes: 4 additions & 0 deletions tedana/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Default values for the ICA decomposition step; used as keyword defaults by
# ``tedana.decomposition.ica.tedica``.

# Name of the ICA implementation to run ("robustica" or "fastica").
DEFAULT_ICA_METHOD = "robustica"
# Number of robust runs performed when the robustica method is selected.
DEFAULT_N_ROBUST_RUNS = 30
# Maximum number of iterations for ICA.
DEFAULT_N_MAX_ITER = 500
# Maximum number of attempted FastICA decompositions with different random seeds.
DEFAULT_N_MAX_RESTART = 10
191 changes: 178 additions & 13 deletions tedana/decomposition/ica.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,48 @@
"""ICA and related signal decomposition methods for tedana."""
"""ICA and related signal decomposition methods for tedana"""
import logging
import warnings

import numpy as np
from robustica import RobustICA
from scipy import stats
from sklearn.decomposition import FastICA

from tedana.config import (
DEFAULT_ICA_METHOD,
DEFAULT_N_MAX_ITER,
DEFAULT_N_MAX_RESTART,
DEFAULT_N_ROBUST_RUNS,
)

# General-purpose logger: warnings and progress messages for the user.
LGR = logging.getLogger("GENERAL")
# Report logger: receives methods-style sentences (with \citep keys) describing what was run.
RepLGR = logging.getLogger("REPORT")


def tedica(data, n_components, fixed_seed, maxit=500, maxrestart=10):
"""Perform ICA on ``data`` and return mixing matrix.
def tedica(
data,
n_components,
fixed_seed,
ica_method=DEFAULT_ICA_METHOD,
n_robust_runs=DEFAULT_N_ROBUST_RUNS,
maxit=DEFAULT_N_MAX_ITER,
maxrestart=DEFAULT_N_MAX_RESTART,
):
"""
Perform ICA on `data` with the user selected ica method and returns mixing matrix

Parameters
----------
data : (S x T) :obj:`numpy.ndarray`
Dimensionally reduced optimally combined functional data, where `S` is
samples and `T` is time
n_components : :obj:`int`
Number of components retained from PCA decomposition
Number of components retained from PCA decomposition.
fixed_seed : :obj:`int`
Seed for ensuring reproducibility of ICA results
Seed for ensuring reproducibility of ICA results.
ica_method : :obj:`str`
Selected ICA method; can be "fastica" or "robustica".
n_robust_runs : :obj:`int`
Selected number of robust runs when robustica is used. Default is 30.
maxit : :obj:`int`, optional
Maximum number of iterations for ICA. Default is 500.
maxrestart : :obj:`int`, optional
Expand All @@ -37,16 +58,159 @@ def tedica(data, n_components, fixed_seed, maxit=500, maxrestart=10):
fixed_seed : :obj:`int`
Random seed from final decomposition.

Notes
-----
Uses `sklearn` implementation of FastICA for decomposition
"""
warnings.filterwarnings(action="ignore", module="scipy", message="^internal gelsd")
RepLGR.info(
"Independent component analysis was then used to "
"decompose the dimensionally reduced dataset."
)

ica_method = ica_method.lower()

if ica_method == "robustica":
mmix, fixed_seed = r_ica(
data,
n_components=n_components,
fixed_seed=fixed_seed,
n_robust_runs=n_robust_runs,
max_it=maxit,
)
elif ica_method == "fastica":
mmix, fixed_seed = f_ica(
data,
n_components=n_components,
fixed_seed=fixed_seed,
maxit=maxit,
maxrestart=maxrestart,
)
else:
raise ValueError("The selected ICA method is invalid!")

return mmix, fixed_seed


def _fit_robustica(data, n_components, fixed_seed, n_robust_runs, max_it, robust_method):
    """Run a single RobustICA decomposition using ``robust_method`` for clustering.

    Returns the mixing matrix produced by ``RobustICA.fit_transform`` and the
    clustering-quality table produced by ``RobustICA.evaluate_clustering``.
    """
    rica = RobustICA(
        n_components=n_components,
        robust_runs=n_robust_runs,
        whiten="arbitrary-variance",
        max_iter=max_it,
        random_state=fixed_seed,
        robust_dimreduce=False,
        fun="logcosh",
        robust_method=robust_method,
    )

    # The source matrix (first output) is not needed here; only the mixing matrix is used.
    _, mmix = rica.fit_transform(data)
    q = rica.evaluate_clustering(
        rica.S_all, rica.clustering.labels_, rica.signs_, rica.orientation_
    )
    return mmix, q


def r_ica(data, n_components, fixed_seed, n_robust_runs, max_it):
    """Perform RobustICA on ``data`` and return the mixing matrix.

    RobustICA is first run with DBSCAN clustering; if that attempt raises an
    exception, it is rerun once with AgglomerativeClustering. An exception
    raised by the second attempt propagates to the caller.

    Parameters
    ----------
    data : (S x T) :obj:`numpy.ndarray`
        Dimensionally reduced optimally combined functional data, where `S` is
        samples and `T` is time
    n_components : :obj:`int`
        Number of components retained from PCA decomposition.
    fixed_seed : :obj:`int`
        Seed for ensuring reproducibility of ICA results. If -1, a random
        seed in [1, 1000) is drawn.
    n_robust_runs : :obj:`int`
        Number of robust runs. A warning is logged when this exceeds 200.
    max_it : :obj:`int`
        Maximum number of iterations passed to RobustICA as ``max_iter``.

    Returns
    -------
    mmix : (T x C) :obj:`numpy.ndarray`
        Z-scored mixing matrix for converting input data to component space,
        where `C` is components and `T` is the same as in `data`. Columns
        belonging to the noise cluster (``cluster_id`` of -1) are dropped.
    fixed_seed : :obj:`int`
        Random seed used for the decomposition.
    """
    if n_robust_runs > 200:
        LGR.warning(
            "The selected n_robust_runs is a very big number! The process will take a long time!"
        )

    # NOTE(review): the key cited here is "Anglada2022" while references.bib declares
    # "Anglada:2022" -- confirm which form the report generator expects.
    RepLGR.info("RobustICA package was used for ICA decomposition \\citep{Anglada2022}.")

    if fixed_seed == -1:
        fixed_seed = np.random.randint(low=1, high=1000)

    try:
        mmix, q = _fit_robustica(
            data, n_components, fixed_seed, n_robust_runs, max_it, "DBSCAN"
        )
    except Exception as exc:
        # Catch Exception (not a bare except) so KeyboardInterrupt/SystemExit still
        # propagate, and record why the fallback clustering method was needed.
        LGR.warning(
            f"RobustICA with DBSCAN clustering failed ({exc!r}); "
            "retrying with AgglomerativeClustering."
        )
        mmix, q = _fit_robustica(
            data, n_components, fixed_seed, n_robust_runs, max_it, "AgglomerativeClustering"
        )

    # The cluster labeled -1 is noise; exclude it from both the quality score
    # and the returned mixing matrix.
    is_signal = q["cluster_id"] >= 0
    iq = float(np.mean(q[is_signal].iq))

    if iq < 0.6:
        LGR.warning(
            "The resultant mean Index Quality is low. It is recommended to rerun the "
            "process with a different seed."
        )

    mmix = mmix[:, is_signal]
    mmix = stats.zscore(mmix, axis=0)

    LGR.info(
        f"RobustICA with {n_robust_runs} robust runs and seed {fixed_seed} was used. "
        f"The mean Index Quality is {iq}"
    )
    return mmix, fixed_seed


def f_ica(data, n_components, fixed_seed, maxit, maxrestart):
"""
Perform FastICA on `data` and returns mixing matrix

Parameters
----------
data : (S x T) :obj:`numpy.ndarray`
Dimensionally reduced optimally combined functional data, where `S` is
samples and `T` is time
n_components : :obj:`int`
Number of components retained from PCA decomposition
fixed_seed : :obj:`int`
Seed for ensuring reproducibility of ICA results
maxit : :obj:`int`, optional
Maximum number of iterations for ICA. Default is 500.
maxrestart : :obj:`int`, optional
Maximum number of attempted decompositions to perform with different
random seeds. ICA will stop running if there is convergence prior to
reaching this limit. Default is 10.

Returns
-------
mmix : (T x C) :obj:`numpy.ndarray`
Z-scored mixing matrix for converting input data to component space,
where `C` is components and `T` is the same as in `data`
fixed_seed : :obj:`int`
Random seed from final decomposition.

Notes
-----
Uses `sklearn` implementation of FastICA for decomposition
"""
if fixed_seed == -1:
fixed_seed = np.random.randint(low=1, high=1000)

Expand All @@ -69,18 +233,19 @@ def tedica(data, n_components, fixed_seed, maxit=500, maxrestart=10):
w = list(filter(lambda i: issubclass(i.category, UserWarning), w))
if len(w):
LGR.warning(
f"ICA with random seed {fixed_seed} failed to converge after {ica.n_iter_} "
"iterations"
"ICA with random seed {0} failed to converge after {1} "
"iterations".format(fixed_seed, ica.n_iter_)
)
if i_attempt < maxrestart - 1:
fixed_seed += 1
LGR.warning(f"Random seed updated to {fixed_seed}")
LGR.warning("Random seed updated to {0}".format(fixed_seed))
else:
LGR.info(
f"ICA with random seed {fixed_seed} converged in {ica.n_iter_} iterations"
"ICA with random seed {0} converged in {1} "
"iterations".format(fixed_seed, ica.n_iter_)
)
break

mmix = ica.mixing_
mmix = stats.zscore(mmix, axis=0)
return mmix, fixed_seed
return mmix, fixed_seed
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

GitHub shows a red circled dash at the end of some files. This indicates that the file does not contain a newline at the end. The final non-empty line of the file should contain a newline, but there should be no empty lines beyond that.

10 changes: 10 additions & 0 deletions tedana/resources/references.bib
Original file line number Diff line number Diff line change
Expand Up @@ -313,3 +313,13 @@ @Article{Hunter:2007
doi = {10.1109/MCSE.2007.55},
year = 2007
}

@Article{Anglada:2022,
Author = {Anglada-Girotto, Miquel and Miravet-Verde, Samuel and Serrano, Luis and Head, Sarah},
Title = {robustica: customizable robust independent component analysis},
Journal = {BMC Bioinformatics},
Volume = {23},
Number = {519},
doi = {10.1186/s12859-022-05043-9},
year = 2022
}
Loading