-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add robustica method (internal PR review) #2
Changes from 7 commits
f4eaa3e
f2cdb4e
ac4d008
bd615b2
41354cb
4714baf
8c12003
46ba44c
0b68815
d45ce0f
05d6f79
da1b128
6b308c1
a403e44
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Default ICA backend; used as the default of tedica's ``ica_method`` argument,
# whose dispatcher accepts "robustica" or "fastica".
DEFAULT_ICA_METHOD = "robustica" | ||
# Default of tedica's ``n_robust_runs`` argument (only forwarded to the
# robustica backend).
DEFAULT_N_ROBUST_RUNS = 30 | ||
# Default of tedica's ``maxit`` argument (maximum ICA iterations).
DEFAULT_N_MAX_ITER = 500 | ||
# Default of tedica's ``maxrestart`` argument (only forwarded to the fastica
# backend).
DEFAULT_N_MAX_RESTART = 10 | ||
# NOTE(review): presumably the default random seed handed to tedica as
# ``fixed_seed`` by its callers -- confirm against the workflow/CLI code.
DEFAULT_SEED = 42 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,27 +1,48 @@ | ||
"""ICA and related signal decomposition methods for tedana.""" | ||
"""ICA and related signal decomposition methods for tedana""" | ||
import logging | ||
import warnings | ||
|
||
import numpy as np | ||
from robustica import RobustICA | ||
from scipy import stats | ||
from sklearn.decomposition import FastICA | ||
|
||
from tedana.config import ( | ||
DEFAULT_ICA_METHOD, | ||
DEFAULT_N_MAX_ITER, | ||
DEFAULT_N_MAX_RESTART, | ||
DEFAULT_N_ROBUST_RUNS, | ||
) | ||
|
||
LGR = logging.getLogger("GENERAL") | ||
RepLGR = logging.getLogger("REPORT") | ||
|
||
|
||
def tedica(
    data,
    n_components,
    fixed_seed,
    ica_method=DEFAULT_ICA_METHOD,
    n_robust_runs=DEFAULT_N_ROBUST_RUNS,
    maxit=DEFAULT_N_MAX_ITER,
    maxrestart=DEFAULT_N_MAX_RESTART,
):
    """Perform ICA on ``data`` with the selected method and return the mixing matrix.

    Parameters
    ----------
    data : (S x T) :obj:`numpy.ndarray`
        Dimensionally reduced optimally combined functional data, where `S` is
        samples and `T` is time
    n_components : :obj:`int`
        Number of components retained from PCA decomposition.
    fixed_seed : :obj:`int`
        Seed for ensuring reproducibility of ICA results. A value of -1 makes
        the selected backend draw a random seed.
    ica_method : :obj:`str`, optional
        Selected ICA method, either "robustica" or "fastica" (compared
        case-insensitively). Default is ``DEFAULT_ICA_METHOD``.
    n_robust_runs : :obj:`int`, optional
        Number of robust runs. Only used when ``ica_method`` is "robustica".
        Default is ``DEFAULT_N_ROBUST_RUNS``.
    maxit : :obj:`int`, optional
        Maximum number of iterations for ICA. Default is ``DEFAULT_N_MAX_ITER``.
    maxrestart : :obj:`int`, optional
        Maximum number of attempted decompositions to perform with different
        random seeds. Only used when ``ica_method`` is "fastica".
        Default is ``DEFAULT_N_MAX_RESTART``.

    Returns
    -------
    mmix : (T x C) :obj:`numpy.ndarray`
        Z-scored mixing matrix for converting input data to component space,
        where `C` is components and `T` is the same as in `data`
    fixed_seed : :obj:`int`
        Random seed from final decomposition.

    Raises
    ------
    ValueError
        If ``ica_method`` is neither "robustica" nor "fastica".

    Notes
    -----
    The decomposition itself is delegated to :func:`r_ica` ("robustica") or
    :func:`f_ica` ("fastica"). For "robustica", ``fixed_seed`` is forwarded to
    RobustICA as its random state; reproducible seeding there relies on the
    upstream fix tracked in https://github.com/CRG-CNAG/robustica/issues/3.
    """
    warnings.filterwarnings(action="ignore", module="scipy", message="^internal gelsd")
    RepLGR.info(
        "Independent component analysis was then used to "
        "decompose the dimensionally reduced dataset."
    )

    ica_method = ica_method.lower()

    if ica_method == "robustica":
        # ``maxrestart`` is intentionally not forwarded: r_ica has no restart loop.
        mmix, fixed_seed = r_ica(
            data,
            n_components=n_components,
            fixed_seed=fixed_seed,
            n_robust_runs=n_robust_runs,
            max_it=maxit,
        )
    elif ica_method == "fastica":
        mmix, fixed_seed = f_ica(
            data,
            n_components=n_components,
            fixed_seed=fixed_seed,
            maxit=maxit,
            maxrestart=maxrestart,
        )
    else:
        # Name the offending value and the accepted ones so the error is actionable.
        raise ValueError(
            "The selected ICA method is invalid! "
            f"Got {ica_method!r}; expected 'robustica' or 'fastica'."
        )

    return mmix, fixed_seed
|
||
|
||
def r_ica(data, n_components, fixed_seed, n_robust_runs, max_it):
    """Perform robustica on ``data`` and return the mixing matrix.

    RobustICA runs FastICA ``n_robust_runs`` times and clusters the resulting
    components. DBSCAN clustering is tried first; if that attempt raises, the
    decomposition is repeated with AgglomerativeClustering.

    Parameters
    ----------
    data : (S x T) :obj:`numpy.ndarray`
        Dimensionally reduced optimally combined functional data, where `S` is
        samples and `T` is time
    n_components : :obj:`int`
        Number of components retained from PCA decomposition.
    fixed_seed : :obj:`int`
        Seed for ensuring reproducibility of ICA results. If -1, a random seed
        in [1, 1000) is drawn and returned.
    n_robust_runs : :obj:`int`
        Number of robust runs. A warning is logged when it exceeds 200.
    max_it : :obj:`int`
        Maximum number of iterations for ICA.

    Returns
    -------
    mmix : (T x C) :obj:`numpy.ndarray`
        Z-scored mixing matrix for converting input data to component space,
        where `C` is components and `T` is the same as in `data`. Columns
        belonging to the noise cluster (``cluster_id`` of -1) are dropped.
    fixed_seed : :obj:`int`
        Random seed used for the decomposition.
    """
    if n_robust_runs > 200:
        LGR.warning(
            "The selected n_robust_runs is a very big number! The process will take a long time!"
        )

    RepLGR.info("RobustICA package was used for ICA decomposition \\citep{Anglada2022}.")

    if fixed_seed == -1:
        fixed_seed = np.random.randint(low=1, high=1000)

    try:
        mmix, q = _fit_robustica(
            data, n_components, fixed_seed, n_robust_runs, max_it, robust_method="DBSCAN"
        )
    except Exception as exc:
        # Catch ``Exception`` rather than using a bare ``except`` so that
        # KeyboardInterrupt/SystemExit still abort instead of triggering a
        # second, equally long decomposition. Log the fallback so it is visible.
        LGR.warning(
            f"RobustICA with DBSCAN clustering failed ({exc!r}); "
            "retrying with AgglomerativeClustering."
        )
        mmix, q = _fit_robustica(
            data,
            n_components,
            fixed_seed,
            n_robust_runs,
            max_it,
            robust_method="AgglomerativeClustering",
        )

    # The cluster labeled -1 is noise; exclude it from the quality score and output.
    is_component = q["cluster_id"] >= 0
    iq = float(np.mean(q[is_component].iq))

    if iq < 0.6:
        LGR.warning(
            "The resultant mean Index Quality is low. It is recommended to rerun the "
            "process with a different seed."
        )

    mmix = mmix[:, is_component]
    mmix = stats.zscore(mmix, axis=0)

    LGR.info(
        f"RobustICA with {n_robust_runs} robust runs and seed {fixed_seed} was used. "
        f"The mean Index Quality is {iq}"
    )
    return mmix, fixed_seed


def _fit_robustica(data, n_components, fixed_seed, n_robust_runs, max_it, robust_method):
    """Fit RobustICA with ``robust_method`` clustering; return (mixing matrix, clustering quality)."""
    rica = RobustICA(
        n_components=n_components,
        robust_runs=n_robust_runs,
        whiten="arbitrary-variance",
        max_iter=max_it,
        random_state=fixed_seed,
        robust_dimreduce=False,
        fun="logcosh",
        robust_method=robust_method,
    )

    # The source matrix returned alongside the mixing matrix is not needed here.
    _, mmix = rica.fit_transform(data)
    q = rica.evaluate_clustering(
        rica.S_all, rica.clustering.labels_, rica.signs_, rica.orientation_
    )
    return mmix, q
|
||
|
||
def f_ica(data, n_components, fixed_seed, maxit, maxrestart): | ||
""" | ||
Perform FastICA on `data` and returns mixing matrix | ||
|
||
Parameters | ||
---------- | ||
data : (S x T) :obj:`numpy.ndarray` | ||
Dimensionally reduced optimally combined functional data, where `S` is | ||
samples and `T` is time | ||
n_components : :obj:`int` | ||
Number of components retained from PCA decomposition | ||
fixed_seed : :obj:`int` | ||
Seed for ensuring reproducibility of ICA results | ||
maxit : :obj:`int`, optional | ||
Maximum number of iterations for ICA. Default is 500. | ||
maxrestart : :obj:`int`, optional | ||
Maximum number of attempted decompositions to perform with different | ||
random seeds. ICA will stop running if there is convergence prior to | ||
reaching this limit. Default is 10. | ||
|
||
Returns | ||
------- | ||
mmix : (T x C) :obj:`numpy.ndarray` | ||
Z-scored mixing matrix for converting input data to component space, | ||
where `C` is components and `T` is the same as in `data` | ||
fixed_seed : :obj:`int` | ||
Random seed from final decomposition. | ||
|
||
Notes | ||
----- | ||
Uses `sklearn` implementation of FastICA for decomposition | ||
""" | ||
if fixed_seed == -1: | ||
fixed_seed = np.random.randint(low=1, high=1000) | ||
|
||
|
@@ -69,18 +233,19 @@ def tedica(data, n_components, fixed_seed, maxit=500, maxrestart=10): | |
w = list(filter(lambda i: issubclass(i.category, UserWarning), w)) | ||
if len(w): | ||
LGR.warning( | ||
f"ICA with random seed {fixed_seed} failed to converge after {ica.n_iter_} " | ||
"iterations" | ||
"ICA with random seed {0} failed to converge after {1} " | ||
"iterations".format(fixed_seed, ica.n_iter_) | ||
) | ||
if i_attempt < maxrestart - 1: | ||
fixed_seed += 1 | ||
LGR.warning(f"Random seed updated to {fixed_seed}") | ||
LGR.warning("Random seed updated to {0}".format(fixed_seed)) | ||
else: | ||
LGR.info( | ||
f"ICA with random seed {fixed_seed} converged in {ica.n_iter_} iterations" | ||
"ICA with random seed {0} converged in {1} " | ||
"iterations".format(fixed_seed, ica.n_iter_) | ||
) | ||
break | ||
|
||
mmix = ica.mixing_ | ||
mmix = stats.zscore(mmix, axis=0) | ||
return mmix, fixed_seed | ||
return mmix, fixed_seed | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. GitHub shows a red circled dash at the end of some files. This indicates that the file does not end with a newline. The final non-empty line of the file should end with a newline, but there should be no empty lines beyond that. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I know I mentioned that there should be some kind of reference to the resolution of the fixed seed in robustica issue, but this doesn't work. If you look at the code naive to this history, it's a very odd code comment that seemingly bears no relevance to the surrounding code.
What I was advocating was something like this: if, for instance, you made a code change where use of a fixed seed with robustica was previously forbidden but is now permitted (perhaps together with a bump to the required robustica version), then the commit making that change should include in its commit message a description of, and a hyperlink to, that upstream change.