From 628bf64e5f59bddc138731aa18d359163d6cf4a4 Mon Sep 17 00:00:00 2001 From: Ross Markello Date: Fri, 27 Apr 2018 10:50:19 -0400 Subject: [PATCH 01/18] [STY] Mostly stylistic, a few enhancements... Modified some code in utils.py to be more readable and to accept various input types (e.g., lists of data files, arrays, etc.). In the process, changed some doc-strings, modified the print statements to be logging statements, and made a few comments for identification of things that need to be changed to better integrate surface files. --- tedana/interfaces/t2smap.py | 11 +- tedana/interfaces/tedana.py | 189 +++++++++++++++--------------- tedana/utils/utils.py | 228 +++++++++++++++++++++--------------- 3 files changed, 237 insertions(+), 191 deletions(-) diff --git a/tedana/interfaces/t2smap.py b/tedana/interfaces/t2smap.py index bfbe67e76..68ae8ec2c 100644 --- a/tedana/interfaces/t2smap.py +++ b/tedana/interfaces/t2smap.py @@ -3,6 +3,9 @@ from tedana.utils import (niwrite, cat2echos, makeadmask, unmask, fmask) +import logging +lgr = logging.getLogger(__name__) + def t2sadmap(catd, mask, tes, masksum, start_echo): """ @@ -120,16 +123,16 @@ def main(options): catd = cat2echos(catim.get_data(), ne) nx, ny, nz, Ne, nt = catd.shape - print("++ Computing Mask") - mask, masksum = makeadmask(catd, min=False, getsum=True) + lgr.info('++ Computing Mask') + mask, masksum = makeadmask(catd, minimum=False, getsum=True) - print("++ Computing Adaptive T2* map") + lgr.info('++ Computing Adaptive T2* map') t2s, s0, t2ss, s0vs, t2saf, s0vaf = t2sadmap(catd, mask, tes, masksum, 2) niwrite(masksum, aff, 'masksum%s.nii' % suf) niwrite(t2ss, aff, 't2ss%s.nii' % suf) niwrite(s0vs, aff, 's0vs%s.nii' % suf) - print("++ Computing optimal combination") + lgr.info('++ Computing optimal combination') tsoc = np.array(optcom(catd, t2s, tes, diff --git a/tedana/interfaces/tedana.py b/tedana/interfaces/tedana.py index 139279ef4..b69b99dc3 100644 --- a/tedana/interfaces/tedana.py +++ 
b/tedana/interfaces/tedana.py @@ -1,4 +1,5 @@ import os +import shutil import sys import pickle import textwrap @@ -11,6 +12,9 @@ makeadmask, fmask, unmask, fitgaussian, niwrite, dice, andb) +import logging +lgr = logging.getLogger(__name__) + """ PROCEDURE 2 : Computes ME-PCA and ME-ICA -Computes T2* map @@ -72,34 +76,27 @@ def do_svm(X_train, y_train, X_test, svmtype=0): def spatclust(data, mask, csize, thr, header, aff, infile=None, dindex=0, tindex=0): """ + Thresholds and spatially clusters `data` Parameters ---------- - data : - + data : array_like mask : - - csize : - + csize : int thr : - header : - aff : - infile : - dindex : - tindex : - Returns ------- clustered : + """ + # threshold image - """ if infile is None: data = data.copy() data[data < thr] = 0 @@ -111,6 +108,7 @@ def spatclust(data, mask, csize, thr, header, aff, infile=None, dindex=0, else: addopts = '-1dindex {0} -1tindex {1}'.format(str(dindex), str(tindex)) + # cmd_str = '3dmerge -overwrite {0} -dxyz=1 -1clust 1 {1:d} ' \ '-1thresh {2:.02f} -prefix __clout.nii.gz {3}' os.system(cmd_str.format(addopts, int(csize), float(thr), infile)) @@ -119,7 +117,8 @@ def spatclust(data, mask, csize, thr, header, aff, infile=None, dindex=0, def rankvec(vals): - """Returns ranks of array. + """ + Returns ranks of array Parameters ---------- @@ -133,7 +132,7 @@ def rankvec(vals): """ try: vals = np.array(vals) - except: + except: # would this ever happen???? raise IOError('Input vals is not array_like') if len(vals.shape) != 1: @@ -280,22 +279,22 @@ def getelbow_aggr(ks, val=False): return maxcurv -def getfbounds(ne): +def getfbounds(n_echos): """ Parameters ---------- - ne : int + n_echos : int Number of echoes. 
Returns ------- """ - if not isinstance(ne, int): - raise IOError('Input ne must be int') - elif ne <= 0: - raise ValueError('Input ne must be greater than 0') - idx = ne - 1 + if not isinstance(n_echos, int): + raise IOError('Input n_echos must be int') + elif n_echos <= 0: + raise ValueError('Input n_echos must be greater than 0') + idx = n_echos - 1 F05s = [None, None, 18.5, 10.1, 7.7, 6.6, 6.0, 5.6, 5.3, 5.1, 5.0] F025s = [None, None, 38.5, 17.4, 12.2, 10, 8.8, 8.1, 7.6, 7.2, 6.9] @@ -308,12 +307,12 @@ def eimask(dd, ees=None): ees = range(dd.shape[1]) imask = np.zeros([dd.shape[0], len(ees)]) for ee in ees: - print(ee) + lgr.info(ee) lthr = 0.001 * stats.scoreatpercentile(dd[:, ee, :].flatten(), 98, interpolation_method='lower') hthr = 5 * stats.scoreatpercentile(dd[:, ee, :].flatten(), 98, interpolation_method='lower') - print(lthr, hthr) + lgr.info(lthr, hthr) imask[dd[:, ee, :].mean(1) > lthr, ee] = 1 imask[dd[:, ee, :].mean(1) > hthr, ee] = 0 return imask @@ -399,16 +398,16 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, head, totvar_norm = (WTS**2).sum() # Compute Betas and means over TEs for TE-dependence analysis - Ne = len(tes) - betas = cat2echos(get_coeffs(uncat2echos(catd, Ne), - np.tile(mask, (1, 1, Ne)), - mmix), Ne) - nx, ny, nz, Ne, nc = betas.shape + n_echos = len(tes) + betas = cat2echos(get_coeffs(uncat2echos(catd), + np.tile(mask, (1, 1, n_echos)), + mmix), n_echos) + nx, ny, nz, n_echos, nc = betas.shape Nm = mask.sum() NmD = (t2s != 0).sum() mu = catd.mean(axis=-1) - tes = np.reshape(tes, (Ne, 1)) - fmin, fmid, fmax = getfbounds(Ne) + tes = np.reshape(tes, (n_echos, 1)) + fmin, fmid, fmax = getfbounds(n_echos) # Mask arrays mumask = fmask(mu, t2s != 0) @@ -448,14 +447,14 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, head, # S0 Model coeffs_S0 = (B * X1).sum(axis=0) / (X1**2).sum(axis=0) - SSE_S0 = (B - X1 * np.tile(coeffs_S0, (Ne, 1)))**2 + SSE_S0 = (B - X1 * np.tile(coeffs_S0, (n_echos, 1)))**2 
SSE_S0 = SSE_S0.sum(axis=0) F_S0 = (alpha - SSE_S0) * 2 / (SSE_S0) F_S0_maps[:, i] = F_S0 # R2 Model coeffs_R2 = (B * X2).sum(axis=0) / (X2**2).sum(axis=0) - SSE_R2 = (B - X2 * np.tile(coeffs_R2, (Ne, 1)))**2 + SSE_R2 = (B - X2 * np.tile(coeffs_R2, (n_echos, 1)))**2 SSE_R2 = SSE_R2.sum(axis=0) F_R2 = (alpha - SSE_R2)*2/(SSE_R2) F_R2_maps[:, i] = F_R2 @@ -522,8 +521,10 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, head, csize = np.max([int(Nm * 0.0005) + 5, 20]) # Do simple clustering on F + # TODO: can be replaced with nilearn.image.threshold_img os.system("3dcalc -overwrite -a %s[1..2] -expr 'a*step(a-%i)' -prefix .fcl_in.nii.gz " "-overwrite" % (ccname, fmin)) + # TODO: can be replaced with nilearn.regions.connected_regions os.system('3dmerge -overwrite -dxyz=1 -1clust 1 %i -doall ' '-prefix .fcl_out.nii.gz .fcl_in.nii.gz' % (csize)) sel = fmask(nib.load('.fcl_out.nii.gz').get_data(), t2s != 0) != 0 @@ -574,7 +575,7 @@ def selcomps(seldict, mmix, head, manacc, debug=False, olevel=2, oversion=99, if filecsdata: import bz2 if seldict is not None: - print("Saving component selection data") + lgr.info('Saving component selection data') csstate_f = bz2.BZ2File('compseldata.pklbz', 'wb') pickle.dump(seldict, csstate_f) csstate_f.close() @@ -584,7 +585,7 @@ def selcomps(seldict, mmix, head, manacc, debug=False, olevel=2, oversion=99, seldict = pickle.load(csstate_f) csstate_f.close() except FileNotFoundError: - print("No component data found!") + lgr.info('No component data found!') return None # Dump dictionary into variable names @@ -643,7 +644,7 @@ def selcomps(seldict, mmix, head, manacc, debug=False, olevel=2, oversion=99, mwu = stats.norm.ppf(stats.mannwhitneyu(signal_FR2_Z, noise_FR2_Z)[1]) tt_table[ii, 0] = np.abs(mwu) * ttest[0] / np.abs(ttest[0]) tt_table[ii, 1] = ttest[1] - except: + except: # TODO: what is the error that might be caught here? 
pass tt_table[np.isnan(tt_table)] = 0 tt_table[np.isinf(tt_table[:, 0]), 0] = np.percentile(tt_table[~np.isinf(tt_table[:, 0]), 0], @@ -711,7 +712,7 @@ def selcomps(seldict, mmix, head, manacc, debug=False, olevel=2, oversion=99, """ # epsmap is [index,level of overlap with dicemask, # number of high Rho components] - F05, F025, F01 = getfbounds(ne) + F05, F025, F01 = getfbounds(n_echos) epsmap = [] Rhos_sorted = np.array(sorted(Rhos))[::-1] # Make an initial guess as to number of good components based on @@ -721,11 +722,11 @@ def selcomps(seldict, mmix, head, manacc, debug=False, olevel=2, oversion=99, getelbow_cons(Kappas), getelbow_aggr(Kappas)] Kelbowval = np.median([getelbow_mod(Kappas, val=True), getelbow_cons(Kappas, val=True), - getelbow_aggr(Kappas, val=True)] + list(getfbounds(ne))) + getelbow_aggr(Kappas, val=True)] + list(getfbounds(n_echos))) Khighelbowval = stats.scoreatpercentile([getelbow_mod(Kappas, val=True), getelbow_cons(Kappas, val=True), getelbow_aggr(Kappas, val=True)] + - list(getfbounds(ne)), + list(getfbounds(n_echos)), 75, interpolation_method='lower') KRcut = np.median(KRcutguesses) # only use exclusive when inclusive is extremely inclusive - double KRcut @@ -779,7 +780,7 @@ def selcomps(seldict, mmix, head, manacc, debug=False, olevel=2, oversion=99, nc[Rhos > getelbow_mod(Rhos_sorted, val=True)]).shape[0]]) if debug: - print("found solution", ii, db.labels_) + lgr.info('found solution', ii, db.labels_) db = None epsmap = np.array(epsmap) @@ -789,7 +790,7 @@ def selcomps(seldict, mmix, head, manacc, debug=False, olevel=2, oversion=99, # Select index that maximizes Dice with guessmask but first # minimizes number of higher Rho components ii = int(epsmap[np.argmax(epsmap[epsmap[:, 2] == np.min(epsmap[:, 2]), 1], 0), 0]) - print('Component selection tuning: ', epsmap[:, 1].max()) + lgr.info('Component selection tuning: ', epsmap[:, 1].max()) db = DBSCAN(eps=.005+ii*.005, min_samples=3).fit(fz.T) ncl = nc[db.labels_ == 0] ncl = 
np.setdiff1d(ncl, rej) @@ -799,7 +800,7 @@ def selcomps(seldict, mmix, head, manacc, debug=False, olevel=2, oversion=99, to_clf = np.setdiff1d(nc, np.union1d(ncl, rej)) if len(group0) == 0 or len(group0) < len(KRguess) * .5: dbscanfailed = True - print("DBSCAN based guess failed. Using elbow guess method.") + lgr.info('DBSCAN based guess failed. Using elbow guess method.') ncl = np.setdiff1d(np.setdiff1d(nc[KRelbow == 2], rej), np.union1d(nc[tt_table[:, 0] < tt_lim], np.union1d(np.union1d(nc[spz > 1], @@ -810,8 +811,8 @@ def selcomps(seldict, mmix, head, manacc, debug=False, olevel=2, oversion=99, group_n1 = [] to_clf = np.setdiff1d(nc, np.union1d(group0, rej)) if len(group0) < 2 or (len(group0) < 4 and float(len(rej))/len(group0) > 3): - print("WARNING: Extremely limited reliable BOLD signal space. " - "Not filtering further into midk etc.") + lgr.info('WARNING: Extremely limited reliable BOLD signal space. ' + 'Not filtering further into midk etc.') midkfailed = True min_acc = np.array([]) if len(group0) != 0: @@ -1021,23 +1022,23 @@ def selcomps(seldict, mmix, head, manacc, debug=False, olevel=2, oversion=99, def tedpca(combmode, mask, stabilize, head, ste=0, mlepca=True): - nx, ny, nz, ne, nt = catd.shape + nx, ny, nz, n_echos, nt = catd.shape ste = np.array([int(ee) for ee in str(ste).split(',')]) if len(ste) == 1 and ste[0] == -1: - print("-Computing PCA of optimally combined multi-echo data") + lgr.info('-Computing PCA of optimally combined multi-echo data') OCmask = make_mask(OCcatd[:, :, :, np.newaxis, :]) d = fmask(OCcatd, OCmask) eim = eimask(d[:, np.newaxis, :]) eim = eim[:, 0] == 1 d = d[eim, :] elif len(ste) == 1 and ste[0] == 0: - print("-Computing PCA of spatially concatenated multi-echo data") - ste = np.arange(ne) + lgr.info('-Computing PCA of spatially concatenated multi-echo data') + ste = np.arange(n_echos) d = np.float64(fmask(catd, mask)) eim = eimask(d) == 1 d = d[eim] else: - print("-Computing PCA of TE #%s" % ','.join([str(ee) for ee in 
ste])) + lgr.info('-Computing PCA of TE #%s' % ','.join([str(ee) for ee in ste])) d = np.float64(np.concatenate([fmask(catd[:, :, :, ee, :], mask)[:, np.newaxis, :] for ee in ste-1], axis=1)) eim = eimask(d) == 1 @@ -1089,17 +1090,17 @@ def tedpca(combmode, mask, stabilize, head, ste=0, mlepca=True): ctb = np.vstack([ctb.T[0:3], sp]).T # Save state - print("Saving PCA") + lgr.info('Saving PCA') pcastate = {'u': u, 's': s, 'v': v, 'ctb': ctb, 'eigelb': eigelb, 'spmin': spmin, 'spcum': spcum} try: with open('pcastate.pkl', 'wb') as handle: pickle.dump(pcastate, handle) except TypeError: - print("Could not save PCA solution!") + lgr.info('Could not save PCA solution!') else: # if loading existing state - print("Loading PCA") + lgr.info('Loading PCA') with open('pcastate.pkl', 'rb') as handle: pcastate = pickle.load(handle) (u, s, v, ctb, @@ -1112,7 +1113,7 @@ def tedpca(combmode, mask, stabilize, head, ste=0, mlepca=True): kappas = ctb[ctb[:, 1].argsort(), 1] rhos = ctb[ctb[:, 2].argsort(), 2] - fmin, fmid, fmax = getfbounds(ne) + fmin, fmid, fmax = getfbounds(n_echos) kappa_thr = np.average(sorted([fmin, getelbow_mod(kappas, val=True)/2, fmid]), weights=[kdaw, 1, 1]) rho_thr = np.average(sorted([fmin, getelbow_cons(rhos, val=True)/2, fmid]), @@ -1144,7 +1145,7 @@ def tedpca(combmode, mask, stabilize, head, ste=0, mlepca=True): dd = u.dot(np.diag(s*np.array(pcsel, dtype=np.int))).dot(v) nc = s[pcsel].shape[0] - print("--Selected %i components. Minimum Kappa=%0.2f Rho=%0.2f" % (nc, kappa_thr, rho_thr)) + lgr.info('--Selected %i components. 
Minimum Kappa=%0.2f Rho=%0.2f' % (nc, kappa_thr, rho_thr)) dd = ((dd.T - dd.T.mean(0)) / dd.T.std(0)).T # Variance normalize timeseries dd = (dd - dd.mean()) / dd.std() # Variance normalize everything @@ -1171,7 +1172,7 @@ def tedica(nc, dd, conv, fixed_seed, cost, final_cost): return mmix -def gscontrol_raw(OCcatd, head, Ne, dtrank=4): +def gscontrol_raw(OCcatd, head, n_echos, dtrank=4): """ This function uses the spatial global signal estimation approach to modify catd (global variable) to removal global signal out of individual @@ -1180,7 +1181,7 @@ def gscontrol_raw(OCcatd, head, Ne, dtrank=4): polynomial basis of order=0 and degree=dtrank. """ - print("++ Applying amplitude-based T1 equilibration correction") + lgr.info('++ Applying amplitude-based T1 equilibration correction') # Legendre polynomial basis for denoising from scipy.special import lpmv @@ -1216,7 +1217,7 @@ def gscontrol_raw(OCcatd, head, Ne, dtrank=4): niwrite(OCcatd, aff, 'tsoc_nogs.nii', head) # Project glbase out of each echo - for ii in range(Ne): + for ii in range(n_echos): dat = catd[:, :, :, ii, :][Gmask] sol = np.linalg.lstsq(np.atleast_2d(glbase), dat.T) e_nogs = dat - np.dot(np.atleast_2d(sol[0][dtrank]).T, @@ -1245,7 +1246,7 @@ def gscontrol_mmix(mmix, acc, rej, midk, empty, head): bold_ts = np.dot(solG[0].T[:, acc], mmix[:, acc].T) sphis = bold_ts.min(-1) sphis -= sphis.mean() - print(sphis.shape) + lgr.info(sphis.shape) niwrite(unmask(sphis, mask), aff, 'sphis_hik.nii', head) """ @@ -1290,7 +1291,7 @@ def write_split_ts(data, comptable, mmix, acc, rej, midk, head, suffix=''): mask, mmix), mask) dmdata = mdata.T-mdata.T.mean(0) varexpl = (1-((dmdata.T-betas.dot(mmix.T))**2.).sum()/(dmdata**2.).sum())*100 - print('Variance explained: ', varexpl, '%') + lgr.info('Variance explained: ', varexpl, '%') midkts = betas[:, midk].dot(mmix.T[midk, :]) lowkts = betas[:, rej].dot(mmix.T[rej, :]) if len(acc) != 0: @@ -1361,31 +1362,31 @@ def writect(comptable, nt, acc, rej, midk, empty, 
ctname='', varexpl='-1'): def writeresults(OCcatd, comptable, mmix, nt, acc, rej, midk, empty, head): - print("++ Writing optimally combined time series") + lgr.info('++ Writing optimally combined time series') ts = OCcatd niwrite(ts, aff, 'ts_OC.nii', head) - print("++ Writing Kappa-filtered optimally combined timeseries") + lgr.info('++ Writing Kappa-filtered optimally combined timeseries') varexpl = write_split_ts(ts, comptable, mmix, acc, rej, midk, head, suffix='OC') - print("++ Writing signal versions of components") + lgr.info('++ Writing signal versions of components') ts_B = get_coeffs(ts, mask, mmix) niwrite(ts_B[:, :, :, :], aff, '_'.join(['betas', 'OC']) + '.nii', head) if len(acc) != 0: niwrite(ts_B[:, :, :, acc], aff, '_'.join(['betas_hik', 'OC'])+'.nii', head) - print("++ Writing optimally combined high-Kappa features") + lgr.info('++ Writing optimally combined high-Kappa features') writefeats(split_ts(ts, comptable, mmix, acc, rej, midk)[0], mmix[:, acc], mask, head, suffix='OC2') - print("++ Writing component table") + lgr.info('++ Writing component table') writect(comptable, nt, acc, rej, midk, empty, ctname='comp_table.txt', varexpl=varexpl) def writeresults_echoes(acc, rej, midk, head, comptable, mmix): - for ii in range(ne): - print("++ Writing Kappa-filtered TE#%i timeseries" % (ii+1)) + for ii in range(n_echos): + lgr.info('++ Writing Kappa-filtered TE#%i timeseries' % (ii+1)) write_split_ts(catd[:, :, :, ii, :], comptable, mmix, acc, rej, midk, head, suffix='e%i' % (ii+1)) @@ -1400,19 +1401,23 @@ def main(options): stabilize=False, fout=False, filecsdata=False, label=None, fixed_seed=42 """ - global tes, ne, catd, head, aff - tes = [float(te) for te in options.tes] - ne = len(tes) - catim = nib.load(options.data[0]) - head = catim.get_header() - head.extensions = [] - head.set_sform(head.get_sform(), code=1) - aff = catim.get_affine() - catd = cat2echos(catim.get_data(), ne) - nx, ny, nz, Ne, nt = catd.shape + # off to a bad start with 
globals + global tes, n_echos, catd, head, aff + tes = [float(te) for te in options.tes] + n_echos = len(tes) - # Parse options, prepare output directory + # get some info on the input data + # TODO: only works on nifti + catim = nib.load(options.data[0]) + head = catim.header + head.extensions = [] # clear extension info in header + head.set_sform(head.get_sform(), code=1) # reset sform code + aff = catim.get_affine() # TODO: gifti has no affine + catd = cat2echos(options.data, n_echos=n_echos) + nx, ny, nz, n_echos, nt = catd.shape + + # parse options, prepare output directory if options.fout: options.fout = aff else: @@ -1427,36 +1432,30 @@ def main(options): rdaw = float(options.rdaw) if options.label is not None: - dirname = '%s' % '.'.join(['TED', options.label]) + dirname = '.'.join(['TED', options.label]) else: dirname = 'TED' - os.system('mkdir %s' % dirname) + os.mkdir(dirname) if options.mixm is not None: try: - os.system('cp %s %s/meica_mix.1D; cp %s %s/%s' % (options.mixm, - dirname, - options.mixm, - dirname, - os.path.basename(options.mixm))) - except: + shutil.copyfile(options.mixm, os.path.join(dirname, 'meica_mix.1D')) + shutil.copyfile(options.mixm, os.path.join(dirname, os.path.basename(options.mixm))) + except shutil.Error: pass if options.ctab is not None: try: - os.system('cp %s %s/comp_table.txt; cp %s %s/%s' % (options.mixm, - dirname, - options.mixm, - dirname, - os.path.basename(options.mixm))) - except: + shutil.copyfile(options.mixm, os.path.join(dirname, 'comp_table.txt')) + shutil.copyfile(options.mixm, os.path.join(dirname, os.path.basename(options.mixm))) + except shutil.Error: pass os.chdir(dirname) - print("++ Computing Mask") + lgr.info('++ Computing Mask') global mask mask, masksum = makeadmask(catd, minimum=False, getsum=True) - print("++ Computing T2* map") + lgr.info('++ Computing T2* map') global t2s, s0, t2ss, s0s, t2sG, s0G t2s, s0, t2ss, s0s, t2sG, s0G = t2sadmap(catd, mask, tes, masksum, 1) @@ -1480,7 +1479,7 @@ def 
main(options): catd, OCcatd = gscontrol_raw(OCcatd, head, len(tes)) if options.mixm is None: - print("++ Doing ME-PCA and ME-ICA") + lgr.info('++ Doing ME-PCA and ME-ICA') nc, dd = tedpca(combmode, mask, stabilize, head, ste=options.ste) @@ -1520,8 +1519,8 @@ def main(options): acc, rej, midk, empty = ctabsel(options.ctab) if len(acc) == 0: - print("** WARNING! No BOLD components detected!!! ** \n" - "Please check data and results!") + lgr.info('** WARNING! No BOLD components detected!!! \n' + '** Please check data and results!') writeresults(OCcatd, comptable, mmix, nt, acc, rej, midk, empty, head) gscontrol_mmix(mmix, acc, rej, midk, empty, head) diff --git a/tedana/utils/utils.py b/tedana/utils/utils.py index aa48d57b9..f697fb730 100644 --- a/tedana/utils/utils.py +++ b/tedana/utils/utils.py @@ -1,110 +1,143 @@ """Utilities for meica package""" import numpy as np import nibabel as nib +from nibabel.filename_parser import split_add_ext +from nilearn._utils import check_niimg +import nilearn.masking as nimask from scipy.optimize import leastsq -from scipy.stats import scoreatpercentile from ..due import due, BibTeX -def cat2echos(data, Ne): +# TODO: Currently only accepts niftis -- do we need it to accept giftis? 
+def cat2echos(data, n_echos=None): """ - Separates z- and echo-axis in `data` + Coerces input `data` files to required array output Parameters ---------- - data : array_like - Array of shape (nx, ny, nz*Ne, nt) - Ne : int - Number of echoes that were in original (uncombined) data array + data : (X x Y x M x T) array_like or list-of-niimg-like + Input multi-echo data array or independent echo files, where M is Z * + the number of echos + n_echos : int + Number of echos Returns ------- - ndarray - Array of shape (nx, ny, nz, Ne, nt) + fdata : (X x Y x Z x E x T) np.ndarray + Where `X`, `Y`, `Z` are spatial dims, `E` is echos, and `T` is time """ - nx, ny = data.shape[0:2] - nz = data.shape[2] // Ne - if len(data.shape) > 3: - nt = data.shape[3] - else: - nt = 1 - return np.reshape(data, (nx, ny, nz, Ne, nt), order='F') + if isinstance(data, list): + # the individual echo files were provided + if len(data) > 2: + fdata = np.stack([nib.load(f).get_data() for f in data], axis=3) + # ensure we have a time dimension + if fdata.ndim < 5: + fdata = fdata[..., np.newaxis] + return fdata + # a z-concatenated file was provided (hopefully) + elif len(data) == 1: + if n_echos is None: + raise ValueError('Number of echos `n_echos` must be specified ' + 'if z-concatenated data file provided.') + data = nib.load(data[0]).get_data() + else: + raise ValueError('Cannot run `tedana` with only two echos: ' + '{}'.format(data)) + # either an array or a z-concatenated file was provided + nx, ny, nz = data.shape[:2], data.shape[2] // n_echos + fdata = data.reshape(nx, ny, nz, n_echos, -1, order='F') -def makeadmask(cdat, minimum=True, getsum=False): + return fdata + + +def uncat2echos(data): """ - Create a mask. 
+ Combines Z- and echo-axis in `data` + + Parameters + ---------- + data : (X x Y x Z x E x T) array_like + Multi-echo data array + + Returns + ------- + fdata : (X x Y x M x T) np.ndarray + Z-concatenated multi-echo data array, where M is Z * number of echos """ - nx, ny, nz, Ne, _ = cdat.shape - mask = np.ones((nx, ny, nz), dtype=np.bool) + if data.ndim < 4: + raise ValueError('Input data must have at least four dimensions; ' + 'provided data has only {0}'.format(data.ndim)) - if minimum: - mask = cdat.prod(axis=-1).prod(-1) != 0 - return mask - else: - # Make a map of longest echo that a voxel can be sampled with, - # with minimum value of map as X value of voxel that has median - # value in the 1st echo. N.b. larger factor leads to bias to lower TEs - emeans = cdat.mean(-1) - medv = emeans[:, :, :, 0] == scoreatpercentile(emeans[:, :, :, 0][emeans[:, :, :, 0] != 0], - 33, interpolation_method='higher') - lthrs = np.squeeze(np.array([emeans[:, :, :, ee][medv] / 3 for ee in range(Ne)])) - - if len(lthrs.shape) == 1: - lthrs = np.atleast_2d(lthrs).T - lthrs = lthrs[:, lthrs.sum(0).argmax()] - - mthr = np.ones([nx, ny, nz, Ne]) - for i_echo in range(Ne): - mthr[:, :, :, i_echo] *= lthrs[i_echo] - mthr = np.abs(emeans) > mthr - masksum = np.array(mthr, dtype=np.int).sum(-1) - mask = masksum != 0 - if getsum: - return mask, masksum - else: - return mask + (nx, ny), nz = data.shape[:2], np.prod(data.shape[2:4]) + return data.reshape(nx, ny, nz, -1, order='F') -def uncat2echos(data, Ne): +def makeadmask(data, minimum=True, getsum=False): """ - Combines z- and echo-axis in `data` + Makes map of `data` specifying longest echo a voxel can be sampled with Parameters ---------- - data : array_like - Array of shape (nx, ny, nz, Ne, nt) - Ne : int - Number of echoes; should be equal to `data.shape[3]` + data : (X x Y x Z x E x T) array_like + Where `X`, `Y`, `Z` are spatial dims, `E` is echos, and `T` is time + minimum : bool, optional + Use `make_min_mask` instead of 
generating a map with echo-specific. + Default: True + getsum : bool, optional + Return `masksum` in addition to mask. Default: False Returns ------- - ndarray - Array of shape (nx, ny, nz*Ne, nt) + mask : (X x Y x Z) np.ndarray + Boolean array of voxels that have sufficient signal in at least one + echo + masksum : (X x Y x Z) np.ndarray + Valued array indicating the number of echos with sufficient signal in a + given voxel. Only returned if `getsum = True` """ - nx, ny = data.shape[0:2] - nz = data.shape[2] * Ne - if len(data.shape) > 4: - nt = data.shape[4] - else: - nt = 1 - return np.reshape(data, (nx, ny, nz, nt), order='F') - - -def make_mask(catdata): + if minimum: + return make_min_mask(data) + + x, y, z, n_echos, _ = data.shape + emeans = data.mean(axis=-1) + first_echo = emeans[:, :, :, 0] + # make a map of longest echo with which a voxel can be sampled, with min + # value of map as X value of voxel that has median value in the 1st echo + # N.B. larger factor (%ile??) leads to bias to lower TEs + perc33 = np.percentile(first_echo[first_echo.nonzero()], 33, + interpolation='higher') # why take 33rd %ile? + medv = (first_echo == perc33) + lthrs = np.vstack([emeans[:, :, :, echo][medv] / 3 for echo in + range(n_echos)]) # why divide by three? 
+ lthrs = lthrs[:, lthrs.sum(0).argmax()] + mthr = np.ones(data.shape[:-1]) + for echo in range(n_echos): + mthr[:, :, :, echo] *= lthrs[echo] + + masksum = (np.abs(emeans) > mthr).astype('int').sum(axis=-1) + mask = (masksum != 0) + + if getsum: + return mask, masksum + + return mask + + +def make_min_mask(data): """ - Generates a 3D mask of `catdata` + Generates a 3D mask of `data` Only voxels that are consistently (i.e., across time AND echoes) non-zero - in `catdata` are True in output + in `data` are True in output Parameters ---------- - catdata : (X x Y x Z x E x T) array_like + data : (X x Y x Z x E x T) array_like Multi-echo data array, where X, Y, Z are spatial dimensions, E corresponds to individual echo data, and T is time @@ -114,8 +147,8 @@ def make_mask(catdata): Boolean array """ - catdata = np.asarray(catdata) - return catdata.prod(axis=-1).prod(axis=-1).astype('bool') + data = np.asarray(data) + return data.prod(axis=-1).prod(axis=-1).astype('bool') def make_opt_com(medata): @@ -130,34 +163,45 @@ def make_opt_com(medata): pass -def fmask(data, mask): +def fmask(data, mask=None): """ Masks `data` with non-zero entries of `mask` Parameters ---------- - data : array_like - Array of shape (nx, ny, nz[, Ne[, nt]]) - mask : array_like - Boolean array of shape (nx, ny, nz) + data : (X x Y x Z [x E [x T]) array_like or niimg-like object + Data array or data file to be masked + mask : (X x Y x Z) array_like or niimg-like object + Boolean array or mask file Returns ------- - ndarray - Masked array of shape (nx*ny*nz[, Ne[, nt]]) + fdata : (V [x E] x T) np.ndarray + Masked `data`, where `V` is voxels/vertices, `E` is echoes, and `T` is + time """ - s = data.shape - - N = s[0] * s[1] * s[2] - news = [] - news.append(N) + if mask is not None and not type(data) == type(mask): + raise TypeError('Provided `data` and `mask` must be of same type.') - if len(s) > 3: - news.extend(s[3:]) - - tmp1 = np.reshape(data, news) - fdata = tmp1.compress((mask > 0).ravel(), 
axis=0) + if isinstance(data, str): + root, ext, addext = split_add_ext(data) + if ext == '.gii': + # mask need not apply for gii files + fdata = np.column_stack([f.data for f in nib.load(data).darrays]) + else: + # use nilearn for other files + data = check_niimg(data) + if mask is not None: + # TODO: check that this uses same order to flatten + fdata = nimask.apply_mask(data, mask).T + else: + fdata = data.get_data().reshape((-1,) + data.shape[3:]) + elif isinstance(data, np.ndarray): + # flatten data over first three dimensions and apply mask + fdata = data.reshape((-1,) + data.shape[3:]) + if mask is not None: + fdata = fdata[mask.flatten() > 0] return fdata.squeeze() @@ -168,15 +212,15 @@ def unmask(data, mask): Parameters ---------- - data : array_like - Masked array of shape (nx*ny*nz[, Ne[, nt]]) - mask : array_like - Boolean array of shape (nx, ny, nz) + data : (V x E x T) array_like + Masked array, where V is voxels flattened across spatial dimensions + mask : (X x Y x Z) array_like + Boolean array that was used to mask `data` Returns ------- - ndarray - Array of shape (nx, ny, nz[, Ne[, nt]]) + fdata : (X x Y x Z x E x T) np.ndarray + Unmasked `data` array with spatial dimensions intact """ M = (mask != 0).ravel() From cdc8cf08e0da5b5f898155260226cc4ea52fb40a Mon Sep 17 00:00:00 2001 From: Ross Markello Date: Fri, 27 Apr 2018 11:35:21 -0400 Subject: [PATCH 02/18] [FIX] Screwed up nibabel import --- tedana/utils/utils.py | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/tedana/utils/utils.py b/tedana/utils/utils.py index f697fb730..d853b790b 100644 --- a/tedana/utils/utils.py +++ b/tedana/utils/utils.py @@ -1,7 +1,7 @@ """Utilities for meica package""" import numpy as np import nibabel as nib -from nibabel.filename_parser import split_add_ext +from nibabel.filename_parser import splitext_addext from nilearn._utils import check_niimg import nilearn.masking as nimask from scipy.optimize import leastsq @@ -151,18 +151,6 @@ 
def make_min_mask(data): return data.prod(axis=-1).prod(axis=-1).astype('bool') -def make_opt_com(medata): - """ - Makes optimal combination from input multi-echo data - - Parameters - ---------- - medata : tedana.interfaces.data.MultiEchoData - """ - - pass - - def fmask(data, mask=None): """ Masks `data` with non-zero entries of `mask` @@ -185,7 +173,7 @@ def fmask(data, mask=None): raise TypeError('Provided `data` and `mask` must be of same type.') if isinstance(data, str): - root, ext, addext = split_add_ext(data) + root, ext, addext = splitext_addext(data) if ext == '.gii': # mask need not apply for gii files fdata = np.column_stack([f.data for f in nib.load(data).darrays]) From a94692b9d8afc2e593bc2a594b94abc7e6e5b23a Mon Sep 17 00:00:00 2001 From: Ross Markello Date: Fri, 27 Apr 2018 16:20:13 -0400 Subject: [PATCH 03/18] [RF] Breaks everything; working to add gifti support In the process of adding gifti support, but breaking EVERYTHING. Ensuring that all stages of `tedana` do not require spatial information about the input data. Making minor aesthetic and stylistic updates as I go through the code. 
--- .gitignore | 2 + tedana/cli/run.py | 2 + tedana/interfaces/t2smap.py | 160 ++++++++++++++------------ tedana/interfaces/tedana.py | 150 ++++++++++++++----------- tedana/tests/test_utils.py | 60 ++++++++++ tedana/utils/__init__.py | 4 +- tedana/utils/utils.py | 218 ++++++++++++++++++++++-------------- 7 files changed, 372 insertions(+), 224 deletions(-) create mode 100644 tedana/tests/test_utils.py diff --git a/.gitignore b/.gitignore index 7bbc71c09..02a7b22b3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +data/ + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/tedana/cli/run.py b/tedana/cli/run.py index 4448ea811..65d9f6919 100644 --- a/tedana/cli/run.py +++ b/tedana/cli/run.py @@ -5,10 +5,12 @@ def get_parser(): """ Parses command line inputs for tedana + Returns ------- parser.parse_args() : argparse dict """ + parser = argparse.ArgumentParser() parser.add_argument('-d', dest='data', diff --git a/tedana/interfaces/t2smap.py b/tedana/interfaces/t2smap.py index 68ae8ec2c..5a6f94646 100644 --- a/tedana/interfaces/t2smap.py +++ b/tedana/interfaces/t2smap.py @@ -7,102 +7,113 @@ lgr = logging.getLogger(__name__) -def t2sadmap(catd, mask, tes, masksum, start_echo): +def t2sadmap(data, tes, mask, masksum, start_echo): """ - t2sadmap(catd,mask,tes,masksum) - - Input: - - catd has shape (nx,ny,nz,Ne,nt) - mask has shape (nx,ny,nz) - tes is a 1d numpy array - masksum + Parameters + ---------- + data : (S x E x T) array_like + Multi-echo data array, where `S` is samples, `E` is echos, and `T` is + time + tes : (E, ) list + Echo times + mask : (S, ) array_like + Boolean array indicating samples that are consistently (i.e., across + time AND echoes) non-zero + masksum : (S, ) array_like + Valued array indicating number of echos that have sufficient signal in + given sample + start_echo : int + First echo to consider + + Returns + ------- + t2sa : (S x E x T) np.ndarray + Limited T2* map + s0va : (S x E x T) np.ndarray + Limited S0 
map + t2ss : (S x E x T) np.ndarray + ??? + s0vs : (S x E x T) np.ndarray + ??? + t2saf : (S x E x T) np.ndarray + Full T2* map + s0vaf : (S x E x T) np.ndarray + Full S0 map """ - nx, ny, nz, Ne, nt = catd.shape - echodata = fmask(catd, mask) - Nm = echodata.shape[0] - t2ss = np.zeros([nx, ny, nz, Ne - 1]) - s0vs = t2ss.copy() + n_samp, n_echos, n_vols = data.shape + t2ss, s0vs = np.zeros([n_samp, n_echos - 1]), np.zeros([n_samp, n_echos - 1]) - for ne in range(start_echo, Ne + 1): - - # Do Log Linear fit - B = np.reshape(np.abs(echodata[:, :ne]) + 1, (Nm, ne * nt)).transpose() + for echo in range(start_echo, n_echos + 1): + # perform log linear fit of echo times against MR signal + B = np.reshape(np.abs(data[:, :echo, :]) + 1, + (n_samp, echo * n_vols)).T B = np.log(B) - neg_tes = [-1 * te for te in tes[:ne]] - x = np.array([np.ones(ne), neg_tes]) - X = np.tile(x, (1, nt)) - X = np.sort(X)[:, ::-1].transpose() + neg_tes = [-1 * te for te in tes[:echo]] + x = np.array([np.ones(echo), neg_tes]) + X = np.tile(x, (1, n_vols)) + X = np.sort(X)[:, ::-1].T beta, res, rank, sing = np.linalg.lstsq(X, B) - t2s = 1 / beta[1, :].transpose() - s0 = np.exp(beta[0, :]).transpose() - - t2s[np.isinf(t2s)] = 500. - s0[np.isnan(s0)] = 0. - - t2ss[:, :, :, ne - 2] = np.squeeze(unmask(t2s, mask)) - s0vs[:, :, :, ne - 2] = np.squeeze(unmask(s0, mask)) - - # Limited T2* and S0 maps - fl = np.zeros([nx, ny, nz, len(tes) - 2 + 1]) - for ne in range(Ne - 1): - fl_ = np.squeeze(fl[:, :, :, ne]) - fl_[masksum == ne + 2] = True - fl[:, :, :, ne] = fl_ - fl = np.array(fl, dtype=bool) - t2sa = np.squeeze(unmask(t2ss[fl], masksum > 1)) - s0va = np.squeeze(unmask(s0vs[fl], masksum > 1)) - - # Full T2* maps with S0 estimation errors - t2saf = t2sa.copy() - s0vaf = s0va.copy() + t2s = 1 / beta[1, :].T + s0 = np.exp(beta[0, :]).T + + t2s[np.isinf(t2s)] = 500. # why 500? + s0[np.isnan(s0)] = 0. # why 0? 
+ + t2ss[..., echo - 2] = np.squeeze(t2s) + s0vs[..., echo - 2] = np.squeeze(s0) + + # create limited T2* and S0 maps + fl = np.zeros([n_samp, len(tes) - 1], dtype=bool) + for echo in range(n_echos - 1): + fl_ = np.squeeze(fl[..., echo]) + fl_[masksum == echo + 2] = True + fl[..., echo] = fl_ + t2sa, s0va = masksum.copy(), masksum.copy() + t2sa[masksum > 1], s0va[masksum > 1] = t2ss[fl], s0vs[fl] + + # create full T2* maps with S0 estimation errors + t2saf, s0vaf = t2sa.copy(), s0va.copy() t2saf[masksum == 1] = t2ss[masksum == 1, 0] s0vaf[masksum == 1] = s0vs[masksum == 1, 0] return t2sa, s0va, t2ss, s0vs, t2saf, s0vaf -def optcom(data, t2, tes, mask, combmode, useG=False): +def optcom(data, t2, tes, mask, combmode): """ - out = optcom(data,t2s) - - - Input: - - data.shape = (nx,ny,nz,Ne,Nt) - t2s.shape = (nx,ny,nz) - tes.shape = len(Ne) - - Output: - - out.shape = (nx,ny,nz,Nt) + Parameters + ---------- + data : (S x E x T) array_like + t2 : (S, ) array_like + tes : (E, ) list + combmode : str + Must be in ['ste', 't2s']. 
Determines method for optimal combination + + Returns + ------- + comb_data : (S x T) np.ndarray + Optimally combined data """ - nx, ny, nz, Ne, Nt = data.shape - if useG: - fdat = fmask(data, mask) - ft2s = fmask(t2, mask) + n_samp, n_echos, n_vols = data.shape - else: - fdat = fmask(data, mask) - ft2s = fmask(t2, mask) + tes = np.array(tes)[np.newaxis] # (1 x E) array_like + t2s = t2[:, np.newaxis] # (S x 1) array_like - tes = np.array(tes) - tes = tes[np.newaxis, :] - ft2s = ft2s[:, np.newaxis] + comb_data = np.zeros((data.shape[0], data.shape[-1])) + mdata = data[mask] if combmode == 'ste': - alpha = fdat.mean(-1) * tes + alpha = mdata.mean(axis=-1) * tes else: - alpha = tes * np.exp(-tes / ft2s) + alpha = tes * np.exp(-tes / t2s[mask]) - alpha = np.tile(alpha[:, :, np.newaxis], (1, 1, Nt)) + alpha = np.tile(alpha[:, :, np.newaxis], (1, 1, n_vols)) + comb_data[mask] = np.average(mdata, axis=1, weights=alpha) - fout = np.average(fdat, axis=1, weights=alpha) - out = unmask(fout, mask) - return out + return comb_data def main(options): @@ -115,6 +126,7 @@ def main(options): tes = [float(te) for te in options.tes] ne = len(tes) + catim = nib.load(options.data[0]) head = catim.get_header() head.extensions = [] @@ -127,7 +139,7 @@ def main(options): mask, masksum = makeadmask(catd, minimum=False, getsum=True) lgr.info('++ Computing Adaptive T2* map') - t2s, s0, t2ss, s0vs, t2saf, s0vaf = t2sadmap(catd, mask, tes, masksum, 2) + t2s, s0, t2ss, s0vs, t2saf, s0vaf = t2sadmap(catd, tes, mask, masksum, 2) niwrite(masksum, aff, 'masksum%s.nii' % suf) niwrite(t2ss, aff, 't2ss%s.nii' % suf) niwrite(s0vs, aff, 's0vs%s.nii' % suf) diff --git a/tedana/interfaces/tedana.py b/tedana/interfaces/tedana.py index b69b99dc3..175ae991a 100644 --- a/tedana/interfaces/tedana.py +++ b/tedana/interfaces/tedana.py @@ -6,9 +6,10 @@ import numpy as np import nibabel as nib from sklearn import svm +from scipy.special import lpmv import scipy.stats as stats from tedana.interfaces import (optcom, 
t2sadmap) -from tedana.utils import (cat2echos, uncat2echos, make_mask, +from tedana.utils import (cat2echos, uncat2echos, make_min_mask, makeadmask, fmask, unmask, fitgaussian, niwrite, dice, andb) @@ -313,8 +314,8 @@ def eimask(dd, ees=None): hthr = 5 * stats.scoreatpercentile(dd[:, ee, :].flatten(), 98, interpolation_method='lower') lgr.info(lthr, hthr) - imask[dd[:, ee, :].mean(1) > lthr, ee] = 1 - imask[dd[:, ee, :].mean(1) > hthr, ee] = 0 + imask[dd[:, ee, :].mean(axis=1) > lthr, ee] = 1 + imask[dd[:, ee, :].mean(axis=1) > hthr, ee] = 0 return imask @@ -1022,31 +1023,29 @@ def selcomps(seldict, mmix, head, manacc, debug=False, olevel=2, oversion=99, def tedpca(combmode, mask, stabilize, head, ste=0, mlepca=True): - nx, ny, nz, n_echos, nt = catd.shape + n_samp, n_echos, n_vols = catd.shape ste = np.array([int(ee) for ee in str(ste).split(',')]) if len(ste) == 1 and ste[0] == -1: lgr.info('-Computing PCA of optimally combined multi-echo data') - OCmask = make_mask(OCcatd[:, :, :, np.newaxis, :]) - d = fmask(OCcatd, OCmask) + d = OCcatd[make_min_mask(OCcatd[:, np.newaxis, :])] eim = eimask(d[:, np.newaxis, :]) eim = eim[:, 0] == 1 d = d[eim, :] elif len(ste) == 1 and ste[0] == 0: lgr.info('-Computing PCA of spatially concatenated multi-echo data') ste = np.arange(n_echos) - d = np.float64(fmask(catd, mask)) + d = catd[mask].astype('float64') eim = eimask(d) == 1 d = d[eim] else: lgr.info('-Computing PCA of TE #%s' % ','.join([str(ee) for ee in ste])) - d = np.float64(np.concatenate([fmask(catd[:, :, :, ee, :], - mask)[:, np.newaxis, :] for ee in ste-1], axis=1)) - eim = eimask(d) == 1 - eim = np.squeeze(eim) + d = np.concatenate([catd[mask, ee, :][:, np.newaxis] for ee in ste - 1], + axis=1).astype('float64') + eim = np.squeeze(eimask(d) == 1) d = np.squeeze(d[eim]) - dz = ((d.T - d.T.mean(0)) / d.T.std(0)).T # Variance normalize timeseries - dz = (dz - dz.mean()) / dz.std() # Variance normalize everything + dz = ((d.T - d.T.mean(axis=0)) / d.T.std(axis=0)).T # 
var normalize ts + dz = (dz - dz.mean()) / dz.std() # var normalize everything if not os.path.exists('pcastate.pkl'): @@ -1057,18 +1056,20 @@ def tedpca(combmode, mask, stabilize, head, ste=0, mlepca=True): ppca.fit(dz) v = ppca.components_ s = ppca.explained_variance_ - u = np.dot(np.dot(dz, v.T), np.diag(1./s)) + u = np.dot(np.dot(dz, v.T), np.diag(1. / s)) else: u, s, v = np.linalg.svd(dz, full_matrices=0) sp = s/s.sum() eigelb = sp[getelbow_mod(sp)] - spdif = np.abs(sp[1:]-sp[:-1]) + spdif = np.abs(sp[1:] - sp[:-1]) spdifh = spdif[(spdif.shape[0]//2):] spdmin = spdif.min() spdthr = np.mean([spdifh.max(), spdmin]) - spmin = sp[(spdif.shape[0]//2)+(np.arange(spdifh.shape[0])[spdifh >= spdthr][0]) + 1] + spmin = sp[(spdif.shape[0]//2) + + (np.arange(spdifh.shape[0])[spdifh >= spdthr][0]) + + 1] spcum = [] spcumv = 0 for sss in sp: @@ -1079,8 +1080,11 @@ def tedpca(combmode, mask, stabilize, head, ste=0, mlepca=True): # Compute K and Rho for PCA comps eimum = np.atleast_2d(eim) eimum = np.transpose(eimum, np.argsort(np.atleast_2d(eim).shape)[::-1]) - eimum = np.array(np.squeeze(unmask(eimum.prod(1), mask)), - dtype=np.bool) + eimum = eimum.prod(axis=1) + o = np.zeros((mask.shape[0], *eimum.shape[1:])) + o[mask] = eimum + eimum = np.squeeze(o).astype(bool) + vTmix = v.T vTmixN = ((vTmix.T - vTmix.T.mean(0)) / vTmix.T.std(0)).T _, ctb, betasv, v_T = fitmodels_direct(catd, v.T, eimum, t2s, t2sG, @@ -1178,51 +1182,52 @@ def gscontrol_raw(OCcatd, head, n_echos, dtrank=4): modify catd (global variable) to removal global signal out of individual echo time series datasets. The spatial global signal is estimated from the optimally combined data after detrending with a Legendre - polynomial basis of order=0 and degree=dtrank. + polynomial basis of `order = 0` and `degree = dtrank`. 
""" lgr.info('++ Applying amplitude-based T1 equilibration correction') # Legendre polynomial basis for denoising - from scipy.special import lpmv - Lmix = np.array([lpmv(0, vv, np.linspace(-1, 1, OCcatd.shape[-1])) for vv in range(dtrank)]).T + n_vols = OCcatd.shape[-1] + Lmix = np.array([lpmv(0, vv, np.linspace(-1, 1, n_vols)) + for vv in range(dtrank)]).T - # Compute mean, std, mask local to this function + # compute mean, std, mask local to this function # inefficient, but makes this function a bit more modular - Gmu = OCcatd.mean(-1) + Gmu = OCcatd.mean(axis=-1) # temporal mean Gmask = Gmu != 0 # Find spatial global signal dat = OCcatd[Gmask] - Gmu[Gmask][:, np.newaxis] - sol = np.linalg.lstsq(Lmix, dat.T) # Legendre basis for detrending - detr = dat - np.dot(sol[0].T, Lmix.T)[0] - sphis = (detr).min(1) + sol = np.linalg.lstsq(Lmix, dat.T)[0] # Legendre basis for detrending + detr = dat - np.dot(sol.T, Lmix.T)[0] + sphis = (detr).min(axis=1) sphis -= sphis.mean() - niwrite(unmask(sphis, Gmask), aff, 'T1gs.nii', head) + # niwrite(unmask(sphis, Gmask), aff, 'T1gs.nii', head) # FIXME - # Find time course of the spatial global signal + # Find time course ofc the spatial global signal # make basis with the Legendre basis glsig = np.linalg.lstsq(np.atleast_2d(sphis).T, dat)[0] - glsig = (glsig-glsig.mean()) / glsig.std() + glsig = (glsig - glsig.mean()) / glsig.std() np.savetxt('glsig.1D', glsig) glbase = np.hstack([Lmix, glsig.T]) # Project global signal out of optimally combined data - sol = np.linalg.lstsq(np.atleast_2d(glbase), dat.T) - tsoc_nogs = dat - np.dot(np.atleast_2d(sol[0][dtrank]).T, + sol = np.linalg.lstsq(np.atleast_2d(glbase), dat.T)[0] + tsoc_nogs = dat - np.dot(np.atleast_2d(sol[dtrank]).T, np.atleast_2d(glbase.T[dtrank])) + Gmu[Gmask][:, np.newaxis] - niwrite(OCcatd, aff, 'tsoc_orig.nii', head) - OCcatd = unmask(tsoc_nogs, Gmask) - niwrite(OCcatd, aff, 'tsoc_nogs.nii', head) + # niwrite(OCcatd, aff, 'tsoc_orig.nii', head) # FIXME + OCcatd[Gmask] 
= tsoc_nogs + # niwrite(OCcatd, aff, 'tsoc_nogs.nii', head) # FIXME # Project glbase out of each echo - for ii in range(n_echos): - dat = catd[:, :, :, ii, :][Gmask] - sol = np.linalg.lstsq(np.atleast_2d(glbase), dat.T) - e_nogs = dat - np.dot(np.atleast_2d(sol[0][dtrank]).T, + for echo in range(n_echos): + dat = catd[Gmask, echo, :] + sol = np.linalg.lstsq(np.atleast_2d(glbase), dat.T)[0] + e_nogs = dat - np.dot(np.atleast_2d(sol[dtrank]).T, np.atleast_2d(glbase.T[dtrank])) - catd[:, :, :, ii, :] = unmask(e_nogs, Gmask) + catd[Gmask, echo, :] = e_nogs return catd, OCcatd @@ -1393,7 +1398,6 @@ def writeresults_echoes(acc, rej, midk, head, comptable, mmix): def main(options): """ - Args (and defaults): data, tes, mixm=None, ctab=None, manacc=None, strict=False, no_gscontrol=False, kdaw=10., rdaw=1., conv=2.5e-5, ste=-1, @@ -1407,48 +1411,61 @@ def main(options): tes = [float(te) for te in options.tes] n_echos = len(tes) - # get some info on the input data - # TODO: only works on nifti + # TODO: attempt to derive input data format as soon as possible + # we'll need to carry this through to writing out all the resultant output + # files for the rest of the script; options should include .nii and .gii + # + # output_type = get_input_type(options.data) + + # FIXME: only works on nifti catim = nib.load(options.data[0]) head = catim.header - head.extensions = [] # clear extension info in header - head.set_sform(head.get_sform(), code=1) # reset sform code - aff = catim.get_affine() # TODO: gifti has no affine + head.extensions = [] + head.set_sform(head.get_sform(), code=1) + aff = catim.affine + + # coerce data to samples x echos x time array catd = cat2echos(options.data, n_echos=n_echos) - nx, ny, nz, n_echos, nt = catd.shape + n_samp, n_echos, n_vols = catd.shape - # parse options, prepare output directory + # FIXME: only works on nifti if options.fout: options.fout = aff else: options.fout = None global kdaw, rdaw + kdaw = float(options.kdaw) + rdaw = 
float(options.rdaw) + if not options.stabilize: stabilize = False else: stabilize = True - kdaw = float(options.kdaw) - rdaw = float(options.rdaw) + # prepare output directory, copy over pre-generated outputs + dirname = 'TED' if options.label is not None: - dirname = '.'.join(['TED', options.label]) - else: - dirname = 'TED' - os.mkdir(dirname) + dirname = '.'.join([dirname, options.label]) + os.mkdir(dirname) # should we check to see if this already exists? if options.mixm is not None: try: - shutil.copyfile(options.mixm, os.path.join(dirname, 'meica_mix.1D')) - shutil.copyfile(options.mixm, os.path.join(dirname, os.path.basename(options.mixm))) + shutil.copyfile(options.mixm, + os.path.join(dirname, 'meica_mix.1D')) + shutil.copyfile(options.mixm, + os.path.join(dirname, + os.path.basename(options.mixm))) except shutil.Error: pass if options.ctab is not None: try: - shutil.copyfile(options.mixm, os.path.join(dirname, 'comp_table.txt')) - shutil.copyfile(options.mixm, os.path.join(dirname, os.path.basename(options.mixm))) + shutil.copyfile(options.ctab, + os.path.join(dirname, 'comp_table.txt')) + shutil.copyfile(options.ctab, + os.path.join(dirname, + os.path.basename(options.ctab))) except shutil.Error: pass - os.chdir(dirname) lgr.info('++ Computing Mask') @@ -1457,12 +1474,16 @@ def main(options): lgr.info('++ Computing T2* map') global t2s, s0, t2ss, s0s, t2sG, s0G - t2s, s0, t2ss, s0s, t2sG, s0G = t2sadmap(catd, mask, tes, masksum, 1) + # TODO: can we maybe not do this? returning six things is a lot... + # also, WHAT ARE THEY?!?!? + t2s, s0, t2ss, s0s, t2sG, s0G = t2sadmap(catd, tes, mask, masksum, 1) - # Condition values + # set a hard cap for the T2* map + # anything that is 10x higher than the 99.5 %ile will be reset to cap_t2s = stats.scoreatpercentile(t2s.flatten(), 99.5, interpolation_method='lower') - t2s[t2s > cap_t2s*10] = cap_t2s + t2s[t2s > cap_t2s * 10] = cap_t2s + # FIXME: need to write the appropriate output file type! 
niwrite(s0, aff, 's0v.nii', head) niwrite(t2s, aff, 't2sv.nii', head) niwrite(t2ss, aff, 't2ss.nii', head) @@ -1470,11 +1491,10 @@ def main(options): niwrite(s0G, aff, 's0vG.nii', head) niwrite(t2sG, aff, 't2svG.nii', head) - # Optimally combine data + # optimally combine data combmode = options.combmode global OCcatd - OCcatd = optcom(catd, t2sG, tes, mask, - combmode, useG=True) + OCcatd = optcom(catd, t2sG, tes, mask, combmode) if not options.no_gscontrol: catd, OCcatd = gscontrol_raw(OCcatd, head, len(tes)) @@ -1522,7 +1542,7 @@ def main(options): lgr.info('** WARNING! No BOLD components detected!!! \n' '** Please check data and results!') - writeresults(OCcatd, comptable, mmix, nt, acc, rej, midk, empty, head) + writeresults(OCcatd, comptable, mmix, n_vols, acc, rej, midk, empty, head) gscontrol_mmix(mmix, acc, rej, midk, empty, head) if options.dne: writeresults_echoes(acc, rej, midk, head, comptable, mmix) diff --git a/tedana/tests/test_utils.py b/tedana/tests/test_utils.py new file mode 100644 index 000000000..899f71018 --- /dev/null +++ b/tedana/tests/test_utils.py @@ -0,0 +1,60 @@ +""" +Tests for tedana.utils +""" + +import os.path +from tedana import utils +import nibabel as nb +import numpy as np + + +def test_load_image(): + pass + + +def test_cat2echos(): + pass + + +def test_makeadmask(): + pass + + +def test_make_min_mask(): + pass + + +def test_uncat2echos(): + pass + + +def test_fmask(): + pass + + +def test_unmask(): + pass + + +def test_moments(): + pass + + +def test_gaussian(): + pass + + +def test_fitgaussian(): + pass + + +def test_niwrite(): + pass + + +def test_dice(): + pass + + +def test_andb(): + pass diff --git a/tedana/utils/__init__.py b/tedana/utils/__init__.py index 35925964d..4059ccb24 100644 --- a/tedana/utils/__init__.py +++ b/tedana/utils/__init__.py @@ -2,13 +2,13 @@ # ex: set sts=4 ts=4 sw=4 et: from .utils import ( - cat2echos, uncat2echos, make_mask, + cat2echos, uncat2echos, make_min_mask, makeadmask, fmask, unmask, 
fitgaussian, niwrite, dice, andb, ) __all__ = [ - 'cat2echos', 'uncat2echos', 'make_mask', + 'cat2echos', 'uncat2echos', 'make_min_mask', 'makeadmask', 'fmask', 'unmask', 'fitgaussian', 'niwrite', 'dice', 'andb'] diff --git a/tedana/utils/utils.py b/tedana/utils/utils.py index d853b790b..e8b25af59 100644 --- a/tedana/utils/utils.py +++ b/tedana/utils/utils.py @@ -9,71 +9,86 @@ from ..due import due, BibTeX -# TODO: Currently only accepts niftis -- do we need it to accept giftis? -def cat2echos(data, n_echos=None): +def load_image(data): """ - Coerces input `data` files to required array output + Takes input `data` and returns a sample x time array Parameters ---------- - data : (X x Y x M x T) array_like or list-of-niimg-like - Input multi-echo data array or independent echo files, where M is Z * - the number of echos - n_echos : int - Number of echos + data : (X x Y x Z [x T]) array_like or niimg-like object + Data array or data file to be loaded / reshaped Returns ------- - fdata : (X x Y x Z x E x T) np.ndarray - Where `X`, `Y`, `Z` are spatial dims, `E` is echos, and `T` is time + fdata : (S x T) np.ndarray + Reshaped `data`, where `S` is samples and `T` is time """ - if isinstance(data, list): - # the individual echo files were provided - if len(data) > 2: - fdata = np.stack([nib.load(f).get_data() for f in data], axis=3) - # ensure we have a time dimension - if fdata.ndim < 5: - fdata = fdata[..., np.newaxis] + if isinstance(data, str): + root, ext, addext = splitext_addext(data) + if ext == '.gii': + fdata = np.column_stack([f.data for f in nib.load(data).darrays]) return fdata - # a z-concatenated file was provided (hopefully) - elif len(data) == 1: - if n_echos is None: - raise ValueError('Number of echos `n_echos` must be specified ' - 'if z-concatenated data file provided.') - data = nib.load(data[0]).get_data() else: - raise ValueError('Cannot run `tedana` with only two echos: ' - '{}'.format(data)) + data = check_niimg(data).get_data() - # either an 
array or a z-concatenated file was provided - nx, ny, nz = data.shape[:2], data.shape[2] // n_echos - fdata = data.reshape(nx, ny, nz, n_echos, -1, order='F') + fdata = data.reshape((-1,) + data.shape[3:]) - return fdata + return fdata.squeeze() -def uncat2echos(data): +def cat2echos(data, n_echos=None): """ - Combines Z- and echo-axis in `data` + Coerces input `data` files to required 3D array output Parameters ---------- - data : (X x Y x Z x E x T) array_like - Multi-echo data array + data : (X x Y x M x T) array_like or list-of-img-like + Input multi-echo data array, where `X` and `Y` are spatial dimensions, + `M` is the Z-spatial dimensions with all the input echos concatenated, + and `T` is time. A list of image-like objects (e.g., .nii or .gii) are + accepted, as well + n_echos : int, optional + Number of echos in provided data array. Only necessary if `data` is + array_like. Default: None Returns ------- - fdata : (X x Y x M x T) np.ndarray - Z-concatenated multi-echo data array, where M is Z * number of echos + fdata : (S x E x T) np.ndarray + Output data where `S` is samples, `E` is echos, and `T` is time """ - if data.ndim < 4: - raise ValueError('Input data must have at least four dimensions; ' - 'provided data has only {0}'.format(data.ndim)) + # data files were provided + if isinstance(data, list): + # individual echo files were provided + if len(data) > 2: + fdata = np.stack([load_image(f) for f in data], axis=1) + # a z-concatenated file was provided + elif len(data) == 1: + if n_echos is None: + raise ValueError('Number of echos `n_echos` must be specified ' + 'if z-concatenated data file provided.') + fdata = load_image(data[0]) + # only two echo files were provided, which doesn't fly + else: + raise ValueError('Cannot run `tedana` with only two echos: ' + '{}'.format(data)) + # ensure data has a time axis + if fdata.ndim < 3: + fdata = fdata[..., np.newaxis] + # data array was provided (is this necessary?) 
+ elif isinstance(data, np.ndarray): + if data.ndim != 4: + raise ValueError('Data must be 4-dimensional, where the ' + 'dimensions correspond to: (1) first spatial ' + 'dimensions, (2) second spatial dimension, (3) ' + 'third spatial dimension x number of echos, and ' + '(4) time. Provided data dimensions were: ' + '{}'.format(data.shape)) + nx, ny, nz = data.shape[:2], data.shape[2] // n_echos + fdata = load_image(data.reshape(nx, ny, nz, n_echos, -1, order='F')) - (nx, ny), nz = data.shape[:2], np.prod(data.shape[2:4]) - return data.reshape(nx, ny, nz, -1, order='F') + return fdata def makeadmask(data, minimum=True, getsum=False): @@ -82,20 +97,21 @@ def makeadmask(data, minimum=True, getsum=False): Parameters ---------- - data : (X x Y x Z x E x T) array_like - Where `X`, `Y`, `Z` are spatial dims, `E` is echos, and `T` is time + data : (S x E x T) array_like + Multi-echo data array, where `S` is samples, `E` is echos, and `T` is + time minimum : bool, optional - Use `make_min_mask` instead of generating a map with echo-specific. - Default: True + Use `make_min_mask()` instead of generating a map with echo-specific + times. Default: True getsum : bool, optional - Return `masksum` in addition to mask. Default: False + Return `masksum` in addition to `mask`. Default: False Returns ------- - mask : (X x Y x Z) np.ndarray + mask : (S, ) np.ndarray Boolean array of voxels that have sufficient signal in at least one echo - masksum : (X x Y x Z) np.ndarray + masksum : (S, ) np.ndarray Valued array indicating the number of echos with sufficient signal in a given voxel. 
Only returned if `getsum = True` """ @@ -103,23 +119,23 @@ def makeadmask(data, minimum=True, getsum=False): if minimum: return make_min_mask(data) - x, y, z, n_echos, _ = data.shape - emeans = data.mean(axis=-1) - first_echo = emeans[:, :, :, 0] + n_samp, n_echos, n_vols = data.shape + echo_means = data.mean(axis=-1) # temporal mean of echos + first_echo = echo_means[..., 0] # make a map of longest echo with which a voxel can be sampled, with min # value of map as X value of voxel that has median value in the 1st echo # N.B. larger factor (%ile??) leads to bias to lower TEs - perc33 = np.percentile(first_echo[first_echo.nonzero()], 33, - interpolation='higher') # why take 33rd %ile? - medv = (first_echo == perc33) - lthrs = np.vstack([emeans[:, :, :, echo][medv] / 3 for echo in + perc_33 = np.percentile(first_echo[first_echo.nonzero()], 33, + interpolation='higher') # why take 33rd %ile? + med_val = (first_echo == perc_33) + lthrs = np.vstack([echo_means[..., echo][med_val] / 3 for echo in range(n_echos)]) # why divide by three? 
lthrs = lthrs[:, lthrs.sum(0).argmax()] mthr = np.ones(data.shape[:-1]) for echo in range(n_echos): - mthr[:, :, :, echo] *= lthrs[echo] + mthr[..., echo] *= lthrs[echo] - masksum = (np.abs(emeans) > mthr).astype('int').sum(axis=-1) + masksum = (np.abs(echo_means) > mthr).astype('int').sum(axis=-1) mask = (masksum != 0) if getsum: @@ -137,18 +153,71 @@ def make_min_mask(data): Parameters ---------- - data : (X x Y x Z x E x T) array_like - Multi-echo data array, where X, Y, Z are spatial dimensions, E - corresponds to individual echo data, and T is time + data : (S x E x T) array_like + Multi-echo data array, where `S` is samples, `E` is echos, and `T` is + time Returns ------- - mask : (X x Y x Z) np.ndarray + mask : (S, ) np.ndarray Boolean array """ - data = np.asarray(data) - return data.prod(axis=-1).prod(axis=-1).astype('bool') + data = np.asarray(data).astype(bool) + return data.prod(axis=-1).prod(axis=-1).astype(bool) + + +def get_input_type(input): + pass + + +def niwrite(data, affine, name, head, outtype='.nii.gz'): + """ + Write out nifti file. + + Parameters + ---------- + data : array_like + affine : (4 x 4) array_like + Affine for output file + name : str + Name to save output file to + head : object + outtype : str, optional + Output type of file. 
Default: '.nii.gz' + """ + + # get rid of NaN + data[np.isnan(data)] = 0 + # set header info + header = head.copy() + header.set_data_shape(list(data.shape)) + outni = nib.Nifti1Image(data, affine, header=header) + outni.set_data_dtype('float64') + outni.to_filename(name) + + +def uncat2echos(data): + """ + Combines Z- and echo-axis in `data` + + Parameters + ---------- + data : (X x Y x Z x E x T) array_like + Multi-echo data array + + Returns + ------- + fdata : (X x Y x M x T) np.ndarray + Z-concatenated multi-echo data array, where M is Z * number of echos + """ + + if data.ndim < 4: + raise ValueError('Input data must have at least four dimensions; ' + 'provided data has only {0}'.format(data.ndim)) + + (nx, ny), nz = data.shape[:2], np.prod(data.shape[2:4]) + return data.reshape(nx, ny, nz, -1, order='F') def fmask(data, mask=None): @@ -164,9 +233,8 @@ def fmask(data, mask=None): Returns ------- - fdata : (V [x E] x T) np.ndarray - Masked `data`, where `V` is voxels/vertices, `E` is echoes, and `T` is - time + fdata : (S x E x T) np.ndarray + Masked `data`, where `S` is samples, `E` is echoes, and `T` is time """ if mask is not None and not type(data) == type(mask): @@ -200,8 +268,8 @@ def unmask(data, mask): Parameters ---------- - data : (V x E x T) array_like - Masked array, where V is voxels flattened across spatial dimensions + data : (S x E x T) array_like + Masked array, where S is samples flattened across spatial dimensions mask : (X x Y x Z) array_like Boolean array that was used to mask `data` @@ -323,22 +391,6 @@ def errorfunction(p, data): return p -def niwrite(data, affine, name, head, header=None): - """ - Write out nifti file. 
- """ - data[np.isnan(data)] = 0 - if header is None: - this_header = head.copy() - this_header.set_data_shape(list(data.shape)) - else: - this_header = header - - outni = nib.Nifti1Image(data, affine, header=this_header) - outni.set_data_dtype('float64') - outni.to_filename(name) - - @due.dcite(BibTeX('@article{dice1945measures,' 'author={Dice, Lee R},' 'title={Measures of the amount of ecologic association between species},' From 4e735bb97268688a7613ad8745f537b31f145940 Mon Sep 17 00:00:00 2001 From: Ross Markello Date: Fri, 4 May 2018 01:04:47 -0400 Subject: [PATCH 04/18] [RF] Still very broken fixing fitmodels_direct Everything is still very broken as I'm working through to fix all the functions to work with 3D arrays (samples x echos x time). I found the place where things start to diverge from the previous versions, but it seems to be due to numerical instabilities? Unclear. Will figure out later! --- tedana/interfaces/t2smap.py | 23 ++-- tedana/interfaces/tedana.py | 225 ++++++++++++++++++------------------ tedana/utils/utils.py | 55 +++------ 3 files changed, 141 insertions(+), 162 deletions(-) diff --git a/tedana/interfaces/t2smap.py b/tedana/interfaces/t2smap.py index 5a6f94646..f83d193a6 100644 --- a/tedana/interfaces/t2smap.py +++ b/tedana/interfaces/t2smap.py @@ -42,17 +42,16 @@ def t2sadmap(data, tes, mask, masksum, start_echo): """ n_samp, n_echos, n_vols = data.shape + data = data[mask] t2ss, s0vs = np.zeros([n_samp, n_echos - 1]), np.zeros([n_samp, n_echos - 1]) for echo in range(start_echo, n_echos + 1): # perform log linear fit of echo times against MR signal - B = np.reshape(np.abs(data[:, :echo, :]) + 1, - (n_samp, echo * n_vols)).T - B = np.log(B) - neg_tes = [-1 * te for te in tes[:echo]] - x = np.array([np.ones(echo), neg_tes]) - X = np.tile(x, (1, n_vols)) - X = np.sort(X)[:, ::-1].T + # make DV matrix: samples x (time series * echos) + B = np.log((np.abs(data[:, :echo, :]) + 1).reshape(len(data), -1).T) + # make IV matrix: intercept/TEs
x (time series * echos) + x = np.column_stack([np.ones(echo), [-te for te in tes[:echo]]]) + X = np.repeat(x, n_vols, axis=0) beta, res, rank, sing = np.linalg.lstsq(X, B) t2s = 1 / beta[1, :].T @@ -61,8 +60,8 @@ def t2sadmap(data, tes, mask, masksum, start_echo): t2s[np.isinf(t2s)] = 500. # why 500? s0[np.isnan(s0)] = 0. # why 0? - t2ss[..., echo - 2] = np.squeeze(t2s) - s0vs[..., echo - 2] = np.squeeze(s0) + t2ss[..., echo - 2] = np.squeeze(unmask(t2s, mask)) + s0vs[..., echo - 2] = np.squeeze(unmask(s0, mask)) # create limited T2* and S0 maps fl = np.zeros([n_samp, len(tes) - 1], dtype=bool) @@ -70,8 +69,8 @@ def t2sadmap(data, tes, mask, masksum, start_echo): fl_ = np.squeeze(fl[..., echo]) fl_[masksum == echo + 2] = True fl[..., echo] = fl_ - t2sa, s0va = masksum.copy(), masksum.copy() - t2sa[masksum > 1], s0va[masksum > 1] = t2ss[fl], s0vs[fl] + t2sa, s0va = unmask(t2ss[fl], masksum > 1), unmask(s0vs[fl], masksum > 1) + # t2sa[masksum > 1], s0va[masksum > 1] = t2ss[fl], s0vs[fl] # create full T2* maps with S0 estimation errors t2saf, s0vaf = t2sa.copy(), s0va.copy() @@ -89,7 +88,7 @@ def optcom(data, t2, tes, mask, combmode): t2 : (S, ) array_like tes : (E, ) list combmode : str - Must be in ['ste', 't2s']. Determines method for optimal combination + Must be in ['ste', 't2s']. Determines method for optimal combination. 
Returns ------- diff --git a/tedana/interfaces/tedana.py b/tedana/interfaces/tedana.py index 175ae991a..b903e361f 100644 --- a/tedana/interfaces/tedana.py +++ b/tedana/interfaces/tedana.py @@ -8,6 +8,7 @@ from sklearn import svm from scipy.special import lpmv import scipy.stats as stats +from scipy.stats import skew from tedana.interfaces import (optcom, t2sadmap) from tedana.utils import (cat2echos, uncat2echos, make_min_mask, makeadmask, fmask, unmask, @@ -147,37 +148,38 @@ def rankvec(vals): def get_coeffs(data, mask, X, add_const=False): """ - get_coeffs(data,X) + get_coeffs(data, X) Parameters ---------- - data : array-like - Array of shape (nx, ny, nz, nt) - mask : array-like - Array of shape (nx, ny, nz) - X : array-like - Array of shape (nt, nc) + data : (S x T) array-like + Array where `S` is samples and `T` is time + mask : (S,) array-like + Boolean mask array + X : (T x C) array-like + Array where `T` is time and `C` is components add_const : bool, optional - Default is False. + Add intercept column to `X` before fitting. 
Default: False Returns ------- - out : array_like - Array of shape (nx, ny, nz, nc) + out : (S x C) np.ndarray + Array of betas for all samples `S` """ - mdata = fmask(data, mask).transpose() + + mdata = data[mask].T # Coerce X to >=2d X = np.atleast_2d(X) if X.shape[0] == 1: X = X.T - Xones = np.atleast_2d(np.ones(np.min(mdata.shape))).T - if add_const: - X = np.hstack([X, Xones]) + if add_const: # add intercept + Xones = np.ones((np.min(mdata.shape), 1)) + X = np.column_stack([X, Xones]) - tmpbetas = np.linalg.lstsq(X, mdata)[0].transpose() - if add_const: + tmpbetas = np.linalg.lstsq(X, mdata)[0].T + if add_const: # drop beta for intercept tmpbetas = tmpbetas[:, :-1] out = unmask(tmpbetas, mask) @@ -235,10 +237,11 @@ def getelbow_mod(ks, val=False): Either the elbow index (if val is True) or the values at the elbow index (if val is False) """ + ks = np.sort(ks)[::-1] nc = ks.shape[0] coords = np.array([np.arange(nc), ks]) - p = coords - np.tile(np.reshape(coords[:, 0], (2, 1)), (1, nc)) + p = coords - coords[:, 0].reshape(2, 1) b = p[:, -1] b_hat = np.reshape(b / np.sqrt((b ** 2).sum()), (2, 1)) proj_p_b = p - np.dot(b_hat.T, p) * np.tile(b_hat, (1, nc)) @@ -304,18 +307,21 @@ def getfbounds(n_echos): def eimask(dd, ees=None): + """ + Returns mask for data between [0.001, 5] * 98th percentile of dd + """ if ees is None: ees = range(dd.shape[1]) - imask = np.zeros([dd.shape[0], len(ees)]) + imask = np.zeros([dd.shape[0], len(ees)], dtype=bool) for ee in ees: lgr.info(ee) - lthr = 0.001 * stats.scoreatpercentile(dd[:, ee, :].flatten(), - 98, interpolation_method='lower') - hthr = 5 * stats.scoreatpercentile(dd[:, ee, :].flatten(), - 98, interpolation_method='lower') + perc98 = stats.scoreatpercentile(dd[:, ee, :].flatten(), 98, + interpolation_method='lower') + lthr, hthr = 0.001 * perc98, 5 * perc98 lgr.info(lthr, hthr) - imask[dd[:, ee, :].mean(axis=1) > lthr, ee] = 1 - imask[dd[:, ee, :].mean(axis=1) > hthr, ee] = 0 + m = dd[:, ee, :].mean(axis=1) + 
imask[np.logical_and(m > lthr, m < hthr), ee] = True + return imask @@ -332,16 +338,22 @@ def split_ts(data, comptable, mmix, acc, rej, midk): def computefeats2(data, mmix, mask, normalize=True): # Write feature versions of components data = data[mask] - data_vn = (data-data.mean(axis=-1)[:, np.newaxis])/data.std(axis=-1)[:, np.newaxis] + # demean data + data_vn = (data - data.mean(axis=-1, keepdims=True)) / data.std(axis=-1, keepdims=True) + # get betas for demeaned data against `mmix` data_R = get_coeffs(unmask(data_vn, mask), mask, mmix)[mask] - data_R[data_R < -.999] = -0.999 - data_R[data_R > .999] = .999 + # cap betas to range [-0.999, 0.999] + data_R[data_R < -0.999] = -0.999 + data_R[data_R > 0.999] = 0.999 + # R-to-Z transform? data_Z = np.arctanh(data_R) if len(data_Z.shape) == 1: data_Z = np.atleast_2d(data_Z).T if normalize: - data_Z = (((data_Z.T - data_Z.mean(0)[:, np.newaxis]) / - data_Z.std(0)[:, np.newaxis]) + (data_Z.mean(0)/data_Z.std(0))[:, np.newaxis]).T + # standardize + data_Zm = (data_Z - data_Z.mean(axis=0, keepdims=True)) / data_Z.std(axis=0, keepdims=True) + # add back mean / stdev + data_Z = data_Zm + (data_Z.mean(axis=0, keepdims=True) / data_Z.std(axis=0, keepdims=True)) return data_Z @@ -359,36 +371,31 @@ def ctabsel(ctabfile): def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, head, fout=None, reindex=False, mmixN=None, full_sel=True): """ - Usage: - - fitmodels_direct(fout) - Input: fout is flag for output of per-component TE-dependence maps t2s is a (nx,ny,nz) ndarray tes is a 1d array """ - # Compute opt. com. 
raw data - tsoc = np.array(optcom(catd, t2sG, tes, mask, combmode, useG=True), + # compute optimal combination of raw data + tsoc = np.array(optcom(catd, t2sG, tes, mask, combmode), dtype=float)[mask] - tsoc_mean = tsoc.mean(axis=-1) - tsoc_dm = tsoc - tsoc_mean[:, np.newaxis] + # demean optimal combination + tsoc_dm = tsoc - tsoc.mean(axis=-1, keepdims=True) - # Compute un-normalized weight dataset (features) + # compute un-normalized weight dataset (features) if mmixN is None: mmixN = mmix WTS = computefeats2(unmask(tsoc, mask), mmixN, mask, normalize=False) - # Compute PSC dataset - shouldn't have to refit data + # compute PSC dataset - shouldn't have to refit data global tsoc_B tsoc_B = get_coeffs(unmask(tsoc_dm, mask), mask, mmix)[mask] tsoc_Babs = np.abs(tsoc_B) - PSC = tsoc_B/tsoc.mean(axis=-1)[:, np.newaxis]*100 + PSC = tsoc_B / tsoc.mean(axis=-1, keepdims=True) * 100 - # Compute skews to determine signs based on unnormalized weights, + # compute skews to determine signs based on unnormalized weights, # correct mmix & WTS signs based on spatial distribution tails - from scipy.stats import skew signs = skew(WTS, axis=0) signs /= np.abs(signs) mmix = mmix.copy() @@ -398,53 +405,52 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, head, totvar = (tsoc_B**2).sum() totvar_norm = (WTS**2).sum() - # Compute Betas and means over TEs for TE-dependence analysis + # compute Betas and means over TEs for TE-dependence analysis n_echos = len(tes) - betas = cat2echos(get_coeffs(uncat2echos(catd), - np.tile(mask, (1, 1, n_echos)), - mmix), n_echos) - nx, ny, nz, n_echos, nc = betas.shape + betas = get_coeffs(catd, + np.repeat(mask[:, np.newaxis], n_echos, axis=1), + mmix) + n_samp, n_echos, n_components = betas.shape Nm = mask.sum() NmD = (t2s != 0).sum() mu = catd.mean(axis=-1) tes = np.reshape(tes, (n_echos, 1)) fmin, fmid, fmax = getfbounds(n_echos) - # Mask arrays - mumask = fmask(mu, t2s != 0) - t2smask = fmask(t2s, t2s != 0) - betamask = 
fmask(betas, t2s != 0) + # mask arrays + mumask = mu[t2s != 0] + t2smask = t2s[t2s != 0] + betamask = betas[t2s != 0] - # Setup Xmats - X1 = mumask.transpose() # Model 1 - X2 = np.tile(tes, - (1, NmD)) * mumask.transpose() / t2smask.transpose() # Model 2 + # setup Xmats + X1 = mumask.T # Model 1 + X2 = np.tile(tes, (1, NmD)) * mumask.T / t2smask.T # Model 2 - # Tables for component selection + # tables for component selection global Kappas, Rhos, varex, varex_norm global Z_maps, F_R2_maps, F_S0_maps global Z_clmaps, F_R2_clmaps, F_S0_clmaps global Br_clmaps_R2, Br_clmaps_S0 - Kappas = np.zeros([nc]) - Rhos = np.zeros([nc]) - varex = np.zeros([nc]) - varex_norm = np.zeros([nc]) - Z_maps = np.zeros([Nm, nc]) - F_R2_maps = np.zeros([NmD, nc]) - F_S0_maps = np.zeros([NmD, nc]) - Z_clmaps = np.zeros([Nm, nc]) - F_R2_clmaps = np.zeros([NmD, nc]) - F_S0_clmaps = np.zeros([NmD, nc]) - Br_clmaps_R2 = np.zeros([Nm, nc]) - Br_clmaps_S0 = np.zeros([Nm, nc]) - - for i in range(nc): - - # size of B is (nc, nx*ny*nz) - B = np.atleast_3d(betamask)[:, :, i].transpose() + Kappas = np.zeros([n_components]) + Rhos = np.zeros([n_components]) + varex = np.zeros([n_components]) + varex_norm = np.zeros([n_components]) + Z_maps = np.zeros([Nm, n_components]) + F_R2_maps = np.zeros([NmD, n_components]) + F_S0_maps = np.zeros([NmD, n_components]) + Z_clmaps = np.zeros([Nm, n_components]) + F_R2_clmaps = np.zeros([NmD, n_components]) + F_S0_clmaps = np.zeros([NmD, n_components]) + Br_clmaps_R2 = np.zeros([Nm, n_components]) + Br_clmaps_S0 = np.zeros([Nm, n_components]) + + for i in range(n_components): + + # size of B is (n_components, n_samp) + B = np.atleast_3d(betamask)[:, :, i].T alpha = (np.abs(B)**2).sum(axis=0) - varex[i] = (tsoc_B[:, i]**2).sum()/totvar*100. - varex_norm[i] = (unmask(WTS, mask)[t2s != 0][:, i]**2).sum()/totvar_norm * 100. + varex[i] = (tsoc_B[:, i]**2).sum() / totvar * 100. + varex_norm[i] = (unmask(WTS, mask)[t2s != 0][:, i]**2).sum() / totvar_norm * 100. 
# S0 Model coeffs_S0 = (B * X1).sum(axis=0) / (X1**2).sum(axis=0) @@ -457,26 +463,23 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, head, coeffs_R2 = (B * X2).sum(axis=0) / (X2**2).sum(axis=0) SSE_R2 = (B - X2 * np.tile(coeffs_R2, (n_echos, 1)))**2 SSE_R2 = SSE_R2.sum(axis=0) - F_R2 = (alpha - SSE_R2)*2/(SSE_R2) + F_R2 = (alpha - SSE_R2) * 2 / (SSE_R2) F_R2_maps[:, i] = F_R2 - # Compute weights as Z-values + # compute weights as Z-values wtsZ = (WTS[:, i] - WTS[:, i].mean()) / WTS[:, i].std() wtsZ[np.abs(wtsZ) > Z_MAX] = (Z_MAX * (np.abs(wtsZ) / wtsZ))[np.abs(wtsZ) > Z_MAX] Z_maps[:, i] = wtsZ - # Compute Kappa and Rho + # compute Kappa and Rho F_S0[F_S0 > F_MAX] = F_MAX F_R2[F_R2 > F_MAX] = F_MAX - Kappas[i] = np.average(F_R2, - weights=np.abs(np.squeeze(unmask(wtsZ, - mask)[t2s != 0]**2.))) - Rhos[i] = np.average(F_S0, - weights=np.abs(np.squeeze(unmask(wtsZ, - mask)[t2s != 0]**2.))) + norm_weights = np.abs(np.squeeze(unmask(wtsZ, mask)[t2s != 0]**2.)) + Kappas[i] = np.average(F_R2, weights=norm_weights) + Rhos[i] = np.average(F_S0, weights=norm_weights) # Tabulate component values - comptab_pre = np.vstack([np.arange(nc), Kappas, Rhos, varex, varex_norm]).T + comptab_pre = np.vstack([np.arange(n_components), Kappas, Rhos, varex, varex_norm]).T if reindex: # Re-index all components in Kappa order comptab = comptab_pre[comptab_pre[:, 1].argsort()[::-1], :] @@ -501,10 +504,10 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, head, # Full selection including clustering criteria seldict = None if full_sel: - for i in range(nc): + for i in range(n_components): # Save out files - out = np.zeros((nx, ny, nz, 4)) + out = np.zeros((n_samp, 4)) if fout is not None: ccname = "cc%.3d.nii" % i else: @@ -1023,33 +1026,28 @@ def selcomps(seldict, mmix, head, manacc, debug=False, olevel=2, oversion=99, def tedpca(combmode, mask, stabilize, head, ste=0, mlepca=True): + n_samp, n_echos, n_vols = catd.shape ste = np.array([int(ee) for ee in 
str(ste).split(',')]) + if len(ste) == 1 and ste[0] == -1: lgr.info('-Computing PCA of optimally combined multi-echo data') - d = OCcatd[make_min_mask(OCcatd[:, np.newaxis, :])] - eim = eimask(d[:, np.newaxis, :]) - eim = eim[:, 0] == 1 - d = d[eim, :] + d = OCcatd[make_min_mask(OCcatd[:, np.newaxis, :])][:, np.newaxis, :] elif len(ste) == 1 and ste[0] == 0: lgr.info('-Computing PCA of spatially concatenated multi-echo data') - ste = np.arange(n_echos) d = catd[mask].astype('float64') - eim = eimask(d) == 1 - d = d[eim] else: lgr.info('-Computing PCA of TE #%s' % ','.join([str(ee) for ee in ste])) - d = np.concatenate([catd[mask, ee, :][:, np.newaxis] for ee in ste - 1], - axis=1).astype('float64') - eim = np.squeeze(eimask(d) == 1) - d = np.squeeze(d[eim]) + d = np.stack([catd[mask, ee] for ee in ste - 1], axis=1).astype('float64') + + eim = np.squeeze(eimask(d)) + d = np.squeeze(d[eim]) dz = ((d.T - d.T.mean(axis=0)) / d.T.std(axis=0)).T # var normalize ts dz = (dz - dz.mean()) / dz.std() # var normalize everything if not os.path.exists('pcastate.pkl'): - - # Do PC dimension selection and get eigenvalue cutoff + # do PC dimension selection and get eigenvalue cutoff if mlepca: from sklearn.decomposition import PCA ppca = PCA(n_components='mle', svd_solver='full') @@ -1060,8 +1058,9 @@ def tedpca(combmode, mask, stabilize, head, ste=0, mlepca=True): else: u, s, v = np.linalg.svd(dz, full_matrices=0) - sp = s/s.sum() - eigelb = sp[getelbow_mod(sp)] + # actual variance explained (normalized) + sp = s / s.sum() + eigelb = getelbow_mod(sp, val=True) spdif = np.abs(sp[1:] - sp[:-1]) spdifh = spdif[(spdif.shape[0]//2):] @@ -1079,7 +1078,7 @@ def tedpca(combmode, mask, stabilize, head, ste=0, mlepca=True): # Compute K and Rho for PCA comps eimum = np.atleast_2d(eim) - eimum = np.transpose(eimum, np.argsort(np.atleast_2d(eim).shape)[::-1]) + eimum = np.transpose(eimum, np.argsort(eimum.shape)[::-1]) eimum = eimum.prod(axis=1) o = np.zeros((mask.shape[0], *eimum.shape[1:])) 
o[mask] = eimum @@ -1160,7 +1159,7 @@ def tedpca(combmode, mask, stabilize, head, ste=0, mlepca=True): def tedica(nc, dd, conv, fixed_seed, cost, final_cost): """ Input is dimensionally reduced spatially concatenated multi-echo - time series dataset from tedpca(). Output is comptable, mmix, smaps + time series dataset from `tedpca`. Output is comptable, mmix, smaps from ICA, and betas from fitting catd to mmix. """ import mdp @@ -1176,7 +1175,7 @@ def tedica(nc, dd, conv, fixed_seed, cost, final_cost): return mmix -def gscontrol_raw(OCcatd, head, n_echos, dtrank=4): +def gscontrol_raw(catd, OCcatd, head, n_echos, dtrank=4): """ This function uses the spatial global signal estimation approach to modify catd (global variable) to removal global signal out of individual @@ -1188,9 +1187,7 @@ def gscontrol_raw(OCcatd, head, n_echos, dtrank=4): lgr.info('++ Applying amplitude-based T1 equilibration correction') # Legendre polynomial basis for denoising - n_vols = OCcatd.shape[-1] - Lmix = np.array([lpmv(0, vv, np.linspace(-1, 1, n_vols)) - for vv in range(dtrank)]).T + Lmix = np.array([lpmv(0, vv, np.linspace(-1, 1, OCcatd.shape[-1])) for vv in range(dtrank)]).T # compute mean, std, mask local to this function # inefficient, but makes this function a bit more modular @@ -1198,7 +1195,9 @@ def gscontrol_raw(OCcatd, head, n_echos, dtrank=4): Gmask = Gmu != 0 # Find spatial global signal + # BUG: this is indexing differently!!!!! 
and the subtraction is causing differences dat = OCcatd[Gmask] - Gmu[Gmask][:, np.newaxis] + # ^^^ THIS IS THE BAD PLACE sol = np.linalg.lstsq(Lmix, dat.T)[0] # Legendre basis for detrending detr = dat - np.dot(sol.T, Lmix.T)[0] sphis = (detr).min(axis=1) @@ -1218,16 +1217,16 @@ def gscontrol_raw(OCcatd, head, n_echos, dtrank=4): np.atleast_2d(glbase.T[dtrank])) + Gmu[Gmask][:, np.newaxis] # niwrite(OCcatd, aff, 'tsoc_orig.nii', head) # FIXME - OCcatd[Gmask] = tsoc_nogs + OCcatd = unmask(tsoc_nogs, Gmask) # niwrite(OCcatd, aff, 'tsoc_nogs.nii', head) # FIXME # Project glbase out of each echo for echo in range(n_echos): - dat = catd[Gmask, echo, :] + dat = catd[:, echo, :][Gmask] sol = np.linalg.lstsq(np.atleast_2d(glbase), dat.T)[0] e_nogs = dat - np.dot(np.atleast_2d(sol[dtrank]).T, np.atleast_2d(glbase.T[dtrank])) - catd[Gmask, echo, :] = e_nogs + catd[:, echo, :] = unmask(e_nogs, Gmask) return catd, OCcatd @@ -1496,12 +1495,12 @@ def main(options): global OCcatd OCcatd = optcom(catd, t2sG, tes, mask, combmode) if not options.no_gscontrol: - catd, OCcatd = gscontrol_raw(OCcatd, head, len(tes)) + catd, OCcatd = gscontrol_raw(catd, OCcatd, head, len(tes)) if options.mixm is None: lgr.info('++ Doing ME-PCA and ME-ICA') - nc, dd = tedpca(combmode, mask, stabilize, head, ste=options.ste) + nc, dd = tedpca(catd, combmode, mask, stabilize, head, ste=options.ste) mmix_orig = tedica(nc, dd, options.conv, options.fixed_seed, cost=options.initcost, diff --git a/tedana/utils/utils.py b/tedana/utils/utils.py index e8b25af59..08af43602 100644 --- a/tedana/utils/utils.py +++ b/tedana/utils/utils.py @@ -32,7 +32,7 @@ def load_image(data): else: data = check_niimg(data).get_data() - fdata = data.reshape((-1,) + data.shape[3:]) + fdata = data.reshape((-1,) + data.shape[3:], order='F') return fdata.squeeze() @@ -63,30 +63,22 @@ def cat2echos(data, n_echos=None): # individual echo files were provided if len(data) > 2: fdata = np.stack([load_image(f) for f in data], axis=1) - # a 
z-concatenated file was provided + if fdata.ndim < 3: + fdata = fdata[..., np.newaxis] + return fdata + # a z-concatenated file was provided; load data and pipe it down elif len(data) == 1: if n_echos is None: raise ValueError('Number of echos `n_echos` must be specified ' 'if z-concatenated data file provided.') - fdata = load_image(data[0]) + data = check_niimg(data[0]).get_data() # only two echo files were provided, which doesn't fly else: raise ValueError('Cannot run `tedana` with only two echos: ' '{}'.format(data)) - # ensure data has a time axis - if fdata.ndim < 3: - fdata = fdata[..., np.newaxis] - # data array was provided (is this necessary?) - elif isinstance(data, np.ndarray): - if data.ndim != 4: - raise ValueError('Data must be 4-dimensional, where the ' - 'dimensions correspond to: (1) first spatial ' - 'dimensions, (2) second spatial dimension, (3) ' - 'third spatial dimension x number of echos, and ' - '(4) time. Provided data dimensions were: ' - '{}'.format(data.shape)) - nx, ny, nz = data.shape[:2], data.shape[2] // n_echos - fdata = load_image(data.reshape(nx, ny, nz, n_echos, -1, order='F')) + + (nx, ny), nz = data.shape[:2], data.shape[2] // n_echos + fdata = load_image(data.reshape(nx, ny, nz, n_echos, -1, order='F')) return fdata @@ -268,31 +260,20 @@ def unmask(data, mask): Parameters ---------- - data : (S x E x T) array_like - Masked array, where S is samples flattened across spatial dimensions - mask : (X x Y x Z) array_like - Boolean array that was used to mask `data` + data : (M x E x T) array_like + Masked array, where `M` is the number of samples + mask : (S,) array_like + Boolean array of `S` samples that was used to mask `data` Returns ------- - fdata : (X x Y x Z x E x T) np.ndarray - Unmasked `data` array with spatial dimensions intact + out : (S x E x T) np.ndarray + Unmasked `data` array """ - M = (mask != 0).ravel() - Nm = M.sum() - - nx, ny, nz = mask.shape - - if len(data.shape) > 1: - nt = data.shape[1] - else: - nt = 1 - 
- out = np.zeros((nx * ny * nz, nt), dtype=data.dtype) - out[M, :] = np.reshape(data, (Nm, nt)) - - return np.squeeze(np.reshape(out, (nx, ny, nz, nt))) + out = np.zeros((mask.shape + data.shape[1:])) + out[mask] = data + return out def moments(data): From 10ef6d73e836b67df4d23b9e3e185ea2044c67d7 Mon Sep 17 00:00:00 2001 From: Ross Markello Date: Mon, 7 May 2018 00:33:35 -0400 Subject: [PATCH 05/18] [RF] niwrite --> filewrite, better gifti support More major overhauls in the process of supporting GIFTI files. The `niwrite` function has been killed to give way to `filewrite`. Lots of stylistic changes in the process of integrating this throughout `tedana.interfaces.tedana`, but mostly just significant new functionality in `tedana.utils`. --- tedana/cli/run.py | 2 +- tedana/interfaces/__init__.py | 4 +- tedana/interfaces/t2smap.py | 67 +++--- tedana/interfaces/tedana.py | 408 +++++++++++++++------------------- tedana/tests/test_utils.py | 4 - tedana/utils/__init__.py | 14 +- tedana/utils/utils.py | 265 ++++++++++++++++++---- 7 files changed, 446 insertions(+), 318 deletions(-) diff --git a/tedana/cli/run.py b/tedana/cli/run.py index 092d984ad..7ef7cc4f6 100644 --- a/tedana/cli/run.py +++ b/tedana/cli/run.py @@ -20,7 +20,7 @@ def get_parser(): parser.add_argument('-e', dest='tes', nargs='+', - help='Echo times (in ms) ex: 15,39,63', + help='Echo times (in ms) ex: 15.0 39.0 63.0', required=True) parser.add_argument('--mix', dest='mixm', diff --git a/tedana/interfaces/__init__.py b/tedana/interfaces/__init__.py index de85773df..94e7677f0 100644 --- a/tedana/interfaces/__init__.py +++ b/tedana/interfaces/__init__.py @@ -1,7 +1,7 @@ # emacs: -*- mode: python-mode; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*- # ex: set sts=4 ts=4 sw=4 et: -from .t2smap import (t2sadmap, optcom) +from .t2smap import (t2sadmap, make_optcom) __all__ = [ - 't2sadmap', 'optcom'] + 't2sadmap', 'make_optcom'] diff --git a/tedana/interfaces/t2smap.py b/tedana/interfaces/t2smap.py
index d78feb8c1..61020e168 100644 --- a/tedana/interfaces/t2smap.py +++ b/tedana/interfaces/t2smap.py @@ -1,8 +1,8 @@ import numpy as np -import nibabel as nib -from tedana.utils import (niwrite, cat2echos, makeadmask, unmask, fmask) +from tedana.utils import (filewrite, load_data, makeadmask, unmask, fmask) import logging +logging.basicConfig(format='[%(levelname)s]: %(message)s', level=logging.INFO) lgr = logging.getLogger(__name__) @@ -115,7 +115,7 @@ def t2sadmap(data, tes, mask, masksum, start_echo): X = np.repeat(x, n_vols, axis=0) beta, res, rank, sing = np.linalg.lstsq(X, B) - t2s = 1 / beta[1, :].T + t2s = 1. / beta[1, :].T s0 = np.exp(beta[0, :]).T t2s[np.isinf(t2s)] = 500. # why 500? @@ -141,18 +141,18 @@ def t2sadmap(data, tes, mask, masksum, start_echo): return t2sa, s0va, t2ss, s0vs, t2saf, s0vaf -def optcom(data, t2s, tes, mask, combmode): +def make_optcom(data, t2s, tes, mask, combmode): """ Optimally combine BOLD data across TEs. - out = optcom(data,t2s) + out = make_optcom(data,t2s) Parameters ---------- data : (S x E x T) :obj:`numpy.ndarray` Concatenated BOLD data. t2 : (S,) :obj:`numpy.ndarray` - 3D map of estimated T2* values. + Estimated T2* values. tes : :obj:`numpy.ndarray` Array of TEs, in seconds. 
mask : (S,) :obj:`numpy.ndarray` @@ -172,31 +172,27 @@ def optcom(data, t2s, tes, mask, combmode): mdata = data[mask] tes = np.array(tes)[np.newaxis] # (1 x E) array_like - if len(t2s.shape) == 3: - print('Optimally combining with voxel-wise T2 estimates') - ft2s = t2s[:, np.newaxis] + if t2s.ndim == 1: + lgr.info('Optimally combining with voxel-wise T2 estimates') + ft2s = t2s[mask, np.newaxis] else: - print('Optimally combining with voxel- and volume-wise T2 estimates') - ft2s = t2s[:, :, np.newaxis] - - if combmode == 'ste': - alpha = mdata.mean(-1) * tes - else: - alpha = tes * np.exp(-tes / ft2s) + lgr.info('Optimally combining with voxel- and volume-wise T2 estimates') + ft2s = t2s[mask, :, np.newaxis] if combmode == 'ste': alpha = mdata.mean(axis=-1) * tes else: - alpha = tes * np.exp(-tes / t2s[mask]) + alpha = tes * np.exp(-tes / ft2s) - if len(t2s.shape) == 3: + if t2s.ndim == 1: alpha = np.tile(alpha[:, :, np.newaxis], (1, 1, n_vols)) else: alpha = np.swapaxes(alpha, 1, 2) ax0_idx, ax2_idx = np.where(np.all(alpha == 0, axis=1)) alpha[ax0_idx, :, ax2_idx] = 1. 
- fout = unmask(np.average(mdata, axis=1, weights=alpha), mask) + fout = np.average(mdata, axis=1, weights=alpha) + fout = unmask(fout, mask) return fout @@ -216,28 +212,27 @@ def main(options): suf = '_%s' % str(options.label) else: suf = '' + tes, data, combmode = options.tes, options.data, options.combmode - tes = [float(te) for te in options.tes] + tes = [float(te) for te in tes] n_echos = len(tes) - catim = nib.load(options.data[0]) - head = catim.get_header() - head.extensions = [] - head.set_sform(head.get_sform(), code=1) - aff = catim.get_affine() - catd = cat2echos(catim.get_data(), n_echos) - nx, ny, nz, n_echos, n_trs = catd.shape + + catd = load_data(data, n_echos=n_echos) + n_samp, n_echos, n_trs = catd.shape + + ref_img = data[0] if isinstance(data, list) else data lgr.info("++ Computing Mask") mask, masksum = makeadmask(catd, minimum=False, getsum=True) - niwrite(masksum, aff, 'masksum%s.nii' % suf) + filewrite(masksum, 'masksum%s' % suf, ref_img, copy_header=False) lgr.info("++ Computing Adaptive T2* map") - t2s, s0, t2ss, s0vs, t2saf, s0vaf = t2sadmap(catd, mask, tes, masksum, 2) - niwrite(t2ss, aff, 't2ss%s.nii' % suf) - niwrite(s0vs, aff, 's0vs%s.nii' % suf) + t2s, s0, t2ss, s0vs, t2saf, s0vaf = t2sadmap(catd, tes, mask, masksum, 2) + filewrite(t2ss, 't2ss%s' % suf, ref_img, copy_header=False) + filewrite(s0vs, 's0vs%s' % suf, ref_img, copy_header=False) lgr.info("++ Computing optimal combination") - tsoc = np.array(optcom(catd, t2s, tes, mask, options.combmode), + tsoc = np.array(make_optcom(catd, t2s, tes, mask, combmode), dtype=float) # Clean up numerical errors @@ -249,7 +244,7 @@ def main(options): t2s[t2s < 0] = 0 t2sm[t2sm < 0] = 0 - niwrite(tsoc, aff, 'ocv%s.nii' % suf) - niwrite(s0, aff, 's0v%s.nii' % suf) - niwrite(t2s, aff, 't2sv%s.nii' % suf) - niwrite(t2sm, aff, 't2svm%s.nii' % suf) + filewrite(tsoc, 'ocv%s' % suf, ref_img, copy_header=False) + filewrite(s0, 's0v%s' % suf, ref_img, copy_header=False) + filewrite(t2s, 't2sv%s' % suf, 
ref_img, copy_header=False) + filewrite(t2sm, 't2svm%s' % suf, ref_img, copy_header=False) diff --git a/tedana/interfaces/tedana.py b/tedana/interfaces/tedana.py index 09ced5ac3..b39d32efe 100644 --- a/tedana/interfaces/tedana.py +++ b/tedana/interfaces/tedana.py @@ -1,21 +1,21 @@ - import os import os.path as op import shutil import pickle import textwrap import numpy as np -import nibabel as nib from scipy import stats from sklearn import svm from scipy.special import lpmv from sklearn.cluster import DBSCAN -from tedana.interfaces import (optcom, t2sadmap) -from tedana.utils import (cat2echos, make_min_mask, - makeadmask, fmask, unmask, - fitgaussian, niwrite, dice, andb) +from tedana.interfaces import (make_optcom, t2sadmap) +from tedana.utils import (load_image, load_data, get_dtype, + make_min_mask, makeadmask, + fmask, unmask, filewrite, + fitgaussian, dice, andb) import logging +logging.basicConfig(format='[%(levelname)s]: %(message)s', level=logging.INFO) lgr = logging.getLogger(__name__) """ @@ -76,7 +76,7 @@ def do_svm(X_train, y_train, X_test, svmtype=0): return y_pred, clf -def spatclust(data, mask, csize, thr, header, aff, infile=None, dindex=0, +def spatclust(data, mask, csize, thr, ref_img, infile=None, dindex=0, tindex=0): """ Thresholds and spatially clusters `data` @@ -98,53 +98,24 @@ def spatclust(data, mask, csize, thr, header, aff, infile=None, dindex=0, clustered : """ - # threshold image - if infile is None: data = data.copy() data[data < thr] = 0 - niwrite(unmask(data, mask), aff, '__clin.nii.gz', header) - infile = '__clin.nii.gz' + infile = filewrite(unmask(data, mask), '__clin', ref_img, gzip=True) + + # FIXME: ideally no calls to os.system!!! 
(or AFNI, for that matter) addopts = '' - if data is not None and len(np.squeeze(data).shape) > 1 and dindex + tindex == 0: + if data is not None and data.squeeze().ndim > 1 and dindex + tindex == 0: addopts = '-doall' else: addopts = '-1dindex {0} -1tindex {1}'.format(str(dindex), str(tindex)) - # cmd_str = '3dmerge -overwrite {0} -dxyz=1 -1clust 1 {1:d} ' \ '-1thresh {2:.02f} -prefix __clout.nii.gz {3}' os.system(cmd_str.format(addopts, int(csize), float(thr), infile)) - clustered = fmask(nib.load('__clout.nii.gz').get_data(), mask) != 0 - return clustered - -def rankvec(vals): - """ - Returns ranks of array - - Parameters - ---------- - vals : array-like - 1d array from which to determine ranks. - - Returns - ------- - ranks : array-like - 1d array of ranks for values in input vals. - """ - try: - vals = np.array(vals) - except Exception: # would this ever happen???? - raise IOError('Input vals is not array_like') - - if len(vals.shape) != 1: - raise ValueError('Input vals is not 1d array') - - asort = np.argsort(vals) - ranks = np.zeros(vals.shape[0]) - ranks[asort] = np.arange(vals.shape[0]) + 1 - return ranks + clustered = load_image('__clout.nii.gz')[mask] != 0 + return clustered def get_coeffs(data, mask, X, add_const=False): @@ -188,14 +159,14 @@ def get_coeffs(data, mask, X, add_const=False): def getelbow_cons(ks, val=False): - """Elbow using mean/variance method - conservative + """ + Elbow using mean/variance method - conservative Parameters ---------- ks : array-like - val : bool, optional - Default is False + Return the value of the elbow instead of the index. 
Default: False Returns ------- @@ -203,6 +174,7 @@ def getelbow_cons(ks, val=False): Either the elbow index (if val is True) or the values at the elbow index (if val is False) """ + ks = np.sort(ks)[::-1] nk = len(ks) temp1 = [(ks[nk - 5 - ii - 1] > ks[nk - 5 - ii:nk].mean() + 2 * ks[nk - 5 - ii:nk].std()) @@ -223,14 +195,14 @@ def getelbow_cons(ks, val=False): def getelbow_mod(ks, val=False): - """Elbow using linear projection method - moderate + """ + Elbow using linear projection method - moderate Parameters ---------- ks : array-like - val : bool, optional - Default is False + Return the value of the elbow instead of the index. Default: False Returns ------- @@ -256,7 +228,8 @@ def getelbow_mod(ks, val=False): def getelbow_aggr(ks, val=False): - """Elbow using curvature - aggressive + """ + Elbow using curvature - aggressive Parameters ---------- @@ -271,6 +244,7 @@ def getelbow_aggr(ks, val=False): Either the elbow index (if val is True) or the values at the elbow index (if val is False) """ + ks = np.sort(ks)[::-1] dKdt = ks[:-1] - ks[1:] dKdt2 = dKdt[:-1] - dKdt[1:] @@ -286,15 +260,17 @@ def getelbow_aggr(ks, val=False): def getfbounds(n_echos): """ - Parameters ---------- n_echos : int - Number of echoes. 
+ Number of echoes Returns ------- + fmin, fmid, fmax : float + Minimum, mid, and max F bounds """ + if not isinstance(n_echos, int): raise IOError('Input n_echos must be int') elif n_echos <= 0: @@ -311,6 +287,7 @@ def eimask(dd, ees=None): """ Returns mask for data between [0.001, 5] * 98th percentile of dd """ + if ees is None: ees = range(dd.shape[1]) imask = np.zeros([dd.shape[0], len(ees)], dtype=bool) @@ -340,7 +317,7 @@ def computefeats2(data, mmix, mask, normalize=True): # Write feature versions of components data = data[mask] # demean data - data_vn = (data - data.mean(axis=-1, keepdims=True)) / data.std(axis=-1, keepdims=True) + data_vn = stats.zscore(data, axis=-1) # get betas for demeaned data against `mmix` data_R = get_coeffs(unmask(data_vn, mask), mask, mmix)[mask] # cap betas to range [-0.999, 0.999] @@ -352,8 +329,8 @@ def computefeats2(data, mmix, mask, normalize=True): data_Z = np.atleast_2d(data_Z).T if normalize: # standardize - data_Zm = (data_Z - data_Z.mean(axis=0, keepdims=True)) / data_Z.std(axis=0, keepdims=True) - # add back mean / stdev + data_Zm = stats.zscore(data_Z, axis=0) + # add back (mean / stdev) data_Z = data_Zm + (data_Z.mean(axis=0, keepdims=True) / data_Z.std(axis=0, keepdims=True)) return data_Z @@ -369,17 +346,31 @@ def ctabsel(ctabfile): return tuple([np.array(class_dict[kk], dtype=int) for kk in class_tags]) -def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, head, +def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, ref_img, fout=None, reindex=False, mmixN=None, full_sel=True): """ - Input: - fout is flag for output of per-component TE-dependence maps - t2s is a (nx,ny,nz) ndarray - tes is a 1d array + Parameters + ---------- + catd : (S x E x T) array_like + mmix : (T x C) array_like + mask : (S,) array_like + t2s : (S,) array_like + t2sG : (S,) array_like + tes : (E,) list + combmode : str + ref_img : str or img_like + fout : bool + Whether to output per-component TE-dependencen maps 
Default: None + reindex : bool, optional + Default: False + mmixN : array_like, optional + Default: None + full_sel : bool, optional + Default: True """ # compute optimal combination of raw data - tsoc = np.array(optcom(catd, t2sG, tes, mask, combmode), + tsoc = np.array(make_optcom(catd, t2sG, tes, mask, combmode), dtype=float)[mask] # demean optimal combination tsoc_dm = tsoc - tsoc.mean(axis=-1, keepdims=True) @@ -407,10 +398,7 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, head, totvar_norm = (WTS**2).sum() # compute Betas and means over TEs for TE-dependence analysis - n_echos = len(tes) - betas = get_coeffs(catd, - np.repeat(mask[:, np.newaxis], n_echos, axis=1), - mmix) + betas = get_coeffs(catd, np.repeat(mask[:, np.newaxis], len(tes), axis=1), mmix) n_samp, n_echos, n_components = betas.shape n_voxels = mask.sum() n_data_voxels = (t2s != 0).sum() @@ -423,9 +411,9 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, head, t2smask = t2s[t2s != 0] betamask = betas[t2s != 0] - # setup Xmats + # set up Xmats X1 = mumask.T # Model 1 - X2 = np.tile(tes, (1, n_voxels)) * mumask.T / t2smask.T # Model 2 + X2 = np.tile(tes, (1, n_data_voxels)) * mumask.T / t2smask.T # Model 2 # tables for component selection global Kappas, Rhos, varex, varex_norm @@ -478,10 +466,10 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, head, Kappas[i] = np.average(F_R2, weights=norm_weights) Rhos[i] = np.average(F_S0, weights=norm_weights) - # Tabulate component values + # tabulate component values comptab_pre = np.vstack([np.arange(n_components), Kappas, Rhos, varex, varex_norm]).T if reindex: - # Re-index all components in Kappa order + # re-index all components in Kappa order comptab = comptab_pre[comptab_pre[:, 1].argsort()[::-1], :] Kappas = comptab[:, 1] Rhos = comptab[:, 2] @@ -501,53 +489,57 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, head, comptab = comptab_pre mmix_new = mmix - # Full selection 
including clustering criteria + # full selection including clustering criteria seldict = None if full_sel: for i in range(n_components): - # Save out files + # save out files out = np.zeros((n_samp, 4)) if fout is not None: - ccname = "cc%.3d.nii" % i + ccname, gzip = 'cc{:03d}'.format(i), False else: - ccname = ".cc_temp.nii.gz" + ccname, gzip = '.cc_temp', True - out[:, :, :, 0] = np.squeeze(unmask(PSC[:, i], mask)) - out[:, :, :, 1] = np.squeeze(unmask(F_R2_maps[:, i], t2s != 0)) - out[:, :, :, 2] = np.squeeze(unmask(F_S0_maps[:, i], t2s != 0)) - out[:, :, :, 3] = np.squeeze(unmask(Z_maps[:, i], mask)) + out[:, 0] = np.squeeze(unmask(PSC[:, i], mask)) + out[:, 1] = np.squeeze(unmask(F_R2_maps[:, i], t2s != 0)) + out[:, 2] = np.squeeze(unmask(F_S0_maps[:, i], t2s != 0)) + out[:, 3] = np.squeeze(unmask(Z_maps[:, i], mask)) + + filewrite(out, ccname, ref_img, gzip=gzip) + + if get_dtype(ref_img) == 'GIFTI': + continue # TODO: pass through GIFTI file data as below - niwrite(out, fout, ccname, head) os.system('3drefit -sublabel 0 PSC -sublabel 1 F_R2 -sublabel 2 F_SO ' - '-sublabel 3 Z_sn %s 2> /dev/null > /dev/null' % ccname) + '-sublabel 3 Z_sn {} 2> /dev/null > /dev/null'.format(ccname)) csize = np.max([int(n_voxels * 0.0005) + 5, 20]) # Do simple clustering on F # TODO: can be replaced with nilearn.image.threshold_img - os.system("3dcalc -overwrite -a %s[1..2] -expr 'a*step(a-%i)' -prefix .fcl_in.nii.gz " - "-overwrite" % (ccname, fmin)) + os.system('3dcalc -overwrite -a {}[1..2] -expr \'a*step(a-{})\' -prefix ' + '.fcl_in.nii.gz -overwrite'.format(ccname, fmin)) # TODO: can be replaced with nilearn.regions.connected_regions - os.system('3dmerge -overwrite -dxyz=1 -1clust 1 %i -doall ' - '-prefix .fcl_out.nii.gz .fcl_in.nii.gz' % (csize)) - sel = fmask(nib.load('.fcl_out.nii.gz').get_data(), t2s != 0) != 0 - sel = np.array(sel, dtype=np.int) + os.system('3dmerge -overwrite -dxyz=1 -1clust 1 {} -doall ' + '-prefix .fcl_out.nii.gz .fcl_in.nii.gz'.format(csize)) + 
sel = load_image('.fcl_out.nii.gz')[t2s != 0] + sel = np.array(sel != 0, dtype=np.int) F_R2_clmaps[:, i] = sel[:, 0] F_S0_clmaps[:, i] = sel[:, 1] # Do simple clustering on Z at p<0.05 - sel = spatclust(None, mask, csize, 1.95, head, aff, + sel = spatclust(None, mask, csize, 1.95, ref_img, infile=ccname, dindex=3, tindex=3) Z_clmaps[:, i] = sel # Do simple clustering on ranked signal-change map countsigFR2 = F_R2_clmaps[:, i].sum() countsigFS0 = F_S0_clmaps[:, i].sum() - Br_clmaps_R2[:, i] = spatclust(rankvec(tsoc_Babs[:, i]), mask, - csize, max(tsoc_Babs.shape)-countsigFR2, head, aff) - Br_clmaps_S0[:, i] = spatclust(rankvec(tsoc_Babs[:, i]), mask, + Br_clmaps_R2[:, i] = spatclust(stats.rankdata(tsoc_Babs[:, i]), mask, + csize, max(tsoc_Babs.shape)-countsigFR2, ref_img) + Br_clmaps_S0[:, i] = spatclust(stats.rankdata(tsoc_Babs[:, i]), mask, csize, max(tsoc_Babs.shape)-countsigFS0, - head, aff) + ref_img) seldict = {} selvars = ['Kappas', 'Rhos', 'WTS', 'varex', 'Z_maps', 'F_R2_maps', @@ -667,7 +659,7 @@ def selcomps(seldict, mmix, head, manacc, n_echos, debug=False, olevel=2, oversi mask)[:, :, :, ii]))) fproj_z = fproj.max(2) fproj[fproj == fproj.max()] = 0 - fproj_arr[:, ii] = rankvec(fproj_z.flatten()) + fproj_arr[:, ii] = stats.rankdata(fproj_z.flatten()) fproj_arr_val[:, ii] = fproj_z.flatten() spr.append(np.array(fproj_z > fproj_z.max() / 4, dtype=np.int).sum()) fprojr = np.array([fproj, fproj[:, :, ::-1]]).max(0) @@ -882,9 +874,9 @@ def selcomps(seldict, mmix, head, manacc, n_echos, debug=False, olevel=2, oversi # To write out veinmask veinout = np.zeros(t2s.shape) veinout[t2s!=0] = veinmaskf - niwrite(veinout,aff,'veinmaskf.nii',head) - veinBout = unmask(veinmaskB,mask) - niwrite(veinBout,aff,'veins50.nii',head) + filewrite(veinout, 'veinmaskf', ref_img) + veinBout = unmask(veinmaskB, mask) + filewrite(veinBout, 'veins50', ref_img) """ tsoc_B_Zcl = np.zeros(tsoc_B.shape) @@ -928,7 +920,7 @@ def selcomps(seldict, mmix, head, manacc, n_echos, debug=False, 
olevel=2, oversi group0_res = np.intersect1d(KRguess, group0) phys_var_zs.append((vvex - vvex[group0_res].mean()) / vvex[group0_res].std()) veinBout = unmask(veinmaskB, mask) - niwrite(veinBout, aff, 'veins_l%i.nii' % t2sl_i, head) + filewrite(veinBout, 'veins_l%i' % t2sl_i, ref_img) # Mask to sample veins phys_var_z = np.array(phys_var_zs).max(0) @@ -946,8 +938,8 @@ def selcomps(seldict, mmix, head, manacc, n_echos, debug=False, olevel=2, oversi phys_art = np.setdiff1d(nc[andb([phys_var_z > 3.5, Kappas < minK_ign]) == 2], group0) phys_art = np.union1d(np.setdiff1d(nc[andb([phys_var_z > 2, - (rankvec(phys_var_z) - - rankvec(Kappas)) > newcest / 2, + (stats.rankdata(phys_var_z) - + stats.rankdata(Kappas)) > newcest / 2, Vz2 > -1]) == 3], group0), phys_art) # Want to replace field_art with an acf/SVM based approach @@ -955,8 +947,8 @@ def selcomps(seldict, mmix, head, manacc, n_echos, debug=False, olevel=2, oversi field_art = np.setdiff1d(nc[andb([mmix_kurt_z_max > 5, Kappas < minK_ign]) == 2], group0) field_art = np.union1d(np.setdiff1d(nc[andb([mmix_kurt_z_max > 2, - (rankvec(mmix_kurt_z_max) - - rankvec(Kappas)) > newcest / 2, + (stats.rankdata(mmix_kurt_z_max) - + stats.rankdata(Kappas)) > newcest / 2, Vz2 > 1, Kappas < F01]) == 4], group0), field_art) field_art = np.union1d(np.setdiff1d(nc[andb([mmix_kurt_z_max > 3, Vz2 > 3, @@ -964,7 +956,7 @@ def selcomps(seldict, mmix, head, manacc, n_echos, debug=False, olevel=2, oversi group0), field_art) field_art = np.union1d(np.setdiff1d(nc[andb([mmix_kurt_z_max > 5, Vz2 > 5]) == 2], group0), field_art) - misc_art = np.setdiff1d(nc[andb([(rankvec(Vz) - rankvec(Ktz)) > newcest / 2, + misc_art = np.setdiff1d(nc[andb([(stats.rankdata(Vz) - stats.rankdata(Ktz)) > newcest / 2, Kappas < Khighelbowval]) == 2], group0) ign_cand = np.unique(list(field_art)+list(phys_art)+list(misc_art)) midkrej = np.union1d(midk, rej) @@ -1002,7 +994,7 @@ def selcomps(seldict, mmix, head, manacc, n_echos, debug=False, olevel=2, oversi return 
list(sorted(ncl)), list(sorted(rej)), list(sorted(midk)), list(sorted(ign)) -def tedpca(combmode, mask, stabilize, head, tes, kdaw, rdaw, ste=0, +def tedpca(catd, combmode, mask, stabilize, ref_img, tes, kdaw, rdaw, ste=0, mlepca=True): n_samp, n_echos, n_vols = catd.shape ste = np.array([int(ee) for ee in str(ste).split(',')]) @@ -1039,19 +1031,11 @@ def tedpca(combmode, mask, stabilize, head, tes, kdaw, rdaw, ste=0, sp = s / s.sum() eigelb = getelbow_mod(sp, val=True) - spdif = np.abs(sp[1:] - sp[:-1]) - spdifh = spdif[(spdif.shape[0]//2):] - spdmin = spdif.min() - spdthr = np.mean([spdifh.max(), spdmin]) - spmin = sp[(spdif.shape[0]//2) + - (np.arange(spdifh.shape[0])[spdifh >= spdthr][0]) + - 1] - spcum = [] - spcumv = 0 - for sss in sp: - spcumv += sss - spcum.append(spcumv) - spcum = np.array(spcum) + spdif = np.abs(np.diff(sp)) + spdifh = spdif[(len(spdif)//2):] + spdthr = np.mean([spdifh.max(), spdif.min()]) + spmin = sp[(len(spdif)//2) + np.arange(len(spdifh))[spdifh >= spdthr][0] + 1] + spcum = np.cumsum(sp) # Compute K and Rho for PCA comps eimum = np.atleast_2d(eim) @@ -1064,7 +1048,7 @@ def tedpca(combmode, mask, stabilize, head, tes, kdaw, rdaw, ste=0, vTmix = v.T vTmixN = ((vTmix.T - vTmix.T.mean(0)) / vTmix.T.std(0)).T _, ctb, betasv, v_T = fitmodels_direct(catd, v.T, eimum, t2s, t2sG, - tes, combmode, head, + tes, combmode, ref_img, mmixN=vTmixN, full_sel=False) ctb = ctb[ctb[:, 0].argsort(), :] ctb = np.vstack([ctb.T[:3], sp]).T @@ -1127,8 +1111,8 @@ def tedpca(combmode, mask, stabilize, head, tes, kdaw, rdaw, ste=0, lgr.info('--Selected {0} components. 
Minimum Kappa={1:.02f} ' 'Rho={2:.02f}'.format(n_components, kappa_thr, rho_thr)) - dd = ((dd.T - dd.T.mean(0)) / dd.T.std(0)).T # Variance normalize timeseries - dd = (dd - dd.mean()) / dd.std() # Variance normalize everything + dd = stats.zscore(dd.T, axis=0).T # variance normalize timeseries + dd = stats.zscore(dd, axis=None) # variance normalize everything return n_components, dd @@ -1139,6 +1123,7 @@ def tedica(n_components, dd, conv, fixed_seed, cost, final_cost): time series dataset from `tedpca`. Output is comptable, mmix, smaps from ICA, and betas from fitting catd to mmix. """ + import mdp climit = float(conv) mdp.numx_rand.seed(fixed_seed) @@ -1152,7 +1137,7 @@ def tedica(n_components, dd, conv, fixed_seed, cost, final_cost): return mmix -def gscontrol_raw(catd, OCcatd, head, n_echos, dtrank=4): +def gscontrol_raw(catd, optcom, n_echos, ref_img, dtrank=4): """ This function uses the spatial global signal estimation approach to modify catd (global variable) to removal global signal out of individual @@ -1164,27 +1149,26 @@ def gscontrol_raw(catd, OCcatd, head, n_echos, dtrank=4): lgr.info('++ Applying amplitude-based T1 equilibration correction') # Legendre polynomial basis for denoising - Lmix = np.array([lpmv(0, vv, np.linspace(-1, 1, OCcatd.shape[-1])) for vv in range(dtrank)]).T + bounds = np.linspace(-1, 1, optcom.shape[-1]) + Lmix = np.column_stack([lpmv(0, vv, bounds) for vv in range(dtrank)]) # compute mean, std, mask local to this function # inefficient, but makes this function a bit more modular - Gmu = OCcatd.mean(axis=-1) # temporal mean + Gmu = optcom.mean(axis=-1) # temporal mean Gmask = Gmu != 0 - # Find spatial global signal - # BUG: this is indexing differently!!!!! 
and the subtraction is causing differences - dat = OCcatd[Gmask] - Gmu[Gmask][:, np.newaxis] - # ^^^ THIS IS THE BAD PLACE + # find spatial global signal + dat = optcom[Gmask] - Gmu[Gmask][:, np.newaxis] sol = np.linalg.lstsq(Lmix, dat.T)[0] # Legendre basis for detrending detr = dat - np.dot(sol.T, Lmix.T)[0] sphis = (detr).min(axis=1) sphis -= sphis.mean() - # niwrite(unmask(sphis, Gmask), aff, 'T1gs.nii', head) # FIXME + filewrite(unmask(sphis, Gmask), 'T1gs', ref_img) - # Find time course ofc the spatial global signal + # find time course of the spatial global signal # make basis with the Legendre basis glsig = np.linalg.lstsq(np.atleast_2d(sphis).T, dat)[0] - glsig = (glsig - glsig.mean()) / glsig.std() + glsig = stats.zscore(glsig, axis=None) np.savetxt('glsig.1D', glsig) glbase = np.hstack([Lmix, glsig.T]) @@ -1193,42 +1177,43 @@ def gscontrol_raw(catd, OCcatd, head, n_echos, dtrank=4): tsoc_nogs = dat - np.dot(np.atleast_2d(sol[dtrank]).T, np.atleast_2d(glbase.T[dtrank])) + Gmu[Gmask][:, np.newaxis] - # niwrite(OCcatd, aff, 'tsoc_orig.nii', head) # FIXME - OCcatd = unmask(tsoc_nogs, Gmask) - # niwrite(OCcatd, aff, 'tsoc_nogs.nii', head) # FIXME + filewrite(optcom, 'tsoc_orig', ref_img) + optcom = unmask(tsoc_nogs, Gmask) + filewrite(optcom, 'tsoc_nogs.nii', ref_img) # Project glbase out of each echo + dm_catd = catd.copy() # don't overwrite catd for echo in range(n_echos): - dat = catd[:, echo, :][Gmask] + dat = dm_catd[:, echo, :][Gmask] sol = np.linalg.lstsq(np.atleast_2d(glbase), dat.T)[0] e_nogs = dat - np.dot(np.atleast_2d(sol[dtrank]).T, np.atleast_2d(glbase.T[dtrank])) - catd[:, echo, :] = unmask(e_nogs, Gmask) + dm_catd[:, echo, :] = unmask(e_nogs, Gmask) - return catd, OCcatd + return dm_catd, optcom -def gscontrol_mmix(mmix, acc, rej, midk, empty, head): +def gscontrol_mmix(mmix, acc, rej, midk, empty, ref_img): - Gmu = OCcatd.mean(-1) - Gstd = OCcatd.std(-1) - Gmask = Gmu != 0 + Gmu = OCcatd.mean(axis=-1) + Gstd = OCcatd.std(axis=-1) + Gmask =
(Gmu != 0) """ Compute temporal regression """ dat = (OCcatd[Gmask] - Gmu[Gmask][:, np.newaxis]) / Gstd[mask][:, np.newaxis] - solG = np.linalg.lstsq(mmix, dat.T) - resid = dat - np.dot(solG[0].T, mmix.T) + solG = np.linalg.lstsq(mmix, dat.T)[0] + resid = dat - np.dot(solG.T, mmix.T) """ Build BOLD time series without amplitudes, and save T1-like effect """ - bold_ts = np.dot(solG[0].T[:, acc], mmix[:, acc].T) - sphis = bold_ts.min(-1) + bold_ts = np.dot(solG.T[:, acc], mmix[:, acc].T) + sphis = bold_ts.min(axis=-1) sphis -= sphis.mean() lgr.info(sphis.shape) - niwrite(unmask(sphis, mask), aff, 'sphis_hik.nii', head) + filewrite(unmask(sphis, mask), 'sphis_hik', ref_img) """ Find the global signal based on the T1-like effect @@ -1240,14 +1225,14 @@ def gscontrol_mmix(mmix, acc, rej, midk, empty, head): T1 correct time series by regression """ bold_noT1gs = bold_ts - np.dot(np.linalg.lstsq(glsig.T, bold_ts.T)[0].T, glsig) - niwrite(unmask(bold_noT1gs*Gstd[mask][:, np.newaxis], mask), - aff, 'hik_ts_OC_T1c.nii', head) + filewrite(unmask(bold_noT1gs * Gstd[mask][:, np.newaxis], mask), + 'hik_ts_OC_T1c.nii', ref_img) """ Make medn version of T1 corrected time series """ - niwrite(Gmu[:, :, :, np.newaxis] + unmask((bold_noT1gs+resid)*Gstd[mask][:, np.newaxis], mask), - aff, 'dn_ts_OC_T1c.nii', head) + filewrite(Gmu[..., np.newaxis] + unmask((bold_noT1gs+resid)*Gstd[mask][:, np.newaxis], mask), + 'dn_ts_OC_T1c', ref_img) """ Orthogonalize mixing matrix w.r.t. 
T1-GS @@ -1255,20 +1240,20 @@ def gscontrol_mmix(mmix, acc, rej, midk, empty, head): mmixnogs = mmix.T - np.dot(np.linalg.lstsq(glsig.T, mmix)[0].T, glsig) mmixnogs_mu = mmixnogs.mean(-1) mmixnogs_std = mmixnogs.std(-1) - mmixnogs_norm = (mmixnogs-mmixnogs_mu[:, np.newaxis])/mmixnogs_std[:, np.newaxis] + mmixnogs_norm = (mmixnogs - mmixnogs_mu[:, np.newaxis]) / mmixnogs_std[:, np.newaxis] mmixnogs_norm = np.vstack([np.atleast_2d(np.ones(max(glsig.shape))), glsig, mmixnogs_norm]) """ Write T1-GS corrected components and mixing matrix """ sol = np.linalg.lstsq(mmixnogs_norm.T, dat.T) - niwrite(unmask(sol[0].T[:, 2:], mask), aff, 'betas_hik_OC_T1c.nii', head) + filewrite(unmask(sol[0].T[:, 2:], mask), 'betas_hik_OC_T1c', ref_img) np.savetxt('meica_mix_T1c.1D', mmixnogs) -def write_split_ts(data, comptable, mmix, acc, rej, midk, head, suffix=''): +def write_split_ts(data, comptable, mmix, acc, rej, midk, ref_img, suffix=''): mdata = fmask(data, mask) - betas = fmask(get_coeffs(unmask((mdata.T-mdata.T.mean(0)).T, mask), + betas = fmask(get_coeffs(unmask((mdata.T - mdata.T.mean(0)).T, mask), mask, mmix), mask) dmdata = mdata.T-mdata.T.mean(0) varexpl = (1-((dmdata.T-betas.dot(mmix.T))**2.).sum()/(dmdata**2.).sum())*100 @@ -1276,23 +1261,21 @@ def write_split_ts(data, comptable, mmix, acc, rej, midk, head, suffix=''): midkts = betas[:, midk].dot(mmix.T[midk, :]) lowkts = betas[:, rej].dot(mmix.T[rej, :]) if len(acc) != 0: - niwrite(unmask(betas[:, acc].dot(mmix.T[acc, :]), mask), - aff, 'hik_ts_{0}.nii'.format(suffix), head) + filewrite(unmask(betas[:, acc].dot(mmix.T[acc, :]), mask), + 'hik_ts_{0}'.format(suffix), ref_img) if len(midk) != 0: - niwrite(unmask(midkts, mask), aff, 'midk_ts_{0}.nii'.format(suffix), - head) + filewrite(unmask(midkts, mask), 'midk_ts_{0}'.format(suffix), ref_img) if len(rej) != 0: - niwrite(unmask(lowkts, mask), aff, 'lowk_ts_{0}.nii'.format(suffix), - head) - niwrite(unmask(fmask(data, mask)-lowkts-midkts, mask), aff, - 
'dn_ts_{0}.nii'.format(suffix), head) + filewrite(unmask(lowkts, mask), 'lowk_ts_{0}'.format(suffix), ref_img) + filewrite(unmask(data[mask] - lowkts - midkts, mask), + 'dn_ts_{0}'.format(suffix), ref_img) return varexpl -def writefeats(data, mmix, mask, head, suffix=''): +def writefeats(data, mmix, mask, ref_img, suffix=''): # Write feature versions of components feats = computefeats2(data, mmix, mask) - niwrite(unmask(feats, mask), aff, 'feats_{0}.nii'.format(suffix), head) + filewrite(unmask(feats, mask), 'feats_{0}'.format(suffix), ref_img) def writect(comptable, nt, acc, rej, midk, empty, ctname='', varexpl='-1'): @@ -1341,32 +1324,32 @@ def writect(comptable, nt, acc, rej, midk, empty, ctname='', varexpl='-1'): sortab[i, 4])) -def writeresults(OCcatd, comptable, mmix, nt, acc, rej, midk, empty, head): +def writeresults(OCcatd, comptable, mmix, nt, acc, rej, midk, empty, ref_img): lgr.info('++ Writing optimally combined time series') ts = OCcatd - niwrite(ts, aff, 'ts_OC.nii', head) + filewrite(ts, 'ts_OC', ref_img) print("++ Writing Kappa-filtered optimally combined timeseries") - varexpl = write_split_ts(ts, comptable, mmix, acc, rej, midk, head, + varexpl = write_split_ts(ts, comptable, mmix, acc, rej, midk, ref_img, suffix='OC') print("++ Writing signal versions of components") ts_B = get_coeffs(ts, mask, mmix) - niwrite(ts_B[:, :, :, :], aff, 'betas_OC.nii', head) + filewrite(ts_B[:, :, :, :], 'betas_OC', ref_img) if len(acc) != 0: - niwrite(ts_B[:, :, :, acc], aff, 'betas_hik_OC.nii', head) + filewrite(ts_B[:, :, :, acc], 'betas_hik_OC', ref_img) print("++ Writing optimally combined high-Kappa features") writefeats(split_ts(ts, comptable, mmix, acc, rej, midk)[0], - mmix[:, acc], mask, head, suffix='OC2') + mmix[:, acc], mask, ref_img, suffix='OC2') print("++ Writing component table") writect(comptable, nt, acc, rej, midk, empty, ctname='comp_table.txt', varexpl=varexpl) -def writeresults_echoes(acc, rej, midk, head, comptable, mmix, n_echos): +def 
writeresults_echoes(acc, rej, midk, ref_img, comptable, mmix, n_echos): for i_echo in range(n_echos): print("++ Writing Kappa-filtered TE#%i timeseries" % (i_echo+1)) write_split_ts(catd[:, :, :, i_echo, :], comptable, mmix, - acc, rej, midk, head, suffix='e%i' % (i_echo+1)) + acc, rej, midk, ref_img, suffix='e%i' % (i_echo+1)) def main(data, tes, mixm=None, ctab=None, manacc=None, strict=False, @@ -1377,8 +1360,8 @@ def main(data, tes, mixm=None, ctab=None, manacc=None, strict=False, """ Parameters ---------- - data : :obj:`str` or :obj:`list` of :obj:`str` - Either a single z-concatenated file (str or single-entry list) or a + data : :obj:`list` of :obj:`str` + Either a single z-concatenated file (single-entry list) or a list of echo-specific files, in ascending order. tes : :obj:`list` List of echo times associated with data in milliseconds. @@ -1426,49 +1409,23 @@ def main(data, tes, mixm=None, ctab=None, manacc=None, strict=False, fixed_seed : :obj:`int`, optional Seeded value for ICA, for reproducibility. 
""" - global catd, head, aff - tes = [float(te) for te in tes] - n_echos = len(tes) - # TODO: attempt to derive input data format as soon as possible - # we'll need to carry this through to writing out all the resultant output - # files for the rest of the script; options should include .nii and .gii - # - # output_type = get_input_type(options.data) + global catd, ref_img - if isinstance(data, str): - catim = nib.load(data) - elif len(data) == 1: - catim = nib.load(data[0]) - else: - if len(data) != n_echos: - raise ValueError('Number of single-echo "data" files does not ' - 'match number of echos ' - '({0} != {1})'.format(len(data), n_echos)) - imgs = [nib.load(f) for f in data] - if not np.array_equal([img.affine for img in imgs]): - raise ValueError('All affines from files in "data" must be equal.') - zcat_data = np.dstack([img.get_data() for img in imgs]) - catim = nib.Nifti1Image(zcat_data, imgs[0].affine, - header=imgs[0].get_header()) - - # Prepare image metadata for output files - head = catim.get_header() - head.extensions = [] - head.set_sform(head.get_sform(), code=1) - aff = catim.affine + # ensure tes are in appropriate format + tes = [float(te) for te in tes] + n_echos = len(tes) # coerce data to samples x echos x time array - catd = cat2echos(data, n_echos=n_echos) + catd, ref_img = load_data(data, n_echos=n_echos) n_samp, n_echos, n_vols = catd.shape if fout: - fout = aff + fout = ref_img else: fout = None - kdaw = float(kdaw) - rdaw = float(rdaw) + kdaw, rdaw = float(kdaw), float(rdaw) if label is not None: out_dir = 'TED.{0}'.format(label) @@ -1498,52 +1455,57 @@ def main(data, tes, mixm=None, ctab=None, manacc=None, strict=False, lgr.info('++ Computing T2* map') global t2s, s0, t2sG - t2s, s0, t2ss, s0s, t2sG, s0G = t2sadmap(catd, tes, mask, masksum, + t2s, s0, t2ss, s0s, t2sG, s0G = t2sadmap(catd, tes, + mask, masksum, start_echo=1) # set a hard cap for the T2* map - # anything that is 10x higher than the 99.5 %ile will be reset to + # anything 
that is 10x higher than the 99.5 %ile will be reset to 99.5 %ile cap_t2s = stats.scoreatpercentile(t2s.flatten(), 99.5, interpolation_method='lower') - t2s[t2s > cap_t2s*10] = cap_t2s - niwrite(s0, aff, op.join(out_dir, 's0v.nii'), head) - niwrite(t2s, aff, op.join(out_dir, 't2sv.nii'), head) - niwrite(t2ss, aff, op.join(out_dir, 't2ss.nii'), head) - niwrite(s0s, aff, op.join(out_dir, 's0vs.nii'), head) - niwrite(s0G, aff, op.join(out_dir, 's0vG.nii'), head) - niwrite(t2sG, aff, op.join(out_dir, 't2svG.nii'), head) - - # Optimally combine data + t2s[t2s > cap_t2s * 10] = cap_t2s + filewrite(s0, op.join(out_dir, 's0v'), ref_img) + filewrite(t2s, op.join(out_dir, 't2sv'), ref_img) + filewrite(t2ss, op.join(out_dir, 't2ss'), ref_img) + filewrite(s0s, op.join(out_dir, 's0vs'), ref_img) + filewrite(s0G, op.join(out_dir, 's0vG'), ref_img) + filewrite(t2sG, op.join(out_dir, 't2svG'), ref_img) + + # optimally combine data global OCcatd - OCcatd = optcom(catd, t2sG, tes, mask, combmode) + OCcatd = make_optcom(catd, t2sG, tes, mask, combmode) + + # regress out global signal unless explicitly not desired if not no_gscontrol: - catd, OCcatd = gscontrol_raw(catd, OCcatd, head, n_echos) + catd, OCcatd = gscontrol_raw(catd, OCcatd, n_echos, ref_img) if mixm is None: lgr.info("++ Doing ME-PCA and ME-ICA") - n_components, dd = tedpca(catd, combmode, mask, stabilize, head, + n_components, dd = tedpca(catd, combmode, mask, stabilize, ref_img, tes=tes, kdaw=kdaw, rdaw=rdaw, ste=ste) mmix_orig = tedica(n_components, dd, conv, fixed_seed, cost=initcost, final_cost=finalcost) np.savetxt(op.join(out_dir, '__meica_mix.1D'), mmix_orig) seldict, comptable, betas, mmix = fitmodels_direct(catd, mmix_orig, mask, t2s, t2sG, - tes, combmode, head, + tes, combmode, + ref_img, fout=fout, reindex=True) np.savetxt(op.join(out_dir, 'meica_mix.1D'), mmix) - acc, rej, midk, empty = selcomps(seldict, mmix, head, manacc, n_echos, + acc, rej, midk, empty = selcomps(seldict, mmix, ref_img, manacc, n_echos, 
strict_mode=strict, filecsdata=filecsdata) else: mmix_orig = np.loadtxt(op.join(out_dir, 'meica_mix.1D')) seldict, comptable, betas, mmix = fitmodels_direct(catd, mmix_orig, mask, t2s, t2sG, - tes, combmode, head, + tes, combmode, + ref_img, fout=fout) if ctab is None: - acc, rej, midk, empty = selcomps(seldict, mmix, head, manacc, + acc, rej, midk, empty = selcomps(seldict, mmix, ref_img, manacc, n_echos, filecsdata=filecsdata, strict_mode=strict) @@ -1554,7 +1516,7 @@ def main(data, tes, mixm=None, ctab=None, manacc=None, strict=False, lgr.info('** WARNING! No BOLD components detected!!! \n' '** Please check data and results!') - writeresults(OCcatd, comptable, mmix, n_vols, acc, rej, midk, empty, head) - gscontrol_mmix(mmix, acc, rej, midk, empty, head) + writeresults(OCcatd, comptable, mmix, n_vols, acc, rej, midk, empty, ref_img) + gscontrol_mmix(mmix, acc, rej, midk, empty, ref_img) if dne: - writeresults_echoes(acc, rej, midk, head, comptable, mmix, n_echos) + writeresults_echoes(acc, rej, midk, ref_img, comptable, mmix, n_echos) diff --git a/tedana/tests/test_utils.py b/tedana/tests/test_utils.py index 899f71018..14ab70305 100644 --- a/tedana/tests/test_utils.py +++ b/tedana/tests/test_utils.py @@ -48,10 +48,6 @@ def test_fitgaussian(): pass -def test_niwrite(): - pass - - def test_dice(): pass diff --git a/tedana/utils/__init__.py b/tedana/utils/__init__.py index 4059ccb24..fb85ab8e7 100644 --- a/tedana/utils/__init__.py +++ b/tedana/utils/__init__.py @@ -2,13 +2,15 @@ # ex: set sts=4 ts=4 sw=4 et: from .utils import ( - cat2echos, uncat2echos, make_min_mask, - makeadmask, fmask, unmask, - fitgaussian, niwrite, dice, andb, + load_image, load_data, + make_min_mask, makeadmask, + fmask, unmask, filewrite, + fitgaussian, dice, andb, ) __all__ = [ - 'cat2echos', 'uncat2echos', 'make_min_mask', - 'makeadmask', 'fmask', 'unmask', - 'fitgaussian', 'niwrite', 'dice', 'andb'] + 'load_image', 'load_data', + 'make_min_mask', 'makeadmask', + 'fmask', 'unmask', 
'filewrite', + 'fitgaussian', 'dice', 'andb'] diff --git a/tedana/utils/utils.py b/tedana/utils/utils.py index 08af43602..a090b95db 100644 --- a/tedana/utils/utils.py +++ b/tedana/utils/utils.py @@ -1,13 +1,52 @@ """Utilities for meica package""" +import os.path as op import numpy as np import nibabel as nib from nibabel.filename_parser import splitext_addext +from nilearn.image import new_img_like from nilearn._utils import check_niimg import nilearn.masking as nimask from scipy.optimize import leastsq from ..due import due, BibTeX +FORMATS = {'.nii': 'NIFTI', + '.gii': 'GIFTI'} + + +def get_dtype(data): + """ + Determines neuroimaging format of `data` + + Parameters + ---------- + data : list-of-str or str or img_like + Data to determine format of + + Returns + ------- + dtype : {'NIFTI', 'GIFTI', 'OTHER'} str + Format of input data + """ + + if isinstance(data, list): + dtypes = np.unique([splitext_addext(d)[1] for d in data]) + if dtypes.size > 1: + raise ValueError('Provided data detected to have varying formats: ' + '{}'.format(dtypes)) + dtype = dtypes[0] + elif isinstance(data, str): + dtype = splitext_addext(data)[1] + else: # img_like? 
+ if not hasattr(data, 'valid_exts'): + raise TypeError('Input data format cannot be detected.') + dtype = data.valid_exts[0] + + if dtype in FORMATS.keys(): + return FORMATS[dtype] + + return 'OTHER' + def load_image(data): """ @@ -15,7 +54,7 @@ def load_image(data): Parameters ---------- - data : (X x Y x Z [x T]) array_like or niimg-like object + data : (X x Y x Z [x T]) array_like or img_like object Data array or data file to be loaded / reshaped Returns @@ -25,25 +64,24 @@ def load_image(data): """ if isinstance(data, str): - root, ext, addext = splitext_addext(data) - if ext == '.gii': + if get_dtype(data) == 'GIFTI': fdata = np.column_stack([f.data for f in nib.load(data).darrays]) return fdata - else: + elif get_dtype(data) == 'NIFTI': data = check_niimg(data).get_data() - fdata = data.reshape((-1,) + data.shape[3:], order='F') + fdata = data.reshape((-1,) + data.shape[3:]) return fdata.squeeze() -def cat2echos(data, n_echos=None): +def load_data(data, n_echos=None): """ Coerces input `data` files to required 3D array output Parameters ---------- - data : (X x Y x M x T) array_like or list-of-img-like + data : (X x Y x M x T) array_like or list-of-img_like Input multi-echo data array, where `X` and `Y` are spatial dimensions, `M` is the Z-spatial dimensions with all the input echos concatenated, and `T` is time. 
A list of image-like objects (e.g., .nii or .gii) are @@ -56,31 +94,35 @@ def cat2echos(data, n_echos=None): ------- fdata : (S x E x T) np.ndarray Output data where `S` is samples, `E` is echos, and `T` is time + ref_img : str + Filepath to reference image for saving output files """ - # data files were provided if isinstance(data, list): - # individual echo files were provided - if len(data) > 2: - fdata = np.stack([load_image(f) for f in data], axis=1) - if fdata.ndim < 3: - fdata = fdata[..., np.newaxis] - return fdata - # a z-concatenated file was provided; load data and pipe it down - elif len(data) == 1: + if get_dtype(data) == 'GIFTI': # TODO: deal with L/R split GIFTI files + pass + if len(data) == 1: # a z-concatenated file was provided if n_echos is None: raise ValueError('Number of echos `n_echos` must be specified ' 'if z-concatenated data file provided.') - data = check_niimg(data[0]).get_data() - # only two echo files were provided, which doesn't fly - else: + data = data[0] + elif len(data) == 2: # inviable -- need more than 2 echos raise ValueError('Cannot run `tedana` with only two echos: ' '{}'.format(data)) + else: # individual echo files were provided + fdata = np.stack([load_image(f) for f in data], axis=1) + return np.atleast_3d(fdata), data[0] - (nx, ny), nz = data.shape[:2], data.shape[2] // n_echos - fdata = load_image(data.reshape(nx, ny, nz, n_echos, -1, order='F')) + # we have a z-cat file + img = check_niimg(data) + (nx, ny), nz = img.shape[:2], img.shape[2] // n_echos + fdata = load_image(img.get_data().reshape(nx, ny, nz, n_echos, -1)) - return fdata + # create reference image + ref_img = img.__class__(np.zeros((nx, ny, nz)), affine=img.affine, + header=img.header, extra=img.extra) + + return fdata, ref_img def makeadmask(data, minimum=True, getsum=False): @@ -140,7 +182,7 @@ def make_min_mask(data): """ Generates a 3D mask of `data` - Only voxels that are consistently (i.e., across time AND echoes) non-zero + Only samples that are 
consistently (i.e., across time AND echoes) non-zero in `data` are True in output Parameters @@ -159,34 +201,165 @@ def make_min_mask(data): return data.prod(axis=-1).prod(axis=-1).astype(bool) -def get_input_type(input): - pass +def filewrite(data, filename, ref_img, gzip=False, copy_header=True, + copy_meta=False): + """ + Writes `data` to `filename` in format of `ref_img` + + If `ref_img` dtype is GIFTI, then `data` is assumed to be stacked L/R + hemispheric and will be split and saved as two files + + Parameters + ---------- + data : (S [x T]) array_like + Data to be saved + filename : str + Filepath where data should be saved to + ref_img : str or img_like + Reference image + gzip : bool, optional + Whether to gzip output (if not specified in `filename`). Only applies + if output dtype is NIFTI. Default: False + copy_header : bool, optional + Whether to copy header from `ref_img` to new image. Default: True + copy_meta : bool, optional + Whether to copy meta from `ref_img` to new image. Only applies if + output dtype is GIFTI. Default: False + + Returns + ------- + name : str + Path of saved image (with added extensions, as appropriate) + """ + + # get datatype and reference image for comparison + dtype = get_dtype(ref_img) + if isinstance(ref_img, list): + ref_img = ref_img[0] + + # ensure that desired output type (from name) is compatible with `dtype` + root, ext, add = splitext_addext(filename) + if ext != '' and FORMATS[ext] != dtype: + raise ValueError('Cannot write {} data to {} file. 
Please ensure file' + 'formats are compatible'.format(dtype, FORMATS[ext])) + + if dtype == 'NIFTI': + out = new_nii_like(ref_img, data, + copy_header=copy_header) + name = '{}.{}'.format(root, 'nii.gz' if gzip else 'nii') + out.to_filename(name) + elif dtype == 'GIFTI': + # remove possible hemispheric denotations from root + root = op.join(op.dirname(root), op.basename(root).split('.')[0]) + # save hemispheres separately + for n, (hdata, hemi) in enumerate(zip(np.split(data, 2, axis=0), + ['L', 'R'])): + out = new_gii_like(ref_img[n], hdata, + copy_header=copy_header, + copy_meta=copy_meta) + name = '{}.{}.func.gii'.format(root, hemi) + out.to_filename(name) + + return name + + +def new_nii_like(ref_img, data, copy_header=True): + """ + Coerces `data` into NiftiImage format like `ref_img` + + Parameters + ---------- + ref_img : str or img_like + Reference image + data : (S [x T]) array_like + Data to be saved + copy_header : bool, optional + Whether to copy header from `ref_img` to new image. Default: True + + Returns + ------- + nii : nib.nifti.NiftiXImage + NiftiImage + """ + + ref_img = check_niimg(ref_img) + nii = new_img_like(ref_img, + data.reshape(ref_img.shape[:3] + data.shape[1:]), + copy_header=copy_header) + nii.set_data_dtype(data.dtype) + + return nii -def niwrite(data, affine, name, head, outtype='.nii.gz'): +def new_gii_like(ref_img, data, copy_header=True, copy_meta=False): """ - Write out nifti file. + Coerces `data` into GiftiImage format like `ref_img` Parameters ---------- - data : array_like - affine : (4 x 4) array_like - Affine for output file - name : str - Name to save output file to - head : object - outtype : str, optional - Output type of file. Default: '.nii.gz' + ref_img : str or img_like + Reference image + data : (S [x T]) array_like + Data to be saved + copy_header : bool, optional + Whether to copy header from `ref_img` to new image. Default: True + copy_meta : bool, optional + Whether to copy meta from `ref_img` to new image. 
Default: False + + Returns + ------- + gii : nib.gifti.GiftiImage + GiftiImage + """ + + if isinstance(ref_img, str): + ref_img = nib.load(ref_img) + + if data.ndim == 1: + data = np.atleast_2d(data).T + + darrays = [make_gii_darray(ref_img.darrays[n], d, copy_meta=copy_meta) + for n, d in enumerate(data.T)] + gii = nib.gifti.GiftiImage(header=ref_img.header if copy_header else None, + extra=ref_img.extra, + meta=ref_img.meta if copy_meta else None, + labeltable=ref_img.labeltable, + darrays=darrays) + + return gii + + +def make_gii_darray(ref_array, data, copy_meta=False): """ + Converts `data` into GiftiDataArray format like `ref_array` + + Parameters + ---------- + ref_array : str or img_like + Reference array + data : (S,) array_like + Data to be saved + copy_meta : bool, optional + Whether to copy meta from `ref_img` to new image. Default: False + + Returns + ------- + gii : nib.gifti.GiftiDataArray + Output data array instance + """ + + if not isinstance(ref_array, nib.gifti.GiftiDataArray): + raise TypeError('Provided reference is not a GiftiDataArray.') + darray = nib.gifti.GiftiDataArray(data, + intent=ref_array.intent, + datatype=data.dtype, + encoding=ref_array.encoding, + endian=ref_array.endian, + coordsys=ref_array.coordsys, + ordering=ref_array.ind_ord, + meta=ref_array.meta if copy_meta else None) - # get rid of NaN - data[np.isnan(data)] = 0 - # set header info - header = head.copy() - header.set_data_shape(list(data.shape)) - outni = nib.Nifti1Image(data, affine, header=header) - outni.set_data_dtype('float64') - outni.to_filename(name) + return darray def uncat2echos(data): @@ -209,7 +382,7 @@ def uncat2echos(data): 'provided data has only {0}'.format(data.ndim)) (nx, ny), nz = data.shape[:2], np.prod(data.shape[2:4]) - return data.reshape(nx, ny, nz, -1, order='F') + return data.reshape(nx, ny, nz, -1) def fmask(data, mask=None): @@ -218,9 +391,9 @@ def fmask(data, mask=None): Parameters ---------- - data : (X x Y x Z [x E [x T]) array_like or 
niimg-like object + data : (X x Y x Z [x E [x T]) array_like or img_like object Data array or data file to be masked - mask : (X x Y x Z) array_like or niimg-like object + mask : (X x Y x Z) array_like or img_like object Boolean array or mask file Returns From 9785e424831ffc68ea4fdc25af2b90a22fd2a5de Mon Sep 17 00:00:00 2001 From: Elizabeth DuPre Date: Mon, 7 May 2018 14:47:56 -0400 Subject: [PATCH 06/18] Update csstepdata format as JSON --- tedana/interfaces/tedana.py | 38 +++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/tedana/interfaces/tedana.py b/tedana/interfaces/tedana.py index b39d32efe..7f5c1c3df 100644 --- a/tedana/interfaces/tedana.py +++ b/tedana/interfaces/tedana.py @@ -1,5 +1,6 @@ import os import os.path as op +import json import shutil import pickle import textwrap @@ -801,15 +802,15 @@ def selcomps(seldict, mmix, head, manacc, n_echos, debug=False, olevel=2, oversi np.union1d(group0, rej)) min_acc = np.union1d(group0, toacc_hi) to_clf = np.setdiff1d(nc, np.union1d(min_acc, rej)) - diagstepkeys = ['rej', 'KRcut', 'Kcut', 'Rcut', 'dbscanfailed', - 'midkfailed', 'KRguess', 'group0', 'min_acc', - 'toacc_hi'] - diagstepout = [] - for ddk in diagstepkeys: - diagstepout.append("%s: %s" % (ddk, eval('str(%s)' % ddk))) + diagstep_keys = [rej, KRcut, Kcut, Rcut, dbscanfailed, + midkfailed, KRguess, min_acc, toacc_hi] + diagstep_vals = ['Rejected components', 'Kappa-Rho cut point', + 'Kappa cut point', 'Rho cut point', 'DBSCAN failed to converge', + 'Mid-Kappa failed (limited BOLD signal)', 'Kappa-Rho guess', + 'min_acc', 'toacc_hi'] + with open('csstepdata.txt', 'w') as ofh: - ofh.write('\n'.join(diagstepout)) - ofh.close() + json.dumps(dict(zip(diagstep_keys, diagstep_vals)), ofh) return list(sorted(min_acc)), list(sorted(rej)), [], list(sorted(to_clf)) # Find additional components to reject based on Dice - doing this here @@ -979,15 +980,20 @@ def selcomps(seldict, mmix, head, manacc, n_echos, 
debug=False, olevel=2, oversi orphan = np.setdiff1d(nc, list(ncl) + list(to_ign) + list(midk) + list(rej)) if savecsdiag: - diagstepkeys = ['rej', 'KRcut', 'Kcut', 'Rcut', 'dbscanfailed', - 'KRguess', 'group0', 'dice_rej', 'rej_supp', 'to_clf', - 'midk', 'svm_acc_fail', 'toacc_hi', 'toacc_lo', - 'field_art', 'phys_art', 'misc_art', 'ncl', 'ign'] - diagstepout = [] - for ddk in diagstepkeys: - diagstepout.append("%s: %s" % (ddk, eval('str(%s)' % ddk))) + + diagstep_keys = ['Rejected components', 'Kappa-Rho cut point', 'Kappa cut', + 'Rho cut', 'DBSCAN failed to converge', 'Kappa-Rho guess', + 'Dice rejected', 'rej_supp', 'to_clf', + 'Mid-kappa components', 'svm_acc_fail', 'toacc_hi', 'toacc_lo', + 'Field artifacts', 'Physiological artifacts', + 'Miscellaneous artifacts', 'ncl', 'Ignored components'] + diagstep_vals = [rej, KRcut, Kcut, Rcut, dbscanfailed, + KRguess, dice_rej, rej_supp, to_clf, + midk, svm_acc_fail, toacc_hi, toacc_lo, + field_art, phys_art, misc_art, ncl, ign] + with open('csstepdata.txt', 'w') as ofh: - ofh.write('\n'.join(diagstepout)) + json.dumps(dict(zip(diagstep_keys, diagstep_vals)), ofh) allfz = np.array([Tz, Vz, Ktz, KRr, cnz, Rz, mmix_kurt, fdist_z]) np.savetxt('csdata.txt', allfz) From a4d9f5b214ebd799aaf1a430292ce43202616819 Mon Sep 17 00:00:00 2001 From: Elizabeth DuPre Date: Mon, 7 May 2018 14:55:56 -0400 Subject: [PATCH 07/18] Remove currently unused imports for linting --- tedana/tests/test_utils.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tedana/tests/test_utils.py b/tedana/tests/test_utils.py index 14ab70305..8a027a858 100644 --- a/tedana/tests/test_utils.py +++ b/tedana/tests/test_utils.py @@ -2,11 +2,6 @@ Tests for tedana.utils """ -import os.path -from tedana import utils -import nibabel as nb -import numpy as np - def test_load_image(): pass From e9994cddea8cc067f2089ea86af9ca4e4d5ea8fd Mon Sep 17 00:00:00 2001 From: Elizabeth DuPre Date: Mon, 7 May 2018 15:21:31 -0400 Subject: [PATCH 08/18] Address review comments 
--- tedana/interfaces/tedana.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tedana/interfaces/tedana.py b/tedana/interfaces/tedana.py index 7f5c1c3df..d81915b3a 100644 --- a/tedana/interfaces/tedana.py +++ b/tedana/interfaces/tedana.py @@ -810,7 +810,7 @@ def selcomps(seldict, mmix, head, manacc, n_echos, debug=False, olevel=2, oversi 'min_acc', 'toacc_hi'] with open('csstepdata.txt', 'w') as ofh: - json.dumps(dict(zip(diagstep_keys, diagstep_vals)), ofh) + json.dump(dict(zip(diagstep_keys, diagstep_vals)), ofh, indent=4, sort_keys=True) return list(sorted(min_acc)), list(sorted(rej)), [], list(sorted(to_clf)) # Find additional components to reject based on Dice - doing this here @@ -993,7 +993,7 @@ def selcomps(seldict, mmix, head, manacc, n_echos, debug=False, olevel=2, oversi field_art, phys_art, misc_art, ncl, ign] with open('csstepdata.txt', 'w') as ofh: - json.dumps(dict(zip(diagstep_keys, diagstep_vals)), ofh) + json.dump(dict(zip(diagstep_keys, diagstep_vals)), ofh, indent=4, sort_keys=True) allfz = np.array([Tz, Vz, Ktz, KRr, cnz, Rz, mmix_kurt, fdist_z]) np.savetxt('csdata.txt', allfz) From 047242d443fb5ffce6a1bf3c1bb931b66a71c6d7 Mon Sep 17 00:00:00 2001 From: Elizabeth DuPre Date: Mon, 7 May 2018 15:35:40 -0400 Subject: [PATCH 09/18] Patch errors --- tedana/interfaces/tedana.py | 6 +++--- tedana/utils/__init__.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tedana/interfaces/tedana.py b/tedana/interfaces/tedana.py index d81915b3a..3c060e731 100644 --- a/tedana/interfaces/tedana.py +++ b/tedana/interfaces/tedana.py @@ -802,12 +802,12 @@ def selcomps(seldict, mmix, head, manacc, n_echos, debug=False, olevel=2, oversi np.union1d(group0, rej)) min_acc = np.union1d(group0, toacc_hi) to_clf = np.setdiff1d(nc, np.union1d(min_acc, rej)) - diagstep_keys = [rej, KRcut, Kcut, Rcut, dbscanfailed, - midkfailed, KRguess, min_acc, toacc_hi] - diagstep_vals = ['Rejected components', 'Kappa-Rho cut point', + 
diagstep_keys = ['Rejected components', 'Kappa-Rho cut point', 'Kappa cut point', 'Rho cut point', 'DBSCAN failed to converge', 'Mid-Kappa failed (limited BOLD signal)', 'Kappa-Rho guess', 'min_acc', 'toacc_hi'] + diagstep_vals = [rej, KRcut, Kcut, Rcut, dbscanfailed, + midkfailed, KRguess, min_acc, toacc_hi] with open('csstepdata.txt', 'w') as ofh: json.dump(dict(zip(diagstep_keys, diagstep_vals)), ofh, indent=4, sort_keys=True) diff --git a/tedana/utils/__init__.py b/tedana/utils/__init__.py index fb85ab8e7..771178fd5 100644 --- a/tedana/utils/__init__.py +++ b/tedana/utils/__init__.py @@ -2,7 +2,7 @@ # ex: set sts=4 ts=4 sw=4 et: from .utils import ( - load_image, load_data, + load_image, load_data, get_dtype, make_min_mask, makeadmask, fmask, unmask, filewrite, fitgaussian, dice, andb, @@ -10,7 +10,7 @@ __all__ = [ - 'load_image', 'load_data', + 'load_image', 'load_data', 'get_dtype' 'make_min_mask', 'makeadmask', 'fmask', 'unmask', 'filewrite', 'fitgaussian', 'dice', 'andb'] From bb7eb175ce02027585f55b5ae8296d369f16b778 Mon Sep 17 00:00:00 2001 From: Ross Markello Date: Tue, 8 May 2018 00:03:15 -0400 Subject: [PATCH 10/18] [DOC] Doc-strings galore! Very basic but a start Added incredibly basic doc-strings to most of the functions in tedana.py --- tedana/interfaces/tedana.py | 592 +++++++++++++++++++++++++++++------- 1 file changed, 480 insertions(+), 112 deletions(-) diff --git a/tedana/interfaces/tedana.py b/tedana/interfaces/tedana.py index 3c060e731..37f2ac3e8 100644 --- a/tedana/interfaces/tedana.py +++ b/tedana/interfaces/tedana.py @@ -37,29 +37,28 @@ def do_svm(X_train, y_train, X_test, svmtype=0): """ - sklearn's Support Vector Classification (SVC). - For svmtype=1, implemented in liblinear rather than libsvm. 
+ Implements Support Vector Classification on provided data Parameters ---------- - X_train : {array-like, sparse matrix}, shape (n_samples, n_features) + X_train : (N1 x F) array_like Training vectors, where n_samples is the number of samples in the training dataset and n_features is the number of features. - y_train : array-like, shape (n_samples,) + y_train : (N1,) array_like Target values (class labels in classification, real numbers in regression) - X_test : {array-like, sparse matrix}, shape (n_samples, n_features) + X_test : (N2 x F) array_like Test vectors, where n_samples is the number of samples in the test dataset and n_features is the number of features. svmtype : int - Desired support vector machine type. + Desired support vector machine type Returns ------- - y_pred : array, shape (n_samples,) - Predicted class labels for samples in X_test. + y_pred : (N2,) np.ndarray + Predicted class labels for samples in `X_test` clf : {:obj:`sklearn.svm.classes.SVC`, :obj:`sklearn.svm.classes.LinearSVC`} - Trained sklearn model instance. + Trained sklearn model instance """ if svmtype == 0: @@ -69,7 +68,7 @@ def do_svm(X_train, y_train, X_test, svmtype=0): elif svmtype == 2: clf = svm.SVC(kernel='linear', probability=True) else: - raise ValueError('Input svmtype not in range (3)') + raise ValueError('Input svmtype not in [1, 2, 3]') clf.fit(X_train, y_train) y_pred = clf.predict(X_test) @@ -84,19 +83,28 @@ def spatclust(data, mask, csize, thr, ref_img, infile=None, dindex=0, Parameters ---------- - data : array_like - mask : + data : (S x T) array-like + Input data array + mask : (S,) array-like + Boolean mask array csize : int - thr : - header : - aff : - infile : - dindex : - tindex : + Size of cluster (in voxels) to retain + thr : float + Value to threshold image at before clustering + ref_img : str or img_like + Reference image to dictate how outputs are saved to disk + infile : str, optional + Path to file that should be used for clustering instead of `data`. 
+ Default: None + dindex : int, optional + Index of data (2nd dimension) to use for clustering. Default: 0 + tindex : int, optional + Index of data (2nd dimension) to use for thresholding. Default: 0 Returns ------- - clustered : + clustered : (S x T) np.ndarray + Boolean array indicating data samples to be retained after clustering """ if infile is None: @@ -121,7 +129,7 @@ def spatclust(data, mask, csize, thr, ref_img, infile=None, dindex=0, def get_coeffs(data, mask, X, add_const=False): """ - get_coeffs(data, X) + Performs least-squares fit of `X` against `data` Parameters ---------- @@ -130,33 +138,33 @@ def get_coeffs(data, mask, X, add_const=False): mask : (S,) array-like Boolean mask array X : (T x C) array-like - Array where `T` is time and `C` is components + Array where `T` is time and `C` is predictor variables add_const : bool, optional Add intercept column to `X` before fitting. Default: False Returns ------- - out : (S x C) np.ndarray - Array of betas for all samples `S` + betas : (S x C) np.ndarray + Array of `S` sample betas for `C` predictors """ + # mask data and flip (time x samples) mdata = data[mask].T - # Coerce X to >=2d + # coerce X to >=2d if single variable supplied X = np.atleast_2d(X) - if X.shape[0] == 1: X = X.T - if add_const: # add intercept + if add_const: # add intercept, if specified Xones = np.ones((np.min(mdata.shape), 1)) X = np.column_stack([X, Xones]) - tmpbetas = np.linalg.lstsq(X, mdata)[0].T + betas = np.linalg.lstsq(X, mdata)[0].T if add_const: # drop beta for intercept - tmpbetas = tmpbetas[:, :-1] - out = unmask(tmpbetas, mask) + betas = betas[:, :-1] + betas = unmask(betas, mask) - return out + return betas def getelbow_cons(ks, val=False): @@ -165,13 +173,13 @@ def getelbow_cons(ks, val=False): Parameters ---------- - ks : array-like + ks : array_like val : bool, optional Return the value of the elbow instead of the index.
Default: False Returns ------- - array-like + int or float Either the elbow index (if val is False) or the value at the elbow index (if val is True) """ @@ -201,13 +209,13 @@ def getelbow_mod(ks, val=False): Parameters ---------- - ks : array-like + ks : array_like val : bool, optional Return the value of the elbow instead of the index. Default: False Returns ------- - array-like + int or float Either the elbow index (if val is False) or the value at the elbow index (if val is True) """ @@ -234,14 +242,13 @@ def getelbow_aggr(ks, val=False): Parameters ---------- - ks : array-like - + ks : array_like val : bool, optional Default is False Returns ------- - array-like + int or float Either the elbow index (if val is True) or the values at the elbow index (if val is False) """ @@ -287,57 +294,130 @@ def getfbounds(n_echos): def eimask(dd, ees=None): """ Returns mask for data between [0.001, 5] * 98th percentile of dd + + Parameters + ---------- + dd : (S x E x T) array_like + Input data, where `S` is samples, `E` is echos, and `T` is time + ees : (N,) list + Indices of echos to assess from `dd` in calculating output + + Returns + ------- + imask : (S x N) np.ndarray + Boolean array denoting, for each assessed echo, the samples whose mean over time lies between the lower and upper thresholds """ if ees is None: ees = range(dd.shape[1]) imask = np.zeros([dd.shape[0], len(ees)], dtype=bool) for ee in ees: - lgr.info(ee) + lgr.info('++ Creating eimask for echo {}'.format(ee)) perc98 = stats.scoreatpercentile(dd[:, ee, :].flatten(), 98, interpolation_method='lower') lthr, hthr = 0.001 * perc98, 5 * perc98 - lgr.info(lthr, hthr) + lgr.info('++ Eimask threshold boundaries: {}'.format([lthr, hthr])) m = dd[:, ee, :].mean(axis=1) imask[np.logical_and(m > lthr, m < hthr), ee] = True return imask -def split_ts(data, comptable, mmix, acc, rej, midk): - cbetas = get_coeffs(data-data.mean(-1)[:, :, :, np.newaxis], mask, mmix) - betas = fmask(cbetas, mask) +def split_ts(data, mmix, mask, acc): + """ + Splits `data` time series into accepted component time series and remainder + +
Parameters + ---------- + data : (S x T) array_like + Input data, where `S` is samples and `T` is time + mmix : (T x C) array_like + Mixing matrix for converting input data to component space, where `C` + is components and `T` is the same as in `data` + mask : (S,) array_like + Boolean mask array + acc : list + List of accepted components used to subset `mmix` + + Returns + ------- + hikts : (S x T) np.ndarray + Time series reconstructed using only components in `acc` + rest : (S x T) np.ndarray + Original data with `hikts` removed + """ + + cbetas = get_coeffs(data - data.mean(axis=-1, keepdims=True), mask, mmix) + betas = cbetas[mask] if len(acc) != 0: hikts = unmask(betas[:, acc].dot(mmix.T[acc, :]), mask) else: hikts = None - return hikts, data-hikts + + return hikts, data - hikts def computefeats2(data, mmix, mask, normalize=True): - # Write feature versions of components - data = data[mask] - # demean data - data_vn = stats.zscore(data, axis=-1) - # get betas for demeaned data against `mmix` + """ + Converts `data` to component space using `mmix` + + Parameters + ---------- + data : (S x T) array_like + Input data + mmix : (T x C) array_like + Mixing matrix for converting input data to component space, where `C` + is components and `T` is the same as in `data` + mask : (S,) array-like + Boolean mask array + normalize : bool, optional + Whether to z-score output. Default: True + + Returns + ------- + data_Z : (S x C) np.ndarray + Data in component space + """ + + # demean masked data + data_vn = stats.zscore(data[mask], axis=-1) + + # get betas of `data`~`mmix` and limit to range [-0.999, 0.999] data_R = get_coeffs(unmask(data_vn, mask), mask, mmix)[mask] - # cap betas to range [-0.999, 0.999] data_R[data_R < -0.999] = -0.999 data_R[data_R > 0.999] = 0.999 - # R-to-Z transform? 
+ + # R-to-Z transform data_Z = np.arctanh(data_R) if len(data_Z.shape) == 1: data_Z = np.atleast_2d(data_Z).T + + # normalize data if normalize: - # standardize data_Zm = stats.zscore(data_Z, axis=0) - # add back (mean / stdev) - data_Z = data_Zm + (data_Z.mean(axis=0, keepdims=True) / data_Z.std(axis=0, keepdims=True)) + data_Z = data_Zm + (data_Z.mean(axis=0, keepdims=True) / + data_Z.std(axis=0, keepdims=True)) return data_Z def ctabsel(ctabfile): - ctlines = open(ctabfile).readlines() + """ + Loads a pre-existing component table file + + Parameters + ---------- + ctabfile : str + Filepath to existing component table + + Returns + ------- + ctab : (4,) tuple-of-arrays + Tuple containing arrays of (1) accepted, (2) rejected, (3) mid, and (4) + ignored components + """ + + with open(ctabfile, 'r') as src: + ctlines = src.readlines() class_tags = ['#ACC', '#REJ', '#MID', '#IGN'] class_dict = {} for ii, ll in enumerate(ctlines): @@ -353,21 +433,41 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, ref_img, Parameters ---------- catd : (S x E x T) array_like + Input data, where `S` is samples, `E` is echos, and `T` is time mmix : (T x C) array_like + Mixing matrix for converting input data to component space, where `C` + is components and `T` is the same as in `catd` mask : (S,) array_like + Boolean mask array t2s : (S,) array_like t2sG : (S,) array_like - tes : (E,) list - combmode : str + tes : list + List of echo times associated with `catd`, in milliseconds + combmode : {'t2s', 'ste'} str + How optimal combination of echos should be made, where 't2s' indicates + using the method of Posse 1999 and 'ste' indicates using the method of + Poser 2006 ref_img : str or img_like + Reference image to dictate how outputs are saved to disk fout : bool - Whether to output per-component TE-dependencen maps Default: None + Whether to output per-component TE-dependence maps. 
Default: None reindex : bool, optional Default: False mmixN : array_like, optional Default: None full_sel : bool, optional + Whether to perform selection of components based on Rho/Kappa scores. Default: True + + Returns + ------- + seldict : dict + comptab : (N x 5) np.ndarray + Array with columns denoting (1) index of component, (2) Kappa score of + component, (3) Rho score of component, (4) variance explained by + component, and (5) normalized variance explained by component + betas : np.ndarray + mmix_new : np.ndarray """ # compute optimal combination of raw data @@ -552,11 +652,47 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, ref_img, return seldict, comptab, betas, mmix_new -def selcomps(seldict, mmix, head, manacc, n_echos, debug=False, olevel=2, oversion=99, - filecsdata=False, savecsdiag=True, - strict_mode=False): +def selcomps(seldict, mmix, ref_img, manacc, n_echos, debug=False, olevel=2, oversion=99, + filecsdata=False, savecsdiag=True, strict_mode=False): """ - Select components.
+ Labels components in `mmix` + + Parameters + ---------- + seldict : dict + As output from `fitmodels_direct` + mmix : (C x T) array_like + Mixing matrix for converting input data to component space, where `C` + is components and `T` is the number of volumes in the original data + ref_img + Reference image to dictate how outputs are saved to disk + manacc + Comma-separated list of indices of manually accepted components + n_echos : int + Number of echos in original data + debug : bool, optional + Default: False + olevel : int, optional + Default: 2 + oversion : int, optional + Default: 99 + filecsdata: bool, optional + Default: False + savecsdiag: bool, optional + Default: True + strict_mode: bool, optional + Default: False + + Returns + ------- + acc : list + Indices of accepted (BOLD) components in `mmix` + rej : list + Indices of rejected (non-BOLD) components in `mmix` + midk : list + Indices of mid-K (questionable) components in `mmix` + ign : list + Indices of ignored components in `mmix` """ if filecsdata: import bz2 @@ -1002,6 +1138,47 @@ def selcomps(seldict, mmix, head, manacc, n_echos, debug=False, olevel=2, oversi def tedpca(catd, combmode, mask, stabilize, ref_img, tes, kdaw, rdaw, ste=0, mlepca=True): + """ + Performs PCA on `catd` and uses TE-dependence to dimensionally reduce data + + Parameters + ---------- + catd : (S x E x T) array_like + Input functional data + combmode : {'t2s', 'ste'} str + How optimal combination of echos should be made, where 't2s' indicates + using the method of Posse 1999 and 'ste' indicates using the method of + Poser 2006 + mask : (S,) array_like + Boolean mask array + stabilize : bool + Whether to attempt to stabilize convergence of ICA by returning + dimensionally-reduced data from PCA and component selection. 
+ ref_img : str or img_like + Reference image to dictate how outputs are saved to disk + tes : list + List of echo times associated with `catd`, in milliseconds + kdaw : float + Dimensionality augmentation weight for Kappa calculations + rdaw : float + Dimensionality augmentation weight for Rho calculations + ste : int or list-of-int, optional + Which echos to use in PCA. Values -1 and 0 are special, where a value + of -1 will indicate using all the echos and 0 will indicate using the + optimal combination of the echos. A list can be provided to indicate + a subset of echos. Default: 0 + mlepca : bool, optional + Whether to use the method originally explained in Minka, NIPS 2000 for + guessing PCA dimensionality instead of a traditional SVD. Default: True + + Returns + ------- + n_components : int + Number of components retained from PCA decomposition + dd : (S x E x T) np.ndarray + Dimensionally-reduced functional data + """ + n_samp, n_echos, n_vols = catd.shape ste = np.array([int(ee) for ee in str(ste).split(',')]) @@ -1022,7 +1199,7 @@ def tedpca(catd, combmode, mask, stabilize, ref_img, tes, kdaw, rdaw, ste=0, dz = (dz - dz.mean()) / dz.std() # var normalize everything if not op.exists('pcastate.pkl'): - # Do PC dimension selection and get eigenvalue cutoff + # do PC dimension selection and get eigenvalue cutoff if mlepca: from sklearn.decomposition import PCA ppca = PCA(n_components='mle', svd_solver='full') @@ -1067,7 +1244,7 @@ def tedpca(catd, combmode, mask, stabilize, ref_img, tes, kdaw, rdaw, ste=0, with open('pcastate.pkl', 'wb') as handle: pickle.dump(pcastate, handle) except TypeError: - lgr.info('Could not save PCA solution!') + lgr.warning('Could not save PCA solution.') else: # if loading existing state lgr.info('Loading PCA') @@ -1125,9 +1302,33 @@ def tedpca(catd, combmode, mask, stabilize, ref_img, tes, kdaw, rdaw, ste=0, def tedica(n_components, dd, conv, fixed_seed, cost, final_cost): """ - Input is dimensionally reduced spatially 
concatenated multi-echo - time series dataset from `tedpca`. Output is comptable, mmix, smaps - from ICA, and betas from fitting catd to mmix. + Performs ICA on `dd` and returns mixing matrix + + Parameters + ---------- + n_components : int + Number of components retained from PCA decomposition + dd : (S x E x T) np.ndarray + Dimensionally-reduced functional data, where `S` is samples, `E` is + echos, and `T` is time + conv : float + Convergence limit for ICA + fixed_seed : int + Seed for ensuring reproducibility of ICA results + cost : {'tanh', 'pow3', 'gaus', 'skew'} str + Initial cost function for ICA + final_cost : {'tanh', 'pow3', 'gaus', 'skew'} str + Final cost function for ICA + + Returns + ------- + mmix : (C x T) np.ndarray + Mixing matrix for converting input data to component space, where `C` + is components and `T` is the same as in `dd` + + Notes + ----- + Uses `mdp` implementation of FastICA for decomposition """ import mdp @@ -1139,17 +1340,41 @@ def tedica(n_components, dd, conv, fixed_seed, cost, final_cost): icanode.train(dd) smaps = icanode.execute(dd) # noqa mmix = icanode.get_recmatrix().T - mmix = (mmix-mmix.mean(0))/mmix.std(0) + mmix = stats.zscore(mmix, axis=0) return mmix def gscontrol_raw(catd, optcom, n_echos, ref_img, dtrank=4): """ + Removes global signal from individual echo `catd` and `optcom` time series + This function uses the spatial global signal estimation approach to - modify catd (global variable) to removal global signal out of individual - echo time series datasets. The spatial global signal is estimated - from the optimally combined data after detrending with a Legendre - polynomial basis of `order = 0` and `degree = dtrank`. + remove global signal from individual echo time series datasets. The + spatial global signal is estimated from the optimally combined data after + detrending with a Legendre polynomial basis of `order = 0` and + `degree = dtrank`.
+ + Parameters + ---------- + catd : (S x E x T) array_like + Input functional data + optcom : (S x T) array_like + Optimally-combined functional data (i.e., the output of + `tedana.interfaces.t2smap.make_optcom`) + n_echos : int + Number of echos in data. Should be the same as `E` dimension of `catd` + ref_img : str or img_like + Reference image to dictate how outputs are saved to disk + dtrank : int, optional + Specifies degree of Legendre polynomial basis function for estimating + spatial global signal. Default: 4 + + Returns + ------- + dm_catd : (S x E x T) array_like + Input `catd` with global signal removed from time series + dm_optcom : (S x T) array_like + Input `optcom` with global signal removed from time series """ lgr.info('++ Applying amplitude-based T1 equilibration correction') @@ -1184,8 +1409,8 @@ def gscontrol_raw(catd, optcom, n_echos, ref_img, dtrank=4): np.atleast_2d(glbase.T[dtrank])) + Gmu[Gmask][:, np.newaxis] filewrite(optcom, 'tsoc_orig', ref_img) - optcom = unmask(tsoc_nogs, Gmask) - filewrite(optcom, 'tsoc_nogs.nii', ref_img) + dm_optcom = unmask(tsoc_nogs, Gmask) + filewrite(dm_optcom, 'tsoc_nogs.nii', ref_img) # Project glbase out of each echo dm_catd = catd.copy() # don't overwrite catd @@ -1196,10 +1421,27 @@ def gscontrol_raw(catd, optcom, n_echos, ref_img, dtrank=4): np.atleast_2d(glbase.T[dtrank])) dm_catd[:, echo, :] = unmask(e_nogs, Gmask) - return dm_catd, optcom + return dm_catd, dm_optcom -def gscontrol_mmix(mmix, acc, rej, midk, empty, ref_img): +def gscontrol_mmix(OCcatd, mmix, acc, rej, midk, ref_img): + """ + Parameters + ---------- + OCcatd : (S x T) array_like + Optimally-combined time series data + mmix : (C x T) array_like + Mixing matrix for converting input data to component space, where `C` + is components and `T` is the same as in `OCcatd` + acc : list + Indices of accepted (BOLD) components in `mmix` + rej : list + Indices of rejected (non-BOLD) components in `mmix` + midk : list + Indices of mid-K (questionable)
components in `mmix` + ref_img : str or img_like + Reference image to dictate how outputs are saved to disk + """ Gmu = OCcatd.mean(axis=-1) Gstd = OCcatd.std(axis=-1) @@ -1257,38 +1499,118 @@ def gscontrol_mmix(mmix, acc, rej, midk, empty, ref_img): np.savetxt('meica_mix_T1c.1D', mmixnogs) -def write_split_ts(data, comptable, mmix, acc, rej, midk, ref_img, suffix=''): - mdata = fmask(data, mask) - betas = fmask(get_coeffs(unmask((mdata.T - mdata.T.mean(0)).T, mask), - mask, mmix), mask) - dmdata = mdata.T-mdata.T.mean(0) - varexpl = (1-((dmdata.T-betas.dot(mmix.T))**2.).sum()/(dmdata**2.).sum())*100 +def write_split_ts(data, mmix, acc, rej, midk, ref_img, suffix=''): + """ + Splits `data` into denoised / noise / ignored time series and saves to disk + + Parameters + ---------- + data : (S x T) array_like + Input time series + mmix : (C x T) array_like + Mixing matrix for converting input data to component space, where `C` + is components and `T` is the same as in `data` + acc : list + Indices of accepted (BOLD) components in `mmix` + rej : list + Indices of rejected (non-BOLD) components in `mmix` + midk : list + Indices of mid-K (questionable) components in `mmix` + ref_img : str or img_like + Reference image to dictate how outputs are saved to disk + suffix : str, optional + Appended to name of saved files (before extension). 
Default: '' + + Returns + ------- + varexpl : float + Percent variance of data explained by extracted + retained components + """ + + # mask and de-mean data + mdata = data[mask] + dmdata = mdata.T - mdata.T.mean(axis=0) + + # get variance explained by retained components + betas = get_coeffs(unmask(dmdata.T, mask), mask, mmix)[mask] + varexpl = (1 - ((dmdata.T - betas.dot(mmix.T))**2.).sum() / (dmdata**2.).sum()) * 100 lgr.info('Variance explained: ', varexpl, '%') + + # create component and de-noised time series and save to files + hikts = betas[:, acc].dot(mmix.T[acc, :]) midkts = betas[:, midk].dot(mmix.T[midk, :]) lowkts = betas[:, rej].dot(mmix.T[rej, :]) + dnts = data[mask] - lowkts - midkts if len(acc) != 0: - filewrite(unmask(betas[:, acc].dot(mmix.T[acc, :]), mask), - 'hik_ts_{0}'.format(suffix), ref_img) + filewrite(unmask(hikts, mask), 'hik_ts_{0}'.format(suffix), ref_img) if len(midk) != 0: filewrite(unmask(midkts, mask), 'midk_ts_{0}'.format(suffix), ref_img) if len(rej) != 0: filewrite(unmask(lowkts, mask), 'lowk_ts_{0}'.format(suffix), ref_img) - filewrite(unmask(data[mask] - lowkts - midkts, mask), - 'dn_ts_{0}'.format(suffix), ref_img) + filewrite(unmask(dnts, mask), 'dn_ts_{0}'.format(suffix), ref_img) + return varexpl def writefeats(data, mmix, mask, ref_img, suffix=''): - # Write feature versions of components - feats = computefeats2(data, mmix, mask) - filewrite(unmask(feats, mask), 'feats_{0}'.format(suffix), ref_img) + """ + Converts `data` to component space with `mmix` and saves to disk + Parameters + ---------- + data : (S x T) array_like + Input time series + mmix : (C x T) array_like + Mixing matrix for converting input data to component space, where `C` + is components and `T` is the same as in `data` + mask : (S,) array_like + Boolean mask array + ref_img : str or img_like + Reference image to dictate how outputs are saved to disk + suffix : str, optional + Appended to name of saved files (before extension). 
Default: '' + + Returns + ------- + fname : str + Filepath to saved file + """ + + # write feature versions of components + feats = unmask(computefeats2(data, mmix, mask), mask) + fname = filewrite(feats, 'feats_{0}'.format(suffix), ref_img) + + return fname + + +def writect(comptable, n_vols, acc, rej, midk, empty, ctname='comp_table.txt', varexpl='-1'): + """ + Saves component table to disk + + Parameters + ---------- + comptable : (N x 5) array_like + Array with columns denoting (1) index of component, (2) Kappa score of + component, (3) Rho score of component, (4) variance explained by + component, and (5) normalized variance explained by component + n_vols : int + Number of volumes in original time series + acc : list + Indices of accepted (BOLD) components in `mmix` + rej : list + Indices of rejected (non-BOLD) components in `mmix` + midk : list + Indices of mid-K (questionable) components in `mmix` + empty : list + Indices of ignored components in `mmix` + ctname : str, optional + Filename to save comptable to disk. 
Default 'comp_table.txt' + varexpl : str + Variance explained by original data + """ -def writect(comptable, nt, acc, rej, midk, empty, ctname='', varexpl='-1'): n_components = comptable.shape[0] sortab = comptable[comptable[:, 1].argsort()[::-1], :] - if ctname is '': - ctname = 'comp_table.txt' open('accepted.txt', 'w').write(','.join([str(int(cc)) for cc in acc])) open('rejected.txt', 'w').write(','.join([str(int(cc)) for cc in rej])) open('midk_rejected.txt', @@ -1299,7 +1621,7 @@ def writect(comptable, nt, acc, rej, midk, empty, ctname='', varexpl='-1'): n_components=n_components, dfe=len(acc), rjn=len(midk) + len(rej), - dfn=nt - len(midk) - len(rej), + dfn=n_vols - len(midk) - len(rej), acc=','.join([str(int(cc)) for cc in acc]), rej=','.join([str(int(cc)) for cc in rej]), mid=','.join([str(int(cc)) for cc in midk]), @@ -1330,32 +1652,78 @@ def writect(comptable, nt, acc, rej, midk, empty, ctname='', varexpl='-1'): sortab[i, 4])) -def writeresults(OCcatd, comptable, mmix, nt, acc, rej, midk, empty, ref_img): +def writeresults(ts, mask, comptable, mmix, n_vols, acc, rej, midk, empty, ref_img): + """ + Denoises `ts` and saves all resulting files to disk + + Parameters + ---------- + ts : (S x T) array_like + Time series to denoise and save to disk + mask : (S,) array_like + Boolean mask array + comptable : (N x 5) array_like + Array with columns denoting (1) index of component, (2) Kappa score of + component, (3) Rho score of component, (4) variance explained by + component, and (5) normalized variance explained by component + mmix : (C x T) array_like + Mixing matrix for converting input data to component space, where `C` + is components and `T` is the same as in `data` + acc : list + Indices of accepted (BOLD) components in `mmix` + rej : list + Indices of rejected (non-BOLD) components in `mmix` + midk : list + Indices of mid-K (questionable) components in `mmix` + empty : list + Indices of ignored components in `mmix` + ref_img : str or img_like + 
Reference image to dictate how outputs are saved to disk + """ + lgr.info('++ Writing optimally combined time series') - ts = OCcatd filewrite(ts, 'ts_OC', ref_img) - print("++ Writing Kappa-filtered optimally combined timeseries") - varexpl = write_split_ts(ts, comptable, mmix, acc, rej, midk, ref_img, - suffix='OC') - print("++ Writing signal versions of components") + lgr.info("++ Writing Kappa-filtered optimally combined timeseries") + varexpl = write_split_ts(ts, mmix, acc, rej, midk, ref_img, suffix='OC') + lgr.info("++ Writing signal versions of components") ts_B = get_coeffs(ts, mask, mmix) - filewrite(ts_B[:, :, :, :], 'betas_OC', ref_img) + filewrite(ts_B, 'betas_OC', ref_img) if len(acc) != 0: - filewrite(ts_B[:, :, :, acc], 'betas_hik_OC', ref_img) - print("++ Writing optimally combined high-Kappa features") - writefeats(split_ts(ts, comptable, mmix, acc, rej, midk)[0], + filewrite(ts_B[:, acc], 'betas_hik_OC', ref_img) + lgr.info("++ Writing optimally combined high-Kappa features") + writefeats(split_ts(ts, mmix, mask, acc)[0], mmix[:, acc], mask, ref_img, suffix='OC2') - print("++ Writing component table") - writect(comptable, nt, acc, rej, midk, empty, ctname='comp_table.txt', + lgr.info("++ Writing component table") + writect(comptable, n_vols, acc, rej, midk, empty, ctname='comp_table.txt', varexpl=varexpl) -def writeresults_echoes(acc, rej, midk, ref_img, comptable, mmix, n_echos): - for i_echo in range(n_echos): - print("++ Writing Kappa-filtered TE#%i timeseries" % (i_echo+1)) - write_split_ts(catd[:, :, :, i_echo, :], comptable, mmix, - acc, rej, midk, ref_img, suffix='e%i' % (i_echo+1)) +def writeresults_echoes(catd, mmix, acc, rej, midk, ref_img): + """ + Saves individually denoised echos to disk + + Parameters + ---------- + catd : (S x E x T) array_like + Input data time series + mmix : (C x T) array_like + Mixing matrix for converting input data to component space, where `C` + is components and `T` is the same as in `data` + acc : list + 
Indices of accepted (BOLD) components in `mmix` + rej : list + Indices of rejected (non-BOLD) components in `mmix` + midk : list + Indices of mid-K (questionable) components in `mmix` + ref_img : str or img_like + Reference image to dictate how outputs are saved to disk + """ + + for i_echo in range(catd.shape[1]): + lgr.info("++ Writing Kappa-filtered TE#%i timeseries" % (i_echo+1)) + write_split_ts(catd[:, i_echo, :], mmix, acc, rej, midk, ref_img, + suffix='e%i' % (i_echo+1)) def main(data, tes, mixm=None, ctab=None, manacc=None, strict=False, @@ -1522,7 +1890,7 @@ def main(data, tes, mixm=None, ctab=None, manacc=None, strict=False, lgr.info('** WARNING! No BOLD components detected!!! \n' '** Please check data and results!') - writeresults(OCcatd, comptable, mmix, n_vols, acc, rej, midk, empty, ref_img) - gscontrol_mmix(mmix, acc, rej, midk, empty, ref_img) + writeresults(OCcatd, mask, comptable, mmix, n_vols, acc, rej, midk, empty, ref_img) + gscontrol_mmix(OCcatd, mmix, acc, rej, midk, ref_img) if dne: - writeresults_echoes(acc, rej, midk, ref_img, comptable, mmix, n_echos) + writeresults_echoes(catd, mmix, acc, rej, midk, ref_img) From 0e9e938272b87db5bb2c8e753ad9b4d0e1dd66fa Mon Sep 17 00:00:00 2001 From: Ross Markello Date: Tue, 8 May 2018 22:17:54 -0400 Subject: [PATCH 11/18] [RF] Addressed reviews and minor updates --- Dockerfile | 9 ++-- tedana/interfaces/t2smap.py | 10 ++-- tedana/interfaces/tedana.py | 86 +++++++++++++++++---------------- tedana/tests/test_tedana.py | 20 ++++---- tedana/tests/test_utils.py | 10 +--- tedana/utils/__init__.py | 8 ++-- tedana/utils/utils.py | 95 ++++++------------------------------- 7 files changed, 84 insertions(+), 154 deletions(-) diff --git a/Dockerfile b/Dockerfile index 7409b80a0..57f3e15c3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -98,7 +98,7 @@ RUN conda create -y -q --name default --channel vida-nyu python=3.6.1 \ && sync && conda clean -tipsy && sync \ && /bin/bash -c "source activate default \ && pip install 
-q --no-cache-dir \ - nipype ipython scikit-learn scipy ipdb mdp" \ + nipype ipython scikit-learn scipy ipdb mdp nilearn nibabel>=2.1.0" \ && sync ENV PATH=/opt/conda/envs/default/bin:$PATH @@ -110,15 +110,12 @@ RUN conda create -y -q --name py27 python=2.7 \ && sync && conda clean -tipsy && sync \ && /bin/bash -c "source activate default \ && pip install -q --no-cache-dir \ - nipype ipython scikit-learn scipy ipdb mdp" \ + nipype ipython scikit-learn scipy ipdb mdp nilearn nibabel>=2.1.0" \ && sync USER root # User-defined instruction -RUN mkdir /home/neuro/code - -# User-defined instruction -RUN mkdir /home/neuro/data +RUN mkdir /home/neuro/code /home/neuro/data WORKDIR /home/neuro diff --git a/tedana/interfaces/t2smap.py b/tedana/interfaces/t2smap.py index 61020e168..8e2d5d3af 100644 --- a/tedana/interfaces/t2smap.py +++ b/tedana/interfaces/t2smap.py @@ -1,5 +1,5 @@ import numpy as np -from tedana.utils import (filewrite, load_data, makeadmask, unmask, fmask) +from tedana.utils import (filewrite, load_data, make_adaptive_mask, unmask) import logging logging.basicConfig(format='[%(levelname)s]: %(message)s', level=logging.INFO) @@ -12,7 +12,7 @@ def fit(data, mask, tes, masksum, start_echo): T2* and S0 timeseries. 
""" nx, ny, nz, n_echos, n_trs = data.shape - echodata = fmask(data, mask) + echodata = data[mask] tes = np.array(tes) t2sa_ts = np.zeros([nx, ny, nz, n_trs]) @@ -173,10 +173,10 @@ def make_optcom(data, t2s, tes, mask, combmode): tes = np.array(tes)[np.newaxis] # (1 x E) array_like if t2s.ndim == 1: - lgr.info('Optimally combining with voxel-wise T2 estimates') + lgr.info('++ Optimally combining data with voxel-wise T2 estimates') ft2s = t2s[mask, np.newaxis] else: - lgr.info('Optimally combining with voxel- and volume-wise T2 estimates') + lgr.info('++ Optimally combining data with voxel- and volume-wise T2 estimates') ft2s = t2s[mask, :, np.newaxis] if combmode == 'ste': @@ -223,7 +223,7 @@ def main(options): ref_img = data[0] if isinstance(data, list) else data lgr.info("++ Computing Mask") - mask, masksum = makeadmask(catd, minimum=False, getsum=True) + mask, masksum = make_adaptive_mask(catd, minimum=False, getsum=True) filewrite(masksum, 'masksum%s' % suf, ref_img, copy_header=False) lgr.info("++ Computing Adaptive T2* map") diff --git a/tedana/interfaces/tedana.py b/tedana/interfaces/tedana.py index 37f2ac3e8..98b8c292f 100644 --- a/tedana/interfaces/tedana.py +++ b/tedana/interfaces/tedana.py @@ -11,8 +11,8 @@ from sklearn.cluster import DBSCAN from tedana.interfaces import (make_optcom, t2sadmap) from tedana.utils import (load_image, load_data, get_dtype, - make_min_mask, makeadmask, - fmask, unmask, filewrite, + make_min_mask, make_adaptive_mask, + unmask, filewrite, fitgaussian, dice, andb) import logging @@ -151,16 +151,16 @@ def get_coeffs(data, mask, X, add_const=False): # mask data and flip (time x samples) mdata = data[mask].T - # coerce X to >=2d if single variable supplies + # coerce X to >=2d X = np.atleast_2d(X) - if X.shape[0] == 1: + + if len(X) == 1: X = X.T if add_const: # add intercept, if specified - Xones = np.ones((np.min(mdata.shape), 1)) - X = np.column_stack([X, Xones]) + X = np.column_stack([X, np.ones((len(X), 1))]) betas = 
np.linalg.lstsq(X, mdata)[0].T - if add_const: # drop beta for intercept + if add_const: # drop beta for intercept, if specified betas = betas[:, :-1] betas = unmask(betas, mask) @@ -316,7 +316,8 @@ def eimask(dd, ees=None): perc98 = stats.scoreatpercentile(dd[:, ee, :].flatten(), 98, interpolation_method='lower') lthr, hthr = 0.001 * perc98, 5 * perc98 - lgr.info('++ Eimask threshold boundaries: {}'.format([lthr, hthr])) + lgr.info('++ Eimask threshold boundaries: ' + '{:.03f} {:.03f}'.format(lthr, hthr)) m = dd[:, ee, :].mean(axis=1) imask[np.logical_and(m > lthr, m < hthr), ee] = True @@ -389,7 +390,7 @@ def computefeats2(data, mmix, mask, normalize=True): # R-to-Z transform data_Z = np.arctanh(data_R) - if len(data_Z.shape) == 1: + if data_Z.ndim == 1: data_Z = np.atleast_2d(data_Z).T # normalize data @@ -471,8 +472,7 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, ref_img, """ # compute optimal combination of raw data - tsoc = np.array(make_optcom(catd, t2sG, tes, mask, combmode), - dtype=float)[mask] + tsoc = make_optcom(catd, t2sG, tes, mask, combmode).astype(float)[mask] # demean optimal combination tsoc_dm = tsoc - tsoc.mean(axis=-1, keepdims=True) @@ -593,6 +593,7 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, ref_img, # full selection including clustering criteria seldict = None if full_sel: + lgr.info('++ Performing spatial clustering of components') for i in range(n_components): # save out files out = np.zeros((n_samp, 4)) @@ -606,7 +607,7 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, ref_img, out[:, 2] = np.squeeze(unmask(F_S0_maps[:, i], t2s != 0)) out[:, 3] = np.squeeze(unmask(Z_maps[:, i], mask)) - filewrite(out, ccname, ref_img, gzip=gzip) + ccname = filewrite(out, ccname, ref_img, gzip=gzip) if get_dtype(ref_img) == 'GIFTI': continue # TODO: pass through GIFTI file data as below @@ -694,10 +695,11 @@ def selcomps(seldict, mmix, ref_img, manacc, n_echos, debug=False, olevel=2, ove ign 
: list Indices of ignored components in `mmix` """ + if filecsdata: import bz2 if seldict is not None: - lgr.info('Saving component selection data') + lgr.info('++ Saving component selection data') csstate_f = bz2.BZ2File('compseldata.pklbz', 'wb') pickle.dump(seldict, csstate_f) csstate_f.close() @@ -707,7 +709,7 @@ def selcomps(seldict, mmix, ref_img, manacc, n_echos, debug=False, olevel=2, ove seldict = pickle.load(csstate_f) csstate_f.close() except FileNotFoundError: - lgr.info('No component data found!') + lgr.warning('++ No component data found!') return None # Dump dictionary into variable names @@ -1043,13 +1045,14 @@ def selcomps(seldict, mmix, ref_img, manacc, n_echos, debug=False, olevel=2, ove for t2sl_i in range(len(t2s_lim)): t2sl = t2s_lim[t2sl_i] veinW = sig_B[:, veinc]*np.tile(rej_veinRZ, [sig_B.shape[0], 1]) - veincand = fmask(unmask(andb([s0[t2s != 0] < np.median(s0[t2s != 0]), - t2s[t2s != 0] < t2sl]) >= 1, t2s != 0), mask) + veincand = unmask(andb([s0[t2s != 0] < np.median(s0[t2s != 0]), + t2s[t2s != 0] < t2sl]) >= 1, + t2s != 0)[mask] veinW[~veincand] = 0 - invein = veinW.sum(1)[fmask(unmask(veinmaskf, mask) * unmask(veinW.sum(1) > 1, mask), - mask)] + invein = veinW.sum(axis=1)[(unmask(veinmaskf, mask) * + unmask(veinW.sum(axis=1) > 1, mask))[mask]] minW = 10 * (np.log10(invein).mean()) - 1 * 10**(np.log10(invein).std()) - veinmaskB = veinW.sum(1) > minW + veinmaskB = veinW.sum(axis=1) > minW tsoc_Bp = tsoc_B.copy() tsoc_Bp[tsoc_Bp < 0] = 0 vvex = np.array([(tsoc_Bp[veinmaskB, ii]**2.).sum() / @@ -1136,8 +1139,8 @@ def selcomps(seldict, mmix, ref_img, manacc, n_echos, debug=False, olevel=2, ove return list(sorted(ncl)), list(sorted(rej)), list(sorted(midk)), list(sorted(ign)) -def tedpca(catd, combmode, mask, stabilize, ref_img, tes, kdaw, rdaw, ste=0, - mlepca=True): +def tedpca(catd, OCcatd, combmode, mask, stabilize, ref_img, tes, kdaw, rdaw, + ste=0, mlepca=True): """ Performs PCA on `catd` and uses TE-dependence to dimensionally reduce 
data @@ -1145,6 +1148,8 @@ def tedpca(catd, combmode, mask, stabilize, ref_img, tes, kdaw, rdaw, ste=0, ---------- catd : (S x E x T) array_like Input functional data + OCcatd : (S x T) array_like + Optimally-combined time series data combmode : {'t2s', 'ste'} str How optimal combination of echos should be made, where 't2s' indicates using the method of Posse 1999 and 'ste' indicates using the method of @@ -1183,13 +1188,13 @@ def tedpca(catd, combmode, mask, stabilize, ref_img, tes, kdaw, rdaw, ste=0, ste = np.array([int(ee) for ee in str(ste).split(',')]) if len(ste) == 1 and ste[0] == -1: - lgr.info('-Computing PCA of optimally combined multi-echo data') + lgr.info('++ Computing PCA of optimally combined multi-echo data') d = OCcatd[make_min_mask(OCcatd[:, np.newaxis, :])][:, np.newaxis, :] elif len(ste) == 1 and ste[0] == 0: - lgr.info('-Computing PCA of spatially concatenated multi-echo data') + lgr.info('++ Computing PCA of spatially concatenated multi-echo data') d = catd[mask].astype('float64') else: - lgr.info('-Computing PCA of TE #%s' % ','.join([str(ee) for ee in ste])) + lgr.info('++ Computing PCA of TE #%s' % ','.join([str(ee) for ee in ste])) d = np.stack([catd[mask, ee] for ee in ste - 1], axis=1).astype('float64') eim = np.squeeze(eimask(d)) @@ -1237,7 +1242,7 @@ def tedpca(catd, combmode, mask, stabilize, ref_img, tes, kdaw, rdaw, ste=0, ctb = np.vstack([ctb.T[:3], sp]).T # Save state - lgr.info('Saving PCA') + lgr.info('++ Saving PCA') pcastate = {'u': u, 's': s, 'v': v, 'ctb': ctb, 'eigelb': eigelb, 'spmin': spmin, 'spcum': spcum} try: @@ -1247,7 +1252,7 @@ def tedpca(catd, combmode, mask, stabilize, ref_img, tes, kdaw, rdaw, ste=0, lgr.warning('Could not save PCA solution.') else: # if loading existing state - lgr.info('Loading PCA') + lgr.info('++ Loading PCA') with open('pcastate.pkl', 'rb') as handle: pcastate = pickle.load(handle) u, s, v = pcastate['u'], pcastate['s'], pcastate['v'] @@ -1291,8 +1296,8 @@ def tedpca(catd, combmode, mask, 
stabilize, ref_img, tes, kdaw, rdaw, ste=0, dd = u.dot(np.diag(s*np.array(pcsel, dtype=np.int))).dot(v) n_components = s[pcsel].shape[0] - lgr.info('--Selected {0} components. Minimum Kappa={1:.02f} ' - 'Rho={2:.02f}'.format(n_components, kappa_thr, rho_thr)) + lgr.info('++ Selected {0} components. Kappa threshold: {1:.02f}, ' + 'Rho threshold: {2:.02f}'.format(n_components, kappa_thr, rho_thr)) dd = stats.zscore(dd.T, axis=0).T # variance normalize timeseries dd = stats.zscore(dd, axis=None) # variance normalize everything @@ -1359,8 +1364,7 @@ def gscontrol_raw(catd, optcom, n_echos, ref_img, dtrank=4): catd : (S x E x T) array_like Input functional data optcom : (S x T) array_like - Optimally-combined functional data (i.e., the output of - `tedana.interfaces.t2smap.make_optcom`) + Optimally-combined functional data (i.e., the output of `make_optcom`) n_echos : int Number of echos in data. Should be the same as `E` dimension of `catd` ref_img : str or img_like @@ -1410,7 +1414,7 @@ def gscontrol_raw(catd, optcom, n_echos, ref_img, dtrank=4): filewrite(optcom, 'tsoc_orig', ref_img) dm_optcom = unmask(tsoc_nogs, Gmask) - filewrite(dm_optcom, 'tsoc_nogs.nii', ref_img) + filewrite(dm_optcom, 'tsoc_nogs', ref_img) # Project glbase out of each echo dm_catd = catd.copy() # don't overwrite catd @@ -1683,18 +1687,18 @@ def writeresults(ts, mask, comptable, mmix, n_vols, acc, rej, midk, empty, ref_i lgr.info('++ Writing optimally combined time series') filewrite(ts, 'ts_OC', ref_img) - lgr.info("++ Writing Kappa-filtered optimally combined timeseries") + lgr.info('++ Writing Kappa-filtered optimally combined timeseries') varexpl = write_split_ts(ts, mmix, acc, rej, midk, ref_img, suffix='OC') - lgr.info("++ Writing signal versions of components") + lgr.info('++ Writing signal versions of components') ts_B = get_coeffs(ts, mask, mmix) filewrite(ts_B, 'betas_OC', ref_img) if len(acc) != 0: filewrite(ts_B[:, acc], 'betas_hik_OC', ref_img) - lgr.info("++ Writing optimally 
combined high-Kappa features") + lgr.info('++ Writing optimally combined high-Kappa features') writefeats(split_ts(ts, mmix, mask, acc)[0], mmix[:, acc], mask, ref_img, suffix='OC2') - lgr.info("++ Writing component table") + lgr.info('++ Writing component table') writect(comptable, n_vols, acc, rej, midk, empty, ctname='comp_table.txt', varexpl=varexpl) @@ -1791,6 +1795,7 @@ def main(data, tes, mixm=None, ctab=None, manacc=None, strict=False, n_echos = len(tes) # coerce data to samples x echos x time array + lgr.info('++ Loading input data: {}'.format(data)) catd, ref_img = load_data(data, n_echos=n_echos) n_samp, n_echos, n_vols = catd.shape @@ -1825,12 +1830,11 @@ def main(data, tes, mixm=None, ctab=None, manacc=None, strict=False, lgr.info('++ Computing Mask') global mask - mask, masksum = makeadmask(catd, minimum=False, getsum=True) + mask, masksum = make_adaptive_mask(catd, minimum=False, getsum=True) lgr.info('++ Computing T2* map') global t2s, s0, t2sG - t2s, s0, t2ss, s0s, t2sG, s0G = t2sadmap(catd, tes, - mask, masksum, + t2s, s0, t2ss, s0s, t2sG, s0G = t2sadmap(catd, tes, mask, masksum, start_echo=1) # set a hard cap for the T2* map @@ -1838,12 +1842,12 @@ def main(data, tes, mixm=None, ctab=None, manacc=None, strict=False, cap_t2s = stats.scoreatpercentile(t2s.flatten(), 99.5, interpolation_method='lower') t2s[t2s > cap_t2s * 10] = cap_t2s - filewrite(s0, op.join(out_dir, 's0v'), ref_img) filewrite(t2s, op.join(out_dir, 't2sv'), ref_img) + filewrite(s0, op.join(out_dir, 's0v'), ref_img) filewrite(t2ss, op.join(out_dir, 't2ss'), ref_img) filewrite(s0s, op.join(out_dir, 's0vs'), ref_img) - filewrite(s0G, op.join(out_dir, 's0vG'), ref_img) filewrite(t2sG, op.join(out_dir, 't2svG'), ref_img) + filewrite(s0G, op.join(out_dir, 's0vG'), ref_img) # optimally combine data global OCcatd @@ -1855,7 +1859,7 @@ def main(data, tes, mixm=None, ctab=None, manacc=None, strict=False, if mixm is None: lgr.info("++ Doing ME-PCA and ME-ICA") - n_components, dd = 
tedpca(catd, combmode, mask, stabilize, ref_img, + n_components, dd = tedpca(catd, OCcatd, combmode, mask, stabilize, ref_img, tes=tes, kdaw=kdaw, rdaw=rdaw, ste=ste) mmix_orig = tedica(n_components, dd, conv, fixed_seed, cost=initcost, final_cost=finalcost) diff --git a/tedana/tests/test_tedana.py b/tedana/tests/test_tedana.py index 9a18d25ce..3abce279e 100644 --- a/tedana/tests/test_tedana.py +++ b/tedana/tests/test_tedana.py @@ -36,7 +36,18 @@ def test_outputs(): """ Compare the niftis specified in the below list again """ + nifti_test_list = [ + 't2sv.nii', + 's0v.nii', + 't2ss.nii', + 's0vs.nii', + 't2svG.nii', + 's0vG.nii', + 'T1gs.nii', + 'tsoc_orig.nii', + 'tsoc_nogs.nii', + # files are in order of creation above this point '.cc_temp.nii.gz', '.fcl_in.nii.gz', '.fcl_out.nii.gz', @@ -52,17 +63,8 @@ def test_outputs(): 'hik_ts_OC_T1c.nii', 'lowk_ts_OC.nii', 'midk_ts_OC.nii', - 's0v.nii', - 's0vG.nii', - 's0vs.nii', 'sphis_hik.nii', - 'T1gs.nii', - 't2ss.nii', - 't2sv.nii', - 't2svG.nii', 'ts_OC.nii', - 'tsoc_nogs.nii', - 'tsoc_orig.nii', 'veins_l0.nii', 'veins_l1.nii'] test_dir = Path('/home/neuro/data/TED/') diff --git a/tedana/tests/test_utils.py b/tedana/tests/test_utils.py index 8a027a858..102f36a89 100644 --- a/tedana/tests/test_utils.py +++ b/tedana/tests/test_utils.py @@ -11,7 +11,7 @@ def test_cat2echos(): pass -def test_makeadmask(): +def test_make_adaptive_mask(): pass @@ -19,14 +19,6 @@ def test_make_min_mask(): pass -def test_uncat2echos(): - pass - - -def test_fmask(): - pass - - def test_unmask(): pass diff --git a/tedana/utils/__init__.py b/tedana/utils/__init__.py index 771178fd5..00b45b9bc 100644 --- a/tedana/utils/__init__.py +++ b/tedana/utils/__init__.py @@ -3,14 +3,14 @@ from .utils import ( load_image, load_data, get_dtype, - make_min_mask, makeadmask, - fmask, unmask, filewrite, + make_min_mask, make_adaptive_mask, + unmask, filewrite, fitgaussian, dice, andb, ) __all__ = [ 'load_image', 'load_data', 'get_dtype' - 'make_min_mask', 
'makeadmask', - 'fmask', 'unmask', 'filewrite', + 'make_min_mask', 'make_adaptive_mask', + 'unmask', 'filewrite', 'fitgaussian', 'dice', 'andb'] diff --git a/tedana/utils/utils.py b/tedana/utils/utils.py index a090b95db..7d7b01796 100644 --- a/tedana/utils/utils.py +++ b/tedana/utils/utils.py @@ -7,6 +7,7 @@ from nilearn._utils import check_niimg import nilearn.masking as nimask from scipy.optimize import leastsq +from sklearn.utils import check_array from ..due import due, BibTeX @@ -125,7 +126,7 @@ def load_data(data, n_echos=None): return fdata, ref_img -def makeadmask(data, minimum=True, getsum=False): +def make_adaptive_mask(data, minimum=True, getsum=False): """ Makes map of `data` specifying longest echo a voxel can be sampled with @@ -362,89 +363,25 @@ def make_gii_darray(ref_array, data, copy_meta=False): return darray -def uncat2echos(data): - """ - Combines Z- and echo-axis in `data` - - Parameters - ---------- - data : (X x Y x Z x E x T) array_like - Multi-echo data array - - Returns - ------- - fdata : (X x Y x M x T) np.ndarray - Z-concatenated multi-echo data array, where M is Z * number of echos - """ - - if data.ndim < 4: - raise ValueError('Input data must have at least four dimensions; ' - 'provided data has only {0}'.format(data.ndim)) - - (nx, ny), nz = data.shape[:2], np.prod(data.shape[2:4]) - return data.reshape(nx, ny, nz, -1) - - -def fmask(data, mask=None): - """ - Masks `data` with non-zero entries of `mask` - - Parameters - ---------- - data : (X x Y x Z [x E [x T]) array_like or img_like object - Data array or data file to be masked - mask : (X x Y x Z) array_like or img_like object - Boolean array or mask file - - Returns - ------- - fdata : (S x E x T) np.ndarray - Masked `data`, where `S` is samples, `E` is echoes, and `T` is time - """ - - if mask is not None and not type(data) == type(mask): - raise TypeError('Provided `data` and `mask` must be of same type.') - - if isinstance(data, str): - root, ext, addext = 
splitext_addext(data) - if ext == '.gii': - # mask need not apply for gii files - fdata = np.column_stack([f.data for f in nib.load(data).darrays]) - else: - # use nilearn for other files - data = check_niimg(data) - if mask is not None: - # TODO: check that this uses same order to flatten - fdata = nimask.apply_mask(data, mask).T - else: - fdata = data.get_data().reshape((-1,) + data.shape[3:]) - elif isinstance(data, np.ndarray): - # flatten data over first three dimensions and apply mask - fdata = data.reshape((-1,) + data.shape[3:]) - if mask is not None: - fdata = fdata[mask.flatten() > 0] - - return fdata.squeeze() - - def unmask(data, mask): """ Unmasks `data` using non-zero entries of `mask` Parameters ---------- - data : (M x E x T) array_like - Masked array, where `M` is the number of samples + data : (M [x E [x T]]) array_like + Masked array, where `M` is the number of `True` values in `mask` mask : (S,) array_like - Boolean array of `S` samples that was used to mask `data` + Boolean array of `S` samples that was used to mask `data`. It should + have exactly `M` True values. 
Returns ------- - out : (S x E x T) np.ndarray + out : (S [x E [x T]]) np.ndarray Unmasked `data` array """ - out = np.zeros((mask.shape + data.shape[1:])) + out = np.zeros(mask.shape + data.shape[1:]) out[mask] = data return out @@ -618,15 +555,13 @@ def andb(arrs): Integer array of summed `arrs` """ - same_shape = [] - for arr in arrs: - for arr2 in arrs: - same_shape.append(arr.shape == arr2.shape) - + # coerce to integer and ensure same shape + arrs = [check_array(arr, dtype=int) for arr in arrs] + same_shape = [arr1.shape == arr2.shape for arr1 in arrs for arr2 in arrs] if not np.all(same_shape): - raise ValueError('All input arrays must have same shape') + raise ValueError('All input arrays must have same shape.') + + # sum across arrays + result = np.sum(arrs, axis=0) - result = np.zeros(arrs[0].shape) - for arr in arrs: - result += np.array(arr, dtype=np.int) return result From a784e8652e1d7f40a0faf38fa28217d9f7c3f4ae Mon Sep 17 00:00:00 2001 From: Ross Markello Date: Tue, 8 May 2018 22:54:16 -0400 Subject: [PATCH 12/18] [FIX] Needed order=F for when zcat data provided --- tedana/interfaces/tedana.py | 35 ++++++++++++++++++----------------- tedana/utils/utils.py | 3 +-- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/tedana/interfaces/tedana.py b/tedana/interfaces/tedana.py index 98b8c292f..224401759 100644 --- a/tedana/interfaces/tedana.py +++ b/tedana/interfaces/tedana.py @@ -637,9 +637,11 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, ref_img, # Do simple clustering on ranked signal-change map countsigFR2 = F_R2_clmaps[:, i].sum() countsigFS0 = F_S0_clmaps[:, i].sum() - Br_clmaps_R2[:, i] = spatclust(stats.rankdata(tsoc_Babs[:, i]), mask, - csize, max(tsoc_Babs.shape)-countsigFR2, ref_img) - Br_clmaps_S0[:, i] = spatclust(stats.rankdata(tsoc_Babs[:, i]), mask, + spclust_input = stats.rankdata(tsoc_Babs[:, i]) + Br_clmaps_R2[:, i] = spatclust(spclust_input, mask, + csize, max(tsoc_Babs.shape)-countsigFR2, + ref_img) + 
Br_clmaps_S0[:, i] = spatclust(spclust_input, mask, csize, max(tsoc_Babs.shape)-countsigFS0, ref_img) @@ -709,12 +711,13 @@ def selcomps(seldict, mmix, ref_img, manacc, n_echos, debug=False, olevel=2, ove seldict = pickle.load(csstate_f) csstate_f.close() except FileNotFoundError: - lgr.warning('++ No component data found!') + lgr.warning('++ Failed to load component selection data') return None # Dump dictionary into variable names + # TODO: this is a terrible way to do things and we should change it for key in seldict.keys(): - exec("%s=seldict['%s']" % (key, key)) + exec("{0}=seldict['{0}']".format(key)) # List of components midk = [] @@ -897,8 +900,7 @@ def selcomps(seldict, mmix, ref_img, manacc, n_echos, debug=False, olevel=2, ove np.intersect1d(nc[db.labels_ == 0], nc[Rhos > getelbow_mod(Rhos_sorted, val=True)]).shape[0]]) - if debug: - lgr.info('found solution', ii, db.labels_) + lgr.debug('++ Found solution', ii, db.labels_) db = None epsmap = np.array(epsmap) @@ -908,7 +910,7 @@ def selcomps(seldict, mmix, ref_img, manacc, n_echos, debug=False, olevel=2, ove # Select index that maximizes Dice with guessmask but first # minimizes number of higher Rho components ii = int(epsmap[np.argmax(epsmap[epsmap[:, 2] == np.min(epsmap[:, 2]), 1], 0), 0]) - lgr.info('Component selection tuning: ', epsmap[:, 1].max()) + lgr.info('++ Component selection tuning: ', epsmap[:, 1].max()) db = DBSCAN(eps=.005+ii*.005, min_samples=3).fit(fz.T) ncl = nc[db.labels_ == 0] ncl = np.setdiff1d(ncl, rej) @@ -918,7 +920,7 @@ def selcomps(seldict, mmix, ref_img, manacc, n_echos, debug=False, olevel=2, ove to_clf = np.setdiff1d(nc, np.union1d(ncl, rej)) if len(group0) == 0 or len(group0) < len(KRguess) * .5: dbscanfailed = True - lgr.info('DBSCAN based guess failed. Using elbow guess method.') + lgr.info('++ DBSCAN based guess failed. 
Using elbow guess method.') ncl = np.setdiff1d(np.setdiff1d(nc[KRelbow == 2], rej), np.union1d(nc[tt_table[:, 0] < tt_lim], np.union1d(np.union1d(nc[spz > 1], @@ -929,8 +931,8 @@ def selcomps(seldict, mmix, ref_img, manacc, n_echos, debug=False, olevel=2, ove group_n1 = [] to_clf = np.setdiff1d(nc, np.union1d(group0, rej)) if len(group0) < 2 or (len(group0) < 4 and float(len(rej))/len(group0) > 3): - lgr.info('WARNING: Extremely limited reliable BOLD signal space. ' - 'Not filtering further into midk etc.') + lgr.warning('++ Extremely limited reliable BOLD signal space. ' + 'Not filtering further into midk etc.') midkfailed = True min_acc = np.array([]) if len(group0) != 0: @@ -1194,7 +1196,7 @@ def tedpca(catd, OCcatd, combmode, mask, stabilize, ref_img, tes, kdaw, rdaw, lgr.info('++ Computing PCA of spatially concatenated multi-echo data') d = catd[mask].astype('float64') else: - lgr.info('++ Computing PCA of TE #%s' % ','.join([str(ee) for ee in ste])) + lgr.info('++ Computing PCA of echo #%s' % ','.join([str(ee) for ee in ste])) d = np.stack([catd[mask, ee] for ee in ste - 1], axis=1).astype('float64') eim = np.squeeze(eimask(d)) @@ -1249,7 +1251,7 @@ def tedpca(catd, OCcatd, combmode, mask, stabilize, ref_img, tes, kdaw, rdaw, with open('pcastate.pkl', 'wb') as handle: pickle.dump(pcastate, handle) except TypeError: - lgr.warning('Could not save PCA solution.') + lgr.warning('++ Could not save PCA solution.') else: # if loading existing state lgr.info('++ Loading PCA') @@ -1538,7 +1540,7 @@ def write_split_ts(data, mmix, acc, rej, midk, ref_img, suffix=''): # get variance explained by retained components betas = get_coeffs(unmask(dmdata.T, mask), mask, mmix)[mask] varexpl = (1 - ((dmdata.T - betas.dot(mmix.T))**2.).sum() / (dmdata**2.).sum()) * 100 - lgr.info('Variance explained: ', varexpl, '%') + lgr.info('++ Variance explained: ', varexpl, '%') # create component and de-noised time series and save to files hikts = betas[:, acc].dot(mmix.T[acc, :]) @@ 
-1725,7 +1727,7 @@ def writeresults_echoes(catd, mmix, acc, rej, midk, ref_img): """ for i_echo in range(catd.shape[1]): - lgr.info("++ Writing Kappa-filtered TE#%i timeseries" % (i_echo+1)) + lgr.info('++ Writing Kappa-filtered echo #{:01d} timeseries'.format(i_echo+1)) write_split_ts(catd[:, i_echo, :], mmix, acc, rej, midk, ref_img, suffix='e%i' % (i_echo+1)) @@ -1891,8 +1893,7 @@ def main(data, tes, mixm=None, ctab=None, manacc=None, strict=False, acc, rej, midk, empty = ctabsel(ctab) if len(acc) == 0: - lgr.info('** WARNING! No BOLD components detected!!! \n' - '** Please check data and results!') + lgr.warning('++ No BOLD components detected!!! Please check data and results!') writeresults(OCcatd, mask, comptable, mmix, n_vols, acc, rej, midk, empty, ref_img) gscontrol_mmix(OCcatd, mmix, acc, rej, midk, ref_img) diff --git a/tedana/utils/utils.py b/tedana/utils/utils.py index 7d7b01796..c5d38f907 100644 --- a/tedana/utils/utils.py +++ b/tedana/utils/utils.py @@ -5,7 +5,6 @@ from nibabel.filename_parser import splitext_addext from nilearn.image import new_img_like from nilearn._utils import check_niimg -import nilearn.masking as nimask from scipy.optimize import leastsq from sklearn.utils import check_array @@ -117,7 +116,7 @@ def load_data(data, n_echos=None): # we have a z-cat file img = check_niimg(data) (nx, ny), nz = img.shape[:2], img.shape[2] // n_echos - fdata = load_image(img.get_data().reshape(nx, ny, nz, n_echos, -1)) + fdata = load_image(img.get_data().reshape(nx, ny, nz, n_echos, -1, order='F')) # create reference image ref_img = img.__class__(np.zeros((nx, ny, nz)), affine=img.affine, From 4d768129da32ebdcb6de677e6faae72ec2599785 Mon Sep 17 00:00:00 2001 From: Ross Markello Date: Tue, 8 May 2018 23:34:19 -0400 Subject: [PATCH 13/18] [FIX] Allow N-dimensional arrays in `utils.andb` --- tedana/interfaces/tedana.py | 4 +--- tedana/tests/test_tedana.py | 2 +- tedana/utils/utils.py | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git 
a/tedana/interfaces/tedana.py b/tedana/interfaces/tedana.py index 224401759..6327b8fd3 100644 --- a/tedana/interfaces/tedana.py +++ b/tedana/interfaces/tedana.py @@ -655,7 +655,7 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, ref_img, return seldict, comptab, betas, mmix_new -def selcomps(seldict, mmix, ref_img, manacc, n_echos, debug=False, olevel=2, oversion=99, +def selcomps(seldict, mmix, ref_img, manacc, n_echos, olevel=2, oversion=99, filecsdata=False, savecsdiag=True, strict_mode=False): """ Labels components in `mmix` @@ -673,8 +673,6 @@ def selcomps(seldict, mmix, ref_img, manacc, n_echos, debug=False, olevel=2, ove Comma-separated list of indices of manually accepted components n_echos : int Number of echos in original data - debug : bool, optional - Default: False olevel : int, optional Default: 2 oversion : int, optional diff --git a/tedana/tests/test_tedana.py b/tedana/tests/test_tedana.py index 3abce279e..d7643cd18 100644 --- a/tedana/tests/test_tedana.py +++ b/tedana/tests/test_tedana.py @@ -47,12 +47,12 @@ def test_outputs(): 'T1gs.nii', 'tsoc_orig.nii', 'tsoc_nogs.nii', - # files are in order of creation above this point '.cc_temp.nii.gz', '.fcl_in.nii.gz', '.fcl_out.nii.gz', '__clin.nii.gz', '__clout.nii.gz', + # files are in order of creation above this point 'betas_hik_OC.nii', 'betas_hik_OC_T1c.nii', 'betas_OC.nii', diff --git a/tedana/utils/utils.py b/tedana/utils/utils.py index c5d38f907..165c8b98f 100644 --- a/tedana/utils/utils.py +++ b/tedana/utils/utils.py @@ -555,7 +555,7 @@ def andb(arrs): """ # coerce to integer and ensure same shape - arrs = [check_array(arr, dtype=int) for arr in arrs] + arrs = [check_array(arr, dtype=int, ensure_2d=False, allow_nd=True) for arr in arrs] same_shape = [arr1.shape == arr2.shape for arr1 in arrs for arr2 in arrs] if not np.all(same_shape): raise ValueError('All input arrays must have same shape.') From 7f70268782081264462ca5693fac831c5ae50e2e Mon Sep 17 00:00:00 2001 From: Ross 
Markello Date: Wed, 9 May 2018 19:01:40 -0400 Subject: [PATCH 14/18] [FIX] Unmask not retaining dtype Also, other updates to `selcomps()` to hopefully get it passing. --- tedana/interfaces/tedana.py | 25 +++++++++++++++++-------- tedana/tests/test_tedana.py | 21 ++++++++++----------- tedana/utils/__init__.py | 4 ++-- tedana/utils/utils.py | 4 +++- 4 files changed, 32 insertions(+), 22 deletions(-) diff --git a/tedana/interfaces/tedana.py b/tedana/interfaces/tedana.py index 6327b8fd3..64ab54806 100644 --- a/tedana/interfaces/tedana.py +++ b/tedana/interfaces/tedana.py @@ -12,7 +12,7 @@ from tedana.interfaces import (make_optcom, t2sadmap) from tedana.utils import (load_image, load_data, get_dtype, make_min_mask, make_adaptive_mask, - unmask, filewrite, + unmask, filewrite, new_nii_like, fitgaussian, dice, andb) import logging @@ -776,7 +776,7 @@ def selcomps(seldict, mmix, ref_img, manacc, n_echos, olevel=2, oversion=99, # Time series derivative kurtosis mmix_dt = (mmix[:-1] - mmix[1:]) mmix_kurt = stats.kurtosis(mmix_dt) - mmix_std = np.std(mmix_dt, 0) + mmix_std = np.std(mmix_dt, axis=0) """ Step 1: Reject anything that's obviously an artifact @@ -790,21 +790,30 @@ def selcomps(seldict, mmix, ref_img, manacc, n_echos, olevel=2, oversion=99, Step 2: Compute 3-D spatial FFT of Beta maps to detect high-spatial frequency artifacts """ - fproj_arr = np.zeros([np.prod(mask.shape[0:2]), len(nc)]) - fproj_arr_val = np.zeros([np.prod(mask.shape[0:2]), len(nc)]) + # spatial information is important so for NIFTI we convert back to 3D space + if get_dtype(ref_img) == 'NIFTI': + dim1 = np.prod(ref_img.shape[:2]) + else: + dim1 = mask.shape[0] + fproj_arr = np.zeros([dim1, len(nc)]) + fproj_arr_val = np.zeros([dim1, len(nc)]) spr = [] fdist = [] for ii in nc: - fproj = np.fft.fftshift(np.abs(np.fft.rfftn(unmask(seldict['PSC'], - mask)[:, :, :, ii]))) - fproj_z = fproj.max(2) + # convert data back to 3D array + if get_dtype(ref_img) == 'NIFTI': + tproj = 
new_nii_like(unmask(seldict['PSC'], mask)[:, ii], ref_img).get_data() + else: + tproj = unmask(seldict['PSC'], mask)[:, ii] + fproj = np.fft.fftshift(np.abs(np.fft.rfftn(tproj))) + fproj_z = fproj.max(axis=2) fproj[fproj == fproj.max()] = 0 fproj_arr[:, ii] = stats.rankdata(fproj_z.flatten()) fproj_arr_val[:, ii] = fproj_z.flatten() spr.append(np.array(fproj_z > fproj_z.max() / 4, dtype=np.int).sum()) fprojr = np.array([fproj, fproj[:, :, ::-1]]).max(0) fdist.append(np.max([fitgaussian(fproj.max(jj))[3:].max() for - jj in range(len(fprojr.shape))])) + jj in range(fprojr.ndim)])) fdist = np.array(fdist) spr = np.array(spr) diff --git a/tedana/tests/test_tedana.py b/tedana/tests/test_tedana.py index d7643cd18..2c9e8894f 100644 --- a/tedana/tests/test_tedana.py +++ b/tedana/tests/test_tedana.py @@ -52,21 +52,20 @@ def test_outputs(): '.fcl_out.nii.gz', '__clin.nii.gz', '__clout.nii.gz', - # files are in order of creation above this point + 'veins_l0.nii', + 'veins_l1.nii', + 'ts_OC.nii', + 'hik_ts_OC.nii', + 'midk_ts_OC.nii', + 'lowk_ts_OC.nii', + 'dn_ts_OC.nii', + 'betas_OC.nii', 'betas_hik_OC.nii', + 'feats_OC2.nii', 'betas_hik_OC_T1c.nii', - 'betas_OC.nii', - 'dn_ts_OC.nii', 'dn_ts_OC_T1c.nii', - 'feats_OC2.nii', - 'hik_ts_OC.nii', 'hik_ts_OC_T1c.nii', - 'lowk_ts_OC.nii', - 'midk_ts_OC.nii', - 'sphis_hik.nii', - 'ts_OC.nii', - 'veins_l0.nii', - 'veins_l1.nii'] + 'sphis_hik.nii'] test_dir = Path('/home/neuro/data/TED/') res_dir = Path('/home/neuro/code/TED/') for fn in nifti_test_list: diff --git a/tedana/utils/__init__.py b/tedana/utils/__init__.py index 00b45b9bc..6257f8582 100644 --- a/tedana/utils/__init__.py +++ b/tedana/utils/__init__.py @@ -4,7 +4,7 @@ from .utils import ( load_image, load_data, get_dtype, make_min_mask, make_adaptive_mask, - unmask, filewrite, + unmask, filewrite, new_nii_like, fitgaussian, dice, andb, ) @@ -12,5 +12,5 @@ __all__ = [ 'load_image', 'load_data', 'get_dtype' 'make_min_mask', 'make_adaptive_mask', - 'unmask', 'filewrite', + 
'unmask', 'filewrite', 'new_nii_like', 'fitgaussian', 'dice', 'andb'] diff --git a/tedana/utils/utils.py b/tedana/utils/utils.py index 165c8b98f..4a46456c3 100644 --- a/tedana/utils/utils.py +++ b/tedana/utils/utils.py @@ -121,6 +121,8 @@ def load_data(data, n_echos=None): # create reference image ref_img = img.__class__(np.zeros((nx, ny, nz)), affine=img.affine, header=img.header, extra=img.extra) + ref_img.header.extensions = [] + ref_img.header.set_sform(ref_img.header.get_sform(), code=1) return fdata, ref_img @@ -380,7 +382,7 @@ def unmask(data, mask): Unmasked `data` array """ - out = np.zeros(mask.shape + data.shape[1:]) + out = np.zeros(mask.shape + data.shape[1:], dtype=data.dtype) out[mask] = data return out From b8b59571307bfd6674c99926297714182cb7aa60 Mon Sep 17 00:00:00 2001 From: Ross Markello Date: Wed, 9 May 2018 19:43:00 -0400 Subject: [PATCH 15/18] [FIX] Bugs in selcomps for gifti/nifti --- tedana/interfaces/tedana.py | 42 +++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/tedana/interfaces/tedana.py b/tedana/interfaces/tedana.py index 64ab54806..0e90aa69b 100644 --- a/tedana/interfaces/tedana.py +++ b/tedana/interfaces/tedana.py @@ -655,7 +655,7 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, ref_img, return seldict, comptab, betas, mmix_new -def selcomps(seldict, mmix, ref_img, manacc, n_echos, olevel=2, oversion=99, +def selcomps(seldict, mmix, mask, ref_img, manacc, n_echos, olevel=2, oversion=99, filecsdata=False, savecsdiag=True, strict_mode=False): """ Labels components in `mmix` @@ -802,7 +802,7 @@ def selcomps(seldict, mmix, ref_img, manacc, n_echos, olevel=2, oversion=99, for ii in nc: # convert data back to 3D array if get_dtype(ref_img) == 'NIFTI': - tproj = new_nii_like(unmask(seldict['PSC'], mask)[:, ii], ref_img).get_data() + tproj = new_nii_like(ref_img, unmask(seldict['PSC'], mask)[:, ii]).get_data() else: tproj = unmask(seldict['PSC'], mask)[:, ii] fproj = 
np.fft.fftshift(np.abs(np.fft.rfftn(tproj))) @@ -917,7 +917,7 @@ def selcomps(seldict, mmix, ref_img, manacc, n_echos, olevel=2, oversion=99, # Select index that maximizes Dice with guessmask but first # minimizes number of higher Rho components ii = int(epsmap[np.argmax(epsmap[epsmap[:, 2] == np.min(epsmap[:, 2]), 1], 0), 0]) - lgr.info('++ Component selection tuning: ', epsmap[:, 1].max()) + lgr.info('++ Component selection tuning: {:.05f}'.format(epsmap[:, 1].max())) db = DBSCAN(eps=.005+ii*.005, min_samples=3).fit(fz.T) ncl = nc[db.labels_ == 0] ncl = np.setdiff1d(ncl, rej) @@ -953,8 +953,8 @@ def selcomps(seldict, mmix, ref_img, manacc, n_echos, olevel=2, oversion=99, 'Kappa cut point', 'Rho cut point', 'DBSCAN failed to converge', 'Mid-Kappa failed (limited BOLD signal)', 'Kappa-Rho guess', 'min_acc', 'toacc_hi'] - diagstep_vals = [rej, KRcut, Kcut, Rcut, dbscanfailed, - midkfailed, KRguess, min_acc, toacc_hi] + diagstep_vals = [rej.tolist(), KRcut, Kcut, Rcut, dbscanfailed, + midkfailed, KRguess.tolist(), min_acc.tolist(), toacc_hi.tolist()] with open('csstepdata.txt', 'w') as ofh: json.dump(dict(zip(diagstep_keys, diagstep_vals)), ofh, indent=4, sort_keys=True) @@ -1069,7 +1069,7 @@ def selcomps(seldict, mmix, ref_img, manacc, n_echos, olevel=2, oversion=99, group0_res = np.intersect1d(KRguess, group0) phys_var_zs.append((vvex - vvex[group0_res].mean()) / vvex[group0_res].std()) veinBout = unmask(veinmaskB, mask) - filewrite(veinBout, 'veins_l%i' % t2sl_i, ref_img) + filewrite(veinBout.astype(int), 'veins_l%i' % t2sl_i, ref_img) # Mask to sample veins phys_var_z = np.array(phys_var_zs).max(0) @@ -1135,10 +1135,12 @@ def selcomps(seldict, mmix, ref_img, manacc, n_echos, olevel=2, oversion=99, 'Mid-kappa components', 'svm_acc_fail', 'toacc_hi', 'toacc_lo', 'Field artifacts', 'Physiological artifacts', 'Miscellaneous artifacts', 'ncl', 'Ignored components'] - diagstep_vals = [rej, KRcut, Kcut, Rcut, dbscanfailed, - KRguess, dice_rej, rej_supp, to_clf, - midk, 
svm_acc_fail, toacc_hi, toacc_lo, - field_art, phys_art, misc_art, ncl, ign] + diagstep_vals = [rej.tolist(), KRcut, Kcut, Rcut, dbscanfailed, + KRguess.tolist(), dice_rej, rej_supp.tolist(), + to_clf.tolist(), midk.tolist(), svm_acc_fail, + toacc_hi.tolist(), toacc_lo.tolist(), + field_art.tolist(), phys_art.tolist(), + misc_art.tolist(), ncl.tolist(), ign.tolist()] with open('csstepdata.txt', 'w') as ofh: json.dump(dict(zip(diagstep_keys, diagstep_vals)), ofh, indent=4, sort_keys=True) @@ -1437,7 +1439,7 @@ def gscontrol_raw(catd, optcom, n_echos, ref_img, dtrank=4): return dm_catd, dm_optcom -def gscontrol_mmix(OCcatd, mmix, acc, rej, midk, ref_img): +def gscontrol_mmix(OCcatd, mmix, mask, acc, rej, midk, ref_img): """ Parameters ---------- @@ -1512,7 +1514,7 @@ def gscontrol_mmix(OCcatd, mmix, acc, rej, midk, ref_img): np.savetxt('meica_mix_T1c.1D', mmixnogs) -def write_split_ts(data, mmix, acc, rej, midk, ref_img, suffix=''): +def write_split_ts(data, mmix, mask, acc, rej, midk, ref_img, suffix=''): """ Splits `data` into denoised / noise / ignored time series and saves to disk @@ -1547,7 +1549,7 @@ def write_split_ts(data, mmix, acc, rej, midk, ref_img, suffix=''): # get variance explained by retained components betas = get_coeffs(unmask(dmdata.T, mask), mask, mmix)[mask] varexpl = (1 - ((dmdata.T - betas.dot(mmix.T))**2.).sum() / (dmdata**2.).sum()) * 100 - lgr.info('++ Variance explained: ', varexpl, '%') + lgr.info('++ Variance explained: {:.02f}%'.format(varexpl)) # create component and de-noised time series and save to files hikts = betas[:, acc].dot(mmix.T[acc, :]) @@ -1697,7 +1699,7 @@ def writeresults(ts, mask, comptable, mmix, n_vols, acc, rej, midk, empty, ref_i lgr.info('++ Writing optimally combined time series') filewrite(ts, 'ts_OC', ref_img) lgr.info('++ Writing Kappa-filtered optimally combined timeseries') - varexpl = write_split_ts(ts, mmix, acc, rej, midk, ref_img, suffix='OC') + varexpl = write_split_ts(ts, mmix, mask, acc, rej, midk, 
ref_img, suffix='OC') lgr.info('++ Writing signal versions of components') ts_B = get_coeffs(ts, mask, mmix) filewrite(ts_B, 'betas_OC', ref_img) @@ -1712,7 +1714,7 @@ def writeresults(ts, mask, comptable, mmix, n_vols, acc, rej, midk, empty, ref_i varexpl=varexpl) -def writeresults_echoes(catd, mmix, acc, rej, midk, ref_img): +def writeresults_echoes(catd, mmix, mask, acc, rej, midk, ref_img): """ Saves individually denoised echos to disk @@ -1735,7 +1737,7 @@ def writeresults_echoes(catd, mmix, acc, rej, midk, ref_img): for i_echo in range(catd.shape[1]): lgr.info('++ Writing Kappa-filtered echo #{:01d} timeseries'.format(i_echo+1)) - write_split_ts(catd[:, i_echo, :], mmix, acc, rej, midk, ref_img, + write_split_ts(catd[:, i_echo, :], mmix, mask, acc, rej, midk, ref_img, suffix='e%i' % (i_echo+1)) @@ -1881,7 +1883,7 @@ def main(data, tes, mixm=None, ctab=None, manacc=None, strict=False, reindex=True) np.savetxt(op.join(out_dir, 'meica_mix.1D'), mmix) - acc, rej, midk, empty = selcomps(seldict, mmix, ref_img, manacc, n_echos, + acc, rej, midk, empty = selcomps(seldict, mmix, mask, ref_img, manacc, n_echos, strict_mode=strict, filecsdata=filecsdata) else: @@ -1892,7 +1894,7 @@ def main(data, tes, mixm=None, ctab=None, manacc=None, strict=False, ref_img, fout=fout) if ctab is None: - acc, rej, midk, empty = selcomps(seldict, mmix, ref_img, manacc, + acc, rej, midk, empty = selcomps(seldict, mmix, mask, ref_img, manacc, n_echos, filecsdata=filecsdata, strict_mode=strict) @@ -1903,6 +1905,6 @@ def main(data, tes, mixm=None, ctab=None, manacc=None, strict=False, lgr.warning('++ No BOLD components detected!!! 
Please check data and results!') writeresults(OCcatd, mask, comptable, mmix, n_vols, acc, rej, midk, empty, ref_img) - gscontrol_mmix(OCcatd, mmix, acc, rej, midk, ref_img) + gscontrol_mmix(OCcatd, mmix, mask, acc, rej, midk, ref_img) if dne: - writeresults_echoes(catd, mmix, acc, rej, midk, ref_img) + writeresults_echoes(catd, mmix, mask, acc, rej, midk, ref_img) From ddafa7c02ae5c0ab526f70d987eed9ffe818763d Mon Sep 17 00:00:00 2001 From: Ross Markello Date: Wed, 9 May 2018 22:26:25 -0400 Subject: [PATCH 16/18] [FIX] Minor updates to doc-strings + names --- tedana/interfaces/tedana.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/tedana/interfaces/tedana.py b/tedana/interfaces/tedana.py index 0e90aa69b..65a032f40 100644 --- a/tedana/interfaces/tedana.py +++ b/tedana/interfaces/tedana.py @@ -110,7 +110,7 @@ def spatclust(data, mask, csize, thr, ref_img, infile=None, dindex=0, if infile is None: data = data.copy() data[data < thr] = 0 - infile = filewrite(unmask(data, mask), '__clin', ref_img, gzip=True) + infile = filewrite(unmask(data, mask), '__clin.nii.gz', ref_img) # FIXME: ideally no calls to os.system!!! 
(or AFNI, for that matter) addopts = '' @@ -503,7 +503,7 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, ref_img, n_samp, n_echos, n_components = betas.shape n_voxels = mask.sum() n_data_voxels = (t2s != 0).sum() - mu = catd.mean(axis=-1) + mu = catd.mean(axis=-1) # BUG: THIS IS THE BAD PLACE tes = np.reshape(tes, (n_echos, 1)) fmin, fmid, fmax = getfbounds(n_echos) @@ -600,7 +600,7 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, ref_img, if fout is not None: ccname, gzip = 'cc{:03d}'.format(i), False else: - ccname, gzip = '.cc_temp', True + ccname, gzip = '.cc_temp.nii.gz', True out[:, 0] = np.squeeze(unmask(PSC[:, i], mask)) out[:, 1] = np.squeeze(unmask(F_R2_maps[:, i], t2s != 0)) @@ -667,9 +667,11 @@ def selcomps(seldict, mmix, mask, ref_img, manacc, n_echos, olevel=2, oversion=9 mmix : (C x T) array_like Mixing matrix for converting input data to component space, where `C` is components and `T` is the number of volumes in the original data - ref_img + mask : (S,) array_like + Boolean mask array + ref_img : str or img_like Reference image to dictate how outputs are saved to disk - manacc + manacc : list Comma-separated list of indices of manually accepted components n_echos : int Number of echos in original data @@ -956,7 +958,7 @@ def selcomps(seldict, mmix, mask, ref_img, manacc, n_echos, olevel=2, oversion=9 diagstep_vals = [rej.tolist(), KRcut, Kcut, Rcut, dbscanfailed, midkfailed, KRguess.tolist(), min_acc.tolist(), toacc_hi.tolist()] - with open('csstepdata.txt', 'w') as ofh: + with open('csstepdata.json', 'w') as ofh: json.dump(dict(zip(diagstep_keys, diagstep_vals)), ofh, indent=4, sort_keys=True) return list(sorted(min_acc)), list(sorted(rej)), [], list(sorted(to_clf)) @@ -1142,7 +1144,7 @@ def selcomps(seldict, mmix, mask, ref_img, manacc, n_echos, olevel=2, oversion=9 field_art.tolist(), phys_art.tolist(), misc_art.tolist(), ncl.tolist(), ign.tolist()] - with open('csstepdata.txt', 'w') as ofh: + with 
open('csstepdata.json', 'w') as ofh: json.dump(dict(zip(diagstep_keys, diagstep_vals)), ofh, indent=4, sort_keys=True) allfz = np.array([Tz, Vz, Ktz, KRr, cnz, Rz, mmix_kurt, fdist_z]) np.savetxt('csdata.txt', allfz) @@ -1448,6 +1450,8 @@ def gscontrol_mmix(OCcatd, mmix, mask, acc, rej, midk, ref_img): mmix : (C x T) array_like Mixing matrix for converting input data to component space, where `C` is components and `T` is the same as in `OCcatd` + mask : (S,) array_like + Boolean mask array acc : list Indices of accepted (BOLD) components in `mmix` rej : list @@ -1525,6 +1529,8 @@ def write_split_ts(data, mmix, mask, acc, rej, midk, ref_img, suffix=''): mmix : (C x T) array_like Mixing matrix for converting input data to component space, where `C` is components and `T` is the same as in `data` + mask : (S,) array_like + Boolean mask array acc : list Indices of accepted (BOLD) components in `mmix` rej : list @@ -1725,6 +1731,8 @@ def writeresults_echoes(catd, mmix, mask, acc, rej, midk, ref_img): mmix : (C x T) array_like Mixing matrix for converting input data to component space, where `C` is components and `T` is the same as in `data` + mask : (S,) array_like + Boolean mask array acc : list Indices of accepted (BOLD) components in `mmix` rej : list From f9810ade5814c0ddb82423d59ab566feb1e6c56e Mon Sep 17 00:00:00 2001 From: Ross Markello Date: Wed, 9 May 2018 22:49:04 -0400 Subject: [PATCH 17/18] [FIX] float32 / float64 bug in `fitmodels_direct()` --- tedana/interfaces/tedana.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tedana/interfaces/tedana.py b/tedana/interfaces/tedana.py index 65a032f40..25be1fa07 100644 --- a/tedana/interfaces/tedana.py +++ b/tedana/interfaces/tedana.py @@ -110,7 +110,7 @@ def spatclust(data, mask, csize, thr, ref_img, infile=None, dindex=0, if infile is None: data = data.copy() data[data < thr] = 0 - infile = filewrite(unmask(data, mask), '__clin.nii.gz', ref_img) + infile = filewrite(unmask(data, 
mask), '__clin.nii.gz', ref_img, gzip=True) # FIXME: ideally no calls to os.system!!! (or AFNI, for that matter) addopts = '' @@ -503,7 +503,7 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, ref_img, n_samp, n_echos, n_components = betas.shape n_voxels = mask.sum() n_data_voxels = (t2s != 0).sum() - mu = catd.mean(axis=-1) # BUG: THIS IS THE BAD PLACE + mu = catd.mean(axis=-1, dtype=float) # BUG: THIS IS THE BAD PLACE tes = np.reshape(tes, (n_echos, 1)) fmin, fmid, fmax = getfbounds(n_echos) @@ -619,11 +619,12 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, ref_img, # Do simple clustering on F # TODO: can be replaced with nilearn.image.threshold_img - os.system('3dcalc -overwrite -a {}[1..2] -expr \'a*step(a-{})\' -prefix ' - '.fcl_in.nii.gz -overwrite'.format(ccname, fmin)) + # TODO: fmin is being cast to an integer here -- is that purposeful?! + os.system('3dcalc -overwrite -a {}[1..2] -expr \'a*step(a-{:0d})\' -prefix ' + '.fcl_in.nii.gz -overwrite'.format(ccname, int(fmin))) # TODO: can be replaced with nilearn.regions.connected_regions - os.system('3dmerge -overwrite -dxyz=1 -1clust 1 {} -doall ' - '-prefix .fcl_out.nii.gz .fcl_in.nii.gz'.format(csize)) + os.system('3dmerge -overwrite -dxyz=1 -1clust 1 {:0d} -doall ' + '-prefix .fcl_out.nii.gz .fcl_in.nii.gz'.format(int(csize))) sel = load_image('.fcl_out.nii.gz')[t2s != 0] sel = np.array(sel != 0, dtype=np.int) F_R2_clmaps[:, i] = sel[:, 0] @@ -1071,7 +1072,7 @@ def selcomps(seldict, mmix, mask, ref_img, manacc, n_echos, olevel=2, oversion=9 group0_res = np.intersect1d(KRguess, group0) phys_var_zs.append((vvex - vvex[group0_res].mean()) / vvex[group0_res].std()) veinBout = unmask(veinmaskB, mask) - filewrite(veinBout.astype(int), 'veins_l%i' % t2sl_i, ref_img) + filewrite(veinBout.astype(float), 'veins_l%i' % t2sl_i, ref_img) # Mask to sample veins phys_var_z = np.array(phys_var_zs).max(0) From ef6c34ddfb0ce5d181de075d0082e988ebd5efe6 Mon Sep 17 00:00:00 2001 From: Ross 
Markello Date: Thu, 10 May 2018 10:38:52 -0400 Subject: [PATCH 18/18] [FIX] Address review comments for #22 Minor changes to address review comments for PR #22 --- tedana/interfaces/t2smap.py | 4 +--- tedana/interfaces/tedana.py | 9 ++++----- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/tedana/interfaces/t2smap.py b/tedana/interfaces/t2smap.py index 8e2d5d3af..275bb1695 100644 --- a/tedana/interfaces/t2smap.py +++ b/tedana/interfaces/t2smap.py @@ -114,7 +114,7 @@ def t2sadmap(data, tes, mask, masksum, start_echo): x = np.column_stack([np.ones(echo), [-te for te in tes[:echo]]]) X = np.repeat(x, n_vols, axis=0) - beta, res, rank, sing = np.linalg.lstsq(X, B) + beta = np.linalg.lstsq(X, B)[0] t2s = 1. / beta[1, :].T s0 = np.exp(beta[0, :]).T @@ -145,8 +145,6 @@ def make_optcom(data, t2s, tes, mask, combmode): """ Optimally combine BOLD data across TEs. - out = make_optcom(data,t2s) - Parameters ---------- data : (S x E x T) :obj:`numpy.ndarray` diff --git a/tedana/interfaces/tedana.py b/tedana/interfaces/tedana.py index 25be1fa07..203e82a80 100644 --- a/tedana/interfaces/tedana.py +++ b/tedana/interfaces/tedana.py @@ -50,8 +50,8 @@ def do_svm(X_train, y_train, X_test, svmtype=0): X_test : (N2 x F) array_like Test vectors, where n_samples is the number of samples in the test dataset and n_features is the number of features. - svmtype : int - Desired support vector machine type + svmtype : int, optional + Desired support vector machine type. Must be in [0, 1, 2]. 
Default: 0 Returns ------- @@ -68,7 +68,7 @@ def do_svm(X_train, y_train, X_test, svmtype=0): elif svmtype == 2: clf = svm.SVC(kernel='linear', probability=True) else: - raise ValueError('Input svmtype not in [1, 2, 3]') + raise ValueError('Input svmtype not in [0, 1, 2]: {}'.format(svmtype)) clf.fit(X_train, y_train) y_pred = clf.predict(X_test) @@ -112,7 +112,6 @@ def spatclust(data, mask, csize, thr, ref_img, infile=None, dindex=0, data[data < thr] = 0 infile = filewrite(unmask(data, mask), '__clin.nii.gz', ref_img, gzip=True) - # FIXME: ideally no calls to os.system!!! (or AFNI, for that matter) addopts = '' if data is not None and data.squeeze().ndim > 1 and dindex + tindex == 0: addopts = '-doall' @@ -503,7 +502,7 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, ref_img, n_samp, n_echos, n_components = betas.shape n_voxels = mask.sum() n_data_voxels = (t2s != 0).sum() - mu = catd.mean(axis=-1, dtype=float) # BUG: THIS IS THE BAD PLACE + mu = catd.mean(axis=-1, dtype=float) tes = np.reshape(tes, (n_echos, 1)) fmin, fmid, fmax = getfbounds(n_echos)