From 628bf64e5f59bddc138731aa18d359163d6cf4a4 Mon Sep 17 00:00:00 2001
From: Ross Markello <rossmarkello@gmail.com>
Date: Fri, 27 Apr 2018 10:50:19 -0400
Subject: [PATCH 01/18] [STY] Mostly stylistic, a few enhancements...

Modified some code in utils.py to be more readable and to accept various input
types (e.g., lists of data files, arrays, etc.). In the process, changed some
doc-strings, converted the print statements to logging statements, and added a
few comments identifying things that need to change to better integrate
surface files.
---
 tedana/interfaces/t2smap.py |  11 +-
 tedana/interfaces/tedana.py | 189 +++++++++++++++---------------
 tedana/utils/utils.py       | 228 +++++++++++++++++++++---------------
 3 files changed, 237 insertions(+), 191 deletions(-)

diff --git a/tedana/interfaces/t2smap.py b/tedana/interfaces/t2smap.py
index bfbe67e76..68ae8ec2c 100644
--- a/tedana/interfaces/t2smap.py
+++ b/tedana/interfaces/t2smap.py
@@ -3,6 +3,9 @@
 from tedana.utils import (niwrite, cat2echos,
                           makeadmask, unmask, fmask)
 
+import logging
+lgr = logging.getLogger(__name__)
+
 
 def t2sadmap(catd, mask, tes, masksum, start_echo):
     """
@@ -120,16 +123,16 @@ def main(options):
     catd = cat2echos(catim.get_data(), ne)
     nx, ny, nz, Ne, nt = catd.shape
 
-    print("++ Computing Mask")
-    mask, masksum = makeadmask(catd, min=False, getsum=True)
+    lgr.info('++ Computing Mask')
+    mask, masksum = makeadmask(catd, minimum=False, getsum=True)
 
-    print("++ Computing Adaptive T2* map")
+    lgr.info('++ Computing Adaptive T2* map')
     t2s, s0, t2ss, s0vs, t2saf, s0vaf = t2sadmap(catd, mask, tes, masksum, 2)
     niwrite(masksum, aff, 'masksum%s.nii' % suf)
     niwrite(t2ss, aff, 't2ss%s.nii' % suf)
     niwrite(s0vs, aff, 's0vs%s.nii' % suf)
 
-    print("++ Computing optimal combination")
+    lgr.info('++ Computing optimal combination')
     tsoc = np.array(optcom(catd,
                            t2s,
                            tes,
diff --git a/tedana/interfaces/tedana.py b/tedana/interfaces/tedana.py
index 139279ef4..b69b99dc3 100644
--- a/tedana/interfaces/tedana.py
+++ b/tedana/interfaces/tedana.py
@@ -1,4 +1,5 @@
 import os
+import shutil
 import sys
 import pickle
 import textwrap
@@ -11,6 +12,9 @@
                           makeadmask, fmask, unmask,
                           fitgaussian, niwrite, dice, andb)
 
+import logging
+lgr = logging.getLogger(__name__)
+
 """
 PROCEDURE 2 : Computes ME-PCA and ME-ICA
 -Computes T2* map
@@ -72,34 +76,27 @@ def do_svm(X_train, y_train, X_test, svmtype=0):
 def spatclust(data, mask, csize, thr, header, aff, infile=None, dindex=0,
               tindex=0):
     """
+    Thresholds and spatially clusters `data`
 
     Parameters
     ----------
-    data :
-
+    data : array_like
     mask :
-
-    csize :
-
+    csize : int
     thr :
-
     header :
-
     aff :
-
     infile :
-
     dindex :
-
     tindex :
 
-
     Returns
     -------
     clustered :
+    """
 
+    # threshold image
 
-    """
     if infile is None:
         data = data.copy()
         data[data < thr] = 0
@@ -111,6 +108,7 @@ def spatclust(data, mask, csize, thr, header, aff, infile=None, dindex=0,
     else:
         addopts = '-1dindex {0} -1tindex {1}'.format(str(dindex), str(tindex))
 
+    # build and run the AFNI 3dmerge clustering command on `infile`
     cmd_str = '3dmerge -overwrite {0} -dxyz=1 -1clust 1 {1:d} ' \
               '-1thresh {2:.02f} -prefix __clout.nii.gz {3}'
     os.system(cmd_str.format(addopts, int(csize), float(thr), infile))
@@ -119,7 +117,8 @@ def spatclust(data, mask, csize, thr, header, aff, infile=None, dindex=0,
 
 
 def rankvec(vals):
-    """Returns ranks of array.
+    """
+    Returns ranks of array
 
     Parameters
     ----------
@@ -133,7 +132,7 @@ def rankvec(vals):
     """
     try:
         vals = np.array(vals)
-    except:
+    except:  # would this ever happen????
         raise IOError('Input vals is not array_like')
 
     if len(vals.shape) != 1:
@@ -280,22 +279,22 @@ def getelbow_aggr(ks, val=False):
         return maxcurv
 
 
-def getfbounds(ne):
+def getfbounds(n_echos):
     """
 
     Parameters
     ----------
-    ne : int
+    n_echos : int
         Number of echoes.
 
     Returns
     -------
     """
-    if not isinstance(ne, int):
-        raise IOError('Input ne must be int')
-    elif ne <= 0:
-        raise ValueError('Input ne must be greater than 0')
-    idx = ne - 1
+    if not isinstance(n_echos, int):
+        raise IOError('Input n_echos must be int')
+    elif n_echos <= 0:
+        raise ValueError('Input n_echos must be greater than 0')
+    idx = n_echos - 1
 
     F05s = [None, None, 18.5, 10.1, 7.7, 6.6, 6.0, 5.6, 5.3, 5.1, 5.0]
     F025s = [None, None, 38.5, 17.4, 12.2, 10, 8.8, 8.1, 7.6, 7.2, 6.9]
@@ -308,12 +307,12 @@ def eimask(dd, ees=None):
         ees = range(dd.shape[1])
     imask = np.zeros([dd.shape[0], len(ees)])
     for ee in ees:
-        print(ee)
+        lgr.info(ee)
         lthr = 0.001 * stats.scoreatpercentile(dd[:, ee, :].flatten(),
                                                98, interpolation_method='lower')
         hthr = 5 * stats.scoreatpercentile(dd[:, ee, :].flatten(),
                                            98, interpolation_method='lower')
-        print(lthr, hthr)
+        lgr.info('%s %s', lthr, hthr)
         imask[dd[:, ee, :].mean(1) > lthr, ee] = 1
         imask[dd[:, ee, :].mean(1) > hthr, ee] = 0
     return imask
@@ -399,16 +398,16 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, head,
     totvar_norm = (WTS**2).sum()
 
     # Compute Betas and means over TEs for TE-dependence analysis
-    Ne = len(tes)
-    betas = cat2echos(get_coeffs(uncat2echos(catd, Ne),
-                                 np.tile(mask, (1, 1, Ne)),
-                                 mmix), Ne)
-    nx, ny, nz, Ne, nc = betas.shape
+    n_echos = len(tes)
+    betas = cat2echos(get_coeffs(uncat2echos(catd),
+                                 np.tile(mask, (1, 1, n_echos)),
+                                 mmix), n_echos)
+    nx, ny, nz, n_echos, nc = betas.shape
     Nm = mask.sum()
     NmD = (t2s != 0).sum()
     mu = catd.mean(axis=-1)
-    tes = np.reshape(tes, (Ne, 1))
-    fmin, fmid, fmax = getfbounds(Ne)
+    tes = np.reshape(tes, (n_echos, 1))
+    fmin, fmid, fmax = getfbounds(n_echos)
 
     # Mask arrays
     mumask = fmask(mu, t2s != 0)
@@ -448,14 +447,14 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, head,
 
         # S0 Model
         coeffs_S0 = (B * X1).sum(axis=0) / (X1**2).sum(axis=0)
-        SSE_S0 = (B - X1 * np.tile(coeffs_S0, (Ne, 1)))**2
+        SSE_S0 = (B - X1 * np.tile(coeffs_S0, (n_echos, 1)))**2
         SSE_S0 = SSE_S0.sum(axis=0)
         F_S0 = (alpha - SSE_S0) * 2 / (SSE_S0)
         F_S0_maps[:, i] = F_S0
 
         # R2 Model
         coeffs_R2 = (B * X2).sum(axis=0) / (X2**2).sum(axis=0)
-        SSE_R2 = (B - X2 * np.tile(coeffs_R2, (Ne, 1)))**2
+        SSE_R2 = (B - X2 * np.tile(coeffs_R2, (n_echos, 1)))**2
         SSE_R2 = SSE_R2.sum(axis=0)
         F_R2 = (alpha - SSE_R2)*2/(SSE_R2)
         F_R2_maps[:, i] = F_R2
@@ -522,8 +521,10 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, head,
             csize = np.max([int(Nm * 0.0005) + 5, 20])
 
             # Do simple clustering on F
+            # TODO: can be replaced with nilearn.image.threshold_img
             os.system("3dcalc -overwrite -a %s[1..2] -expr 'a*step(a-%i)' -prefix .fcl_in.nii.gz "
                       "-overwrite" % (ccname, fmin))
+            # TODO: can be replaced with nilearn.regions.connected_regions
             os.system('3dmerge -overwrite -dxyz=1 -1clust 1 %i -doall '
                       '-prefix .fcl_out.nii.gz .fcl_in.nii.gz' % (csize))
             sel = fmask(nib.load('.fcl_out.nii.gz').get_data(), t2s != 0) != 0
@@ -574,7 +575,7 @@ def selcomps(seldict, mmix, head, manacc, debug=False, olevel=2, oversion=99,
     if filecsdata:
         import bz2
         if seldict is not None:
-            print("Saving component selection data")
+            lgr.info('Saving component selection data')
             csstate_f = bz2.BZ2File('compseldata.pklbz', 'wb')
             pickle.dump(seldict, csstate_f)
             csstate_f.close()
@@ -584,7 +585,7 @@ def selcomps(seldict, mmix, head, manacc, debug=False, olevel=2, oversion=99,
                 seldict = pickle.load(csstate_f)
                 csstate_f.close()
             except FileNotFoundError:
-                print("No component data found!")
+                lgr.warning('No component data found!')
                 return None
 
     # Dump dictionary into variable names
@@ -643,7 +644,7 @@ def selcomps(seldict, mmix, head, manacc, debug=False, olevel=2, oversion=99,
             mwu = stats.norm.ppf(stats.mannwhitneyu(signal_FR2_Z, noise_FR2_Z)[1])
             tt_table[ii, 0] = np.abs(mwu) * ttest[0] / np.abs(ttest[0])
             tt_table[ii, 1] = ttest[1]
-        except:
+        except:  # TODO: what is the error that might be caught here?
             pass
     tt_table[np.isnan(tt_table)] = 0
     tt_table[np.isinf(tt_table[:, 0]), 0] = np.percentile(tt_table[~np.isinf(tt_table[:, 0]), 0],
@@ -711,7 +712,7 @@ def selcomps(seldict, mmix, head, manacc, debug=False, olevel=2, oversion=99,
     """
     # epsmap is [index,level of overlap with dicemask,
     # number of high Rho components]
-    F05, F025, F01 = getfbounds(ne)
+    F05, F025, F01 = getfbounds(n_echos)
     epsmap = []
     Rhos_sorted = np.array(sorted(Rhos))[::-1]
     # Make an initial guess as to number of good components based on
@@ -721,11 +722,11 @@ def selcomps(seldict, mmix, head, manacc, debug=False, olevel=2, oversion=99,
                     getelbow_cons(Kappas), getelbow_aggr(Kappas)]
     Kelbowval = np.median([getelbow_mod(Kappas, val=True),
                            getelbow_cons(Kappas, val=True),
-                           getelbow_aggr(Kappas, val=True)] + list(getfbounds(ne)))
+                           getelbow_aggr(Kappas, val=True)] + list(getfbounds(n_echos)))
     Khighelbowval = stats.scoreatpercentile([getelbow_mod(Kappas, val=True),
                                              getelbow_cons(Kappas, val=True),
                                              getelbow_aggr(Kappas, val=True)] +
-                                            list(getfbounds(ne)),
+                                            list(getfbounds(n_echos)),
                                             75, interpolation_method='lower')
     KRcut = np.median(KRcutguesses)
     # only use exclusive when inclusive is extremely inclusive - double KRcut
@@ -779,7 +780,7 @@ def selcomps(seldict, mmix, head, manacc, debug=False, olevel=2, oversion=99,
                            nc[Rhos > getelbow_mod(Rhos_sorted,
                                                   val=True)]).shape[0]])
             if debug:
-                print("found solution", ii, db.labels_)
+                lgr.info('found solution %s %s', ii, db.labels_)
         db = None
 
     epsmap = np.array(epsmap)
@@ -789,7 +790,7 @@ def selcomps(seldict, mmix, head, manacc, debug=False, olevel=2, oversion=99,
         # Select index that maximizes Dice with guessmask but first
         # minimizes number of higher Rho components
         ii = int(epsmap[np.argmax(epsmap[epsmap[:, 2] == np.min(epsmap[:, 2]), 1], 0), 0])
-        print('Component selection tuning: ', epsmap[:, 1].max())
+        lgr.info('Component selection tuning: %s', epsmap[:, 1].max())
         db = DBSCAN(eps=.005+ii*.005, min_samples=3).fit(fz.T)
         ncl = nc[db.labels_ == 0]
         ncl = np.setdiff1d(ncl, rej)
@@ -799,7 +800,7 @@ def selcomps(seldict, mmix, head, manacc, debug=False, olevel=2, oversion=99,
         to_clf = np.setdiff1d(nc, np.union1d(ncl, rej))
     if len(group0) == 0 or len(group0) < len(KRguess) * .5:
         dbscanfailed = True
-        print("DBSCAN based guess failed. Using elbow guess method.")
+        lgr.info('DBSCAN based guess failed. Using elbow guess method.')
         ncl = np.setdiff1d(np.setdiff1d(nc[KRelbow == 2], rej),
                            np.union1d(nc[tt_table[:, 0] < tt_lim],
                            np.union1d(np.union1d(nc[spz > 1],
@@ -810,8 +811,8 @@ def selcomps(seldict, mmix, head, manacc, debug=False, olevel=2, oversion=99,
         group_n1 = []
         to_clf = np.setdiff1d(nc, np.union1d(group0, rej))
     if len(group0) < 2 or (len(group0) < 4 and float(len(rej))/len(group0) > 3):
-        print("WARNING: Extremely limited reliable BOLD signal space. "
-              "Not filtering further into midk etc.")
+        lgr.warning('WARNING: Extremely limited reliable BOLD signal space. '
+                    'Not filtering further into midk etc.')
         midkfailed = True
         min_acc = np.array([])
         if len(group0) != 0:
@@ -1021,23 +1022,23 @@ def selcomps(seldict, mmix, head, manacc, debug=False, olevel=2, oversion=99,
 
 
 def tedpca(combmode, mask, stabilize, head, ste=0, mlepca=True):
-    nx, ny, nz, ne, nt = catd.shape
+    nx, ny, nz, n_echos, nt = catd.shape
     ste = np.array([int(ee) for ee in str(ste).split(',')])
     if len(ste) == 1 and ste[0] == -1:
-        print("-Computing PCA of optimally combined multi-echo data")
+        lgr.info('-Computing PCA of optimally combined multi-echo data')
         OCmask = make_mask(OCcatd[:, :, :, np.newaxis, :])
         d = fmask(OCcatd, OCmask)
         eim = eimask(d[:, np.newaxis, :])
         eim = eim[:, 0] == 1
         d = d[eim, :]
     elif len(ste) == 1 and ste[0] == 0:
-        print("-Computing PCA of spatially concatenated multi-echo data")
-        ste = np.arange(ne)
+        lgr.info('-Computing PCA of spatially concatenated multi-echo data')
+        ste = np.arange(n_echos)
         d = np.float64(fmask(catd, mask))
         eim = eimask(d) == 1
         d = d[eim]
     else:
-        print("-Computing PCA of TE #%s" % ','.join([str(ee) for ee in ste]))
+        lgr.info('-Computing PCA of TE #%s' % ','.join([str(ee) for ee in ste]))
         d = np.float64(np.concatenate([fmask(catd[:, :, :, ee, :],
                                              mask)[:, np.newaxis, :] for ee in ste-1], axis=1))
         eim = eimask(d) == 1
@@ -1089,17 +1090,17 @@ def tedpca(combmode, mask, stabilize, head, ste=0, mlepca=True):
         ctb = np.vstack([ctb.T[0:3], sp]).T
 
         # Save state
-        print("Saving PCA")
+        lgr.info('Saving PCA')
         pcastate = {'u': u, 's': s, 'v': v, 'ctb': ctb,
                     'eigelb': eigelb, 'spmin': spmin, 'spcum': spcum}
         try:
             with open('pcastate.pkl', 'wb') as handle:
                 pickle.dump(pcastate, handle)
         except TypeError:
-            print("Could not save PCA solution!")
+            lgr.warning('Could not save PCA solution!')
 
     else:  # if loading existing state
-        print("Loading PCA")
+        lgr.info('Loading PCA')
         with open('pcastate.pkl', 'rb') as handle:
             pcastate = pickle.load(handle)
         (u, s, v, ctb,
@@ -1112,7 +1113,7 @@ def tedpca(combmode, mask, stabilize, head, ste=0, mlepca=True):
 
     kappas = ctb[ctb[:, 1].argsort(), 1]
     rhos = ctb[ctb[:, 2].argsort(), 2]
-    fmin, fmid, fmax = getfbounds(ne)
+    fmin, fmid, fmax = getfbounds(n_echos)
     kappa_thr = np.average(sorted([fmin, getelbow_mod(kappas, val=True)/2, fmid]),
                            weights=[kdaw, 1, 1])
     rho_thr = np.average(sorted([fmin, getelbow_cons(rhos, val=True)/2, fmid]),
@@ -1144,7 +1145,7 @@ def tedpca(combmode, mask, stabilize, head, ste=0, mlepca=True):
     dd = u.dot(np.diag(s*np.array(pcsel, dtype=np.int))).dot(v)
 
     nc = s[pcsel].shape[0]
-    print("--Selected %i components. Minimum Kappa=%0.2f Rho=%0.2f" % (nc, kappa_thr, rho_thr))
+    lgr.info('--Selected %i components. Minimum Kappa=%0.2f Rho=%0.2f' % (nc, kappa_thr, rho_thr))
 
     dd = ((dd.T - dd.T.mean(0)) / dd.T.std(0)).T  # Variance normalize timeseries
     dd = (dd - dd.mean()) / dd.std()  # Variance normalize everything
@@ -1171,7 +1172,7 @@ def tedica(nc, dd, conv, fixed_seed, cost, final_cost):
     return mmix
 
 
-def gscontrol_raw(OCcatd, head, Ne, dtrank=4):
+def gscontrol_raw(OCcatd, head, n_echos, dtrank=4):
     """
     This function uses the spatial global signal estimation approach to
     modify catd (global variable) to removal global signal out of individual
@@ -1180,7 +1181,7 @@ def gscontrol_raw(OCcatd, head, Ne, dtrank=4):
     polynomial basis of order=0 and degree=dtrank.
     """
 
-    print("++ Applying amplitude-based T1 equilibration correction")
+    lgr.info('++ Applying amplitude-based T1 equilibration correction')
 
     # Legendre polynomial basis for denoising
     from scipy.special import lpmv
@@ -1216,7 +1217,7 @@ def gscontrol_raw(OCcatd, head, Ne, dtrank=4):
     niwrite(OCcatd, aff, 'tsoc_nogs.nii', head)
 
     # Project glbase out of each echo
-    for ii in range(Ne):
+    for ii in range(n_echos):
         dat = catd[:, :, :, ii, :][Gmask]
         sol = np.linalg.lstsq(np.atleast_2d(glbase), dat.T)
         e_nogs = dat - np.dot(np.atleast_2d(sol[0][dtrank]).T,
@@ -1245,7 +1246,7 @@ def gscontrol_mmix(mmix, acc, rej, midk, empty, head):
     bold_ts = np.dot(solG[0].T[:, acc], mmix[:, acc].T)
     sphis = bold_ts.min(-1)
     sphis -= sphis.mean()
-    print(sphis.shape)
+    lgr.info(sphis.shape)
     niwrite(unmask(sphis, mask), aff, 'sphis_hik.nii', head)
 
     """
@@ -1290,7 +1291,7 @@ def write_split_ts(data, comptable, mmix, acc, rej, midk, head, suffix=''):
                              mask, mmix), mask)
     dmdata = mdata.T-mdata.T.mean(0)
     varexpl = (1-((dmdata.T-betas.dot(mmix.T))**2.).sum()/(dmdata**2.).sum())*100
-    print('Variance explained: ', varexpl, '%')
+    lgr.info('Variance explained: %s %%', varexpl)
     midkts = betas[:, midk].dot(mmix.T[midk, :])
     lowkts = betas[:, rej].dot(mmix.T[rej, :])
     if len(acc) != 0:
@@ -1361,31 +1362,31 @@ def writect(comptable, nt, acc, rej, midk, empty, ctname='', varexpl='-1'):
 
 
 def writeresults(OCcatd, comptable, mmix, nt, acc, rej, midk, empty, head):
-    print("++ Writing optimally combined time series")
+    lgr.info('++ Writing optimally combined time series')
     ts = OCcatd
     niwrite(ts, aff, 'ts_OC.nii', head)
-    print("++ Writing Kappa-filtered optimally combined timeseries")
+    lgr.info('++ Writing Kappa-filtered optimally combined timeseries')
     varexpl = write_split_ts(ts, comptable, mmix, acc,
                              rej, midk, head, suffix='OC')
-    print("++ Writing signal versions of components")
+    lgr.info('++ Writing signal versions of components')
     ts_B = get_coeffs(ts, mask, mmix)
     niwrite(ts_B[:, :, :, :], aff, '_'.join(['betas', 'OC']) + '.nii', head)
 
     if len(acc) != 0:
         niwrite(ts_B[:, :, :, acc], aff, '_'.join(['betas_hik', 'OC'])+'.nii',
                 head)
-        print("++ Writing optimally combined high-Kappa features")
+        lgr.info('++ Writing optimally combined high-Kappa features')
         writefeats(split_ts(ts, comptable, mmix,
                             acc, rej, midk)[0],
                    mmix[:, acc], mask, head, suffix='OC2')
-    print("++ Writing component table")
+    lgr.info('++ Writing component table')
     writect(comptable, nt, acc, rej, midk, empty,
             ctname='comp_table.txt', varexpl=varexpl)
 
 
 def writeresults_echoes(acc, rej, midk, head, comptable, mmix):
-    for ii in range(ne):
-        print("++ Writing Kappa-filtered TE#%i timeseries" % (ii+1))
+    for ii in range(n_echos):
+        lgr.info('++ Writing Kappa-filtered TE#%i timeseries' % (ii+1))
         write_split_ts(catd[:, :, :, ii, :], comptable, mmix,
                        acc, rej, midk, head, suffix='e%i' % (ii+1))
 
@@ -1400,19 +1401,23 @@ def main(options):
              stabilize=False, fout=False, filecsdata=False, label=None,
              fixed_seed=42
     """
-    global tes, ne, catd, head, aff
-    tes = [float(te) for te in options.tes]
-    ne = len(tes)
-    catim = nib.load(options.data[0])
 
-    head = catim.get_header()
-    head.extensions = []
-    head.set_sform(head.get_sform(), code=1)
-    aff = catim.get_affine()
-    catd = cat2echos(catim.get_data(), ne)
-    nx, ny, nz, Ne, nt = catd.shape
+    # off to a bad start with globals
+    global tes, n_echos, catd, head, aff
+    tes = [float(te) for te in options.tes]
+    n_echos = len(tes)
 
-    # Parse options, prepare output directory
+    # get some info on the input data
+    # TODO: only works on nifti
+    catim = nib.load(options.data[0])
+    head = catim.header
+    head.extensions = []  # clear extension info in header
+    head.set_sform(head.get_sform(), code=1)  # reset sform code
+    aff = catim.affine  # TODO: gifti has no affine
+    catd = cat2echos(options.data, n_echos=n_echos)
+    nx, ny, nz, n_echos, nt = catd.shape
+
+    # parse options, prepare output directory
     if options.fout:
         options.fout = aff
     else:
@@ -1427,36 +1432,30 @@ def main(options):
     rdaw = float(options.rdaw)
 
     if options.label is not None:
-        dirname = '%s' % '.'.join(['TED', options.label])
+        dirname = '.'.join(['TED', options.label])
     else:
         dirname = 'TED'
-    os.system('mkdir %s' % dirname)
+    os.makedirs(dirname, exist_ok=True)
     if options.mixm is not None:
         try:
-            os.system('cp %s %s/meica_mix.1D; cp %s %s/%s' % (options.mixm,
-                                                              dirname,
-                                                              options.mixm,
-                                                              dirname,
-                                                              os.path.basename(options.mixm)))
-        except:
+            shutil.copyfile(options.mixm, os.path.join(dirname, 'meica_mix.1D'))
+            shutil.copyfile(options.mixm, os.path.join(dirname, os.path.basename(options.mixm)))
+        except shutil.Error:
             pass
     if options.ctab is not None:
         try:
-            os.system('cp %s %s/comp_table.txt; cp %s %s/%s' % (options.mixm,
-                                                                dirname,
-                                                                options.mixm,
-                                                                dirname,
-                                                                os.path.basename(options.mixm)))
-        except:
+            shutil.copyfile(options.ctab, os.path.join(dirname, 'comp_table.txt'))
+            shutil.copyfile(options.ctab, os.path.join(dirname, os.path.basename(options.ctab)))
+        except shutil.Error:
             pass
 
     os.chdir(dirname)
 
-    print("++ Computing Mask")
+    lgr.info('++ Computing Mask')
     global mask
     mask, masksum = makeadmask(catd, minimum=False, getsum=True)
 
-    print("++ Computing T2* map")
+    lgr.info('++ Computing T2* map')
     global t2s, s0, t2ss, s0s, t2sG, s0G
     t2s, s0, t2ss, s0s, t2sG, s0G = t2sadmap(catd, mask, tes, masksum, 1)
 
@@ -1480,7 +1479,7 @@ def main(options):
         catd, OCcatd = gscontrol_raw(OCcatd, head, len(tes))
 
     if options.mixm is None:
-        print("++ Doing ME-PCA and ME-ICA")
+        lgr.info('++ Doing ME-PCA and ME-ICA')
 
         nc, dd = tedpca(combmode, mask, stabilize, head, ste=options.ste)
 
@@ -1520,8 +1519,8 @@ def main(options):
             acc, rej, midk, empty = ctabsel(options.ctab)
 
     if len(acc) == 0:
-        print("** WARNING! No BOLD components detected!!! ** \n"
-              "Please check data and results!")
+        lgr.warning('** WARNING! No BOLD components detected!!! \n'
+                    '** Please check data and results!')
 
     writeresults(OCcatd, comptable, mmix, nt, acc, rej, midk, empty, head)
     gscontrol_mmix(mmix, acc, rej, midk, empty, head)
diff --git a/tedana/utils/utils.py b/tedana/utils/utils.py
index aa48d57b9..f697fb730 100644
--- a/tedana/utils/utils.py
+++ b/tedana/utils/utils.py
@@ -1,110 +1,143 @@
 """Utilities for meica package"""
 import numpy as np
 import nibabel as nib
+from nibabel.filename_parser import split_add_ext
+from nilearn._utils import check_niimg
+import nilearn.masking as nimask
 from scipy.optimize import leastsq
-from scipy.stats import scoreatpercentile
 
 from ..due import due, BibTeX
 
 
-def cat2echos(data, Ne):
+# TODO: Currently only accepts niftis -- do we need it to accept giftis?
+def cat2echos(data, n_echos=None):
     """
-    Separates z- and echo-axis in `data`
+    Coerces input `data` files to required array output
 
     Parameters
     ----------
-    data : array_like
-        Array of shape (nx, ny, nz*Ne, nt)
-    Ne : int
-        Number of echoes that were in original (uncombined) data array
+    data : (X x Y x M x T) array_like or list-of-niimg-like
+        Input multi-echo data array or independent echo files, where M is Z *
+        the number of echos
+    n_echos : int
+        Number of echos
 
     Returns
     -------
-    ndarray
-        Array of shape (nx, ny, nz, Ne, nt)
+    fdata : (X x Y x Z x E x T) np.ndarray
+        Where `X`, `Y`, `Z` are spatial dims, `E` is echos, and `T` is time
     """
 
-    nx, ny = data.shape[0:2]
-    nz = data.shape[2] // Ne
-    if len(data.shape) > 3:
-        nt = data.shape[3]
-    else:
-        nt = 1
-    return np.reshape(data, (nx, ny, nz, Ne, nt), order='F')
+    if isinstance(data, list):
+        # the individual echo files were provided
+        if len(data) > 2:
+            fdata = np.stack([nib.load(f).get_data() for f in data], axis=3)
+            # ensure we have a time dimension
+            if fdata.ndim < 5:
+                fdata = fdata[..., np.newaxis]
+            return fdata
+        # a z-concatenated file was provided (hopefully)
+        elif len(data) == 1:
+            if n_echos is None:
+                raise ValueError('Number of echos `n_echos` must be specified '
+                                 'if z-concatenated data file provided.')
+            data = nib.load(data[0]).get_data()
+        else:
+            raise ValueError('Cannot run `tedana` with only two echos: '
+                             '{}'.format(data))
 
+    # either an array or a z-concatenated file was provided
+    (nx, ny), nz = data.shape[:2], data.shape[2] // n_echos
+    fdata = data.reshape(nx, ny, nz, n_echos, -1, order='F')
 
-def makeadmask(cdat, minimum=True, getsum=False):
+    return fdata
+
+
+def uncat2echos(data):
     """
-    Create a mask.
+    Combines Z- and echo-axis in `data`
+
+    Parameters
+    ----------
+    data : (X x Y x Z x E x T) array_like
+        Multi-echo data array
+
+    Returns
+    -------
+    fdata : (X x Y x M x T) np.ndarray
+        Z-concatenated multi-echo data array, where M is Z * number of echos
     """
-    nx, ny, nz, Ne, _ = cdat.shape
 
-    mask = np.ones((nx, ny, nz), dtype=np.bool)
+    if data.ndim < 4:
+        raise ValueError('Input data must have at least four dimensions; '
+                         'provided data has only {0}'.format(data.ndim))
 
-    if minimum:
-        mask = cdat.prod(axis=-1).prod(-1) != 0
-        return mask
-    else:
-        # Make a map of longest echo that a voxel can be sampled with,
-        # with minimum value of map as X value of voxel that has median
-        # value in the 1st echo. N.b. larger factor leads to bias to lower TEs
-        emeans = cdat.mean(-1)
-        medv = emeans[:, :, :, 0] == scoreatpercentile(emeans[:, :, :, 0][emeans[:, :, :, 0] != 0],
-                                                       33, interpolation_method='higher')
-        lthrs = np.squeeze(np.array([emeans[:, :, :, ee][medv] / 3 for ee in range(Ne)]))
-
-        if len(lthrs.shape) == 1:
-            lthrs = np.atleast_2d(lthrs).T
-        lthrs = lthrs[:, lthrs.sum(0).argmax()]
-
-        mthr = np.ones([nx, ny, nz, Ne])
-        for i_echo in range(Ne):
-            mthr[:, :, :, i_echo] *= lthrs[i_echo]
-        mthr = np.abs(emeans) > mthr
-        masksum = np.array(mthr, dtype=np.int).sum(-1)
-        mask = masksum != 0
-        if getsum:
-            return mask, masksum
-        else:
-            return mask
+    (nx, ny), nz = data.shape[:2], np.prod(data.shape[2:4])
+    return data.reshape(nx, ny, nz, -1, order='F')
 
 
-def uncat2echos(data, Ne):
+def makeadmask(data, minimum=True, getsum=False):
     """
-    Combines z- and echo-axis in `data`
+    Makes map of `data` specifying longest echo a voxel can be sampled with
 
     Parameters
     ----------
-    data : array_like
-        Array of shape (nx, ny, nz, Ne, nt)
-    Ne : int
-        Number of echoes; should be equal to `data.shape[3]`
+    data : (X x Y x Z x E x T) array_like
+        Where `X`, `Y`, `Z` are spatial dims, `E` is echos, and `T` is time
+    minimum : bool, optional
+        Use `make_min_mask` instead of generating an echo-specific map.
+        Default: True
+    getsum : bool, optional
+        Return `masksum` in addition to mask. Default: False
 
     Returns
     -------
-    ndarray
-        Array of shape (nx, ny, nz*Ne, nt)
+    mask : (X x Y x Z) np.ndarray
+        Boolean array of voxels that have sufficient signal in at least one
+        echo
+    masksum : (X x Y x Z) np.ndarray
+        Valued array indicating the number of echos with sufficient signal in a
+        given voxel. Only returned if `getsum = True`
     """
 
-    nx, ny = data.shape[0:2]
-    nz = data.shape[2] * Ne
-    if len(data.shape) > 4:
-        nt = data.shape[4]
-    else:
-        nt = 1
-    return np.reshape(data, (nx, ny, nz, nt), order='F')
-
-
-def make_mask(catdata):
+    if minimum:
+        return make_min_mask(data)
+
+    x, y, z, n_echos, _ = data.shape
+    emeans = data.mean(axis=-1)
+    first_echo = emeans[:, :, :, 0]
+    # make a map of longest echo with which a voxel can be sampled, with min
+    # value of map as X value of voxel that has median value in the 1st echo
+    # N.B. larger factor (%ile??) leads to bias to lower TEs
+    perc33 = np.percentile(first_echo[first_echo.nonzero()], 33,
+                           interpolation='higher')  # why take 33rd %ile?
+    medv = (first_echo == perc33)
+    lthrs = np.vstack([emeans[:, :, :, echo][medv] / 3 for echo in
+                       range(n_echos)])  # why divide by three?
+    lthrs = lthrs[:, lthrs.sum(0).argmax()]
+    mthr = np.ones(data.shape[:-1])
+    for echo in range(n_echos):
+        mthr[:, :, :, echo] *= lthrs[echo]
+
+    masksum = (np.abs(emeans) > mthr).astype('int').sum(axis=-1)
+    mask = (masksum != 0)
+
+    if getsum:
+        return mask, masksum
+
+    return mask
+
+
+def make_min_mask(data):
     """
-    Generates a 3D mask of `catdata`
+    Generates a 3D mask of `data`
 
     Only voxels that are consistently (i.e., across time AND echoes) non-zero
-    in `catdata` are True in output
+    in `data` are True in output
 
     Parameters
     ----------
-    catdata : (X x Y x Z x E x T) array_like
+    data : (X x Y x Z x E x T) array_like
         Multi-echo data array, where X, Y, Z are spatial dimensions, E
         corresponds to individual echo data, and T is time
 
@@ -114,8 +147,8 @@ def make_mask(catdata):
         Boolean array
     """
 
-    catdata = np.asarray(catdata)
-    return catdata.prod(axis=-1).prod(axis=-1).astype('bool')
+    data = np.asarray(data)
+    return data.prod(axis=-1).prod(axis=-1).astype('bool')
 
 
 def make_opt_com(medata):
@@ -130,34 +163,45 @@ def make_opt_com(medata):
     pass
 
 
-def fmask(data, mask):
+def fmask(data, mask=None):
     """
     Masks `data` with non-zero entries of `mask`
 
     Parameters
     ----------
-    data : array_like
-        Array of shape (nx, ny, nz[, Ne[, nt]])
-    mask : array_like
-        Boolean array of shape (nx, ny, nz)
+    data : (X x Y x Z [x E [x T]]) array_like or niimg-like object
+        Data array or data file to be masked
+    mask : (X x Y x Z) array_like or niimg-like object
+        Boolean array or mask file
 
     Returns
     -------
-    ndarray
-        Masked array of shape (nx*ny*nz[, Ne[, nt]])
+    fdata : (V [x E] x T) np.ndarray
+        Masked `data`, where `V` is voxels/vertices, `E` is echoes, and `T` is
+        time
     """
 
-    s = data.shape
-
-    N = s[0] * s[1] * s[2]
-    news = []
-    news.append(N)
+    if mask is not None and not type(data) == type(mask):
+        raise TypeError('Provided `data` and `mask` must be of same type.')
 
-    if len(s) > 3:
-        news.extend(s[3:])
-
-    tmp1 = np.reshape(data, news)
-    fdata = tmp1.compress((mask > 0).ravel(), axis=0)
+    if isinstance(data, str):
+        root, ext, addext = split_add_ext(data)
+        if ext == '.gii':
+            # mask need not apply for gii files
+            fdata = np.column_stack([f.data for f in nib.load(data).darrays])
+        else:
+            # use nilearn for other files
+            data = check_niimg(data)
+            if mask is not None:
+                # TODO: check that this uses same order to flatten
+                fdata = nimask.apply_mask(data, mask).T
+            else:
+                fdata = data.get_data().reshape((-1,) + data.shape[3:])
+    elif isinstance(data, np.ndarray):
+        # flatten data over first three dimensions and apply mask
+        fdata = data.reshape((-1,) + data.shape[3:])
+        if mask is not None:
+            fdata = fdata[mask.flatten() > 0]
 
     return fdata.squeeze()
 
@@ -168,15 +212,15 @@ def unmask(data, mask):
 
     Parameters
     ----------
-    data : array_like
-        Masked array of shape (nx*ny*nz[, Ne[, nt]])
-    mask : array_like
-        Boolean array of shape (nx, ny, nz)
+    data : (V x E x T) array_like
+        Masked array, where V is voxels flattened across spatial dimensions
+    mask : (X x Y x Z) array_like
+        Boolean array that was used to mask `data`
 
     Returns
     -------
-    ndarray
-        Array of shape (nx, ny, nz[, Ne[, nt]])
+    fdata : (X x Y x Z x E x T) np.ndarray
+        Unmasked `data` array with spatial dimensions intact
     """
 
     M = (mask != 0).ravel()

From cdc8cf08e0da5b5f898155260226cc4ea52fb40a Mon Sep 17 00:00:00 2001
From: Ross Markello <rossmarkello@gmail.com>
Date: Fri, 27 Apr 2018 11:35:21 -0400
Subject: [PATCH 02/18] [FIX] Screwed up nibabel import

---
 tedana/utils/utils.py | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/tedana/utils/utils.py b/tedana/utils/utils.py
index f697fb730..d853b790b 100644
--- a/tedana/utils/utils.py
+++ b/tedana/utils/utils.py
@@ -1,7 +1,7 @@
 """Utilities for meica package"""
 import numpy as np
 import nibabel as nib
-from nibabel.filename_parser import split_add_ext
+from nibabel.filename_parser import splitext_addext
 from nilearn._utils import check_niimg
 import nilearn.masking as nimask
 from scipy.optimize import leastsq
@@ -151,18 +151,6 @@ def make_min_mask(data):
     return data.prod(axis=-1).prod(axis=-1).astype('bool')
 
 
-def make_opt_com(medata):
-    """
-    Makes optimal combination from input multi-echo data
-
-    Parameters
-    ----------
-    medata : tedana.interfaces.data.MultiEchoData
-    """
-
-    pass
-
-
 def fmask(data, mask=None):
     """
     Masks `data` with non-zero entries of `mask`
@@ -185,7 +173,7 @@ def fmask(data, mask=None):
         raise TypeError('Provided `data` and `mask` must be of same type.')
 
     if isinstance(data, str):
-        root, ext, addext = split_add_ext(data)
+        root, ext, addext = splitext_addext(data)
         if ext == '.gii':
             # mask need not apply for gii files
             fdata = np.column_stack([f.data for f in nib.load(data).darrays])

From a94692b9d8afc2e593bc2a594b94abc7e6e5b23a Mon Sep 17 00:00:00 2001
From: Ross Markello <rossmarkello@gmail.com>
Date: Fri, 27 Apr 2018 16:20:13 -0400
Subject: [PATCH 03/18] [RF] Breaks everything; working to add gifti support

In the process of adding gifti support, but breaking EVERYTHING. Ensuring that
all stages of `tedana` do not require spatial information about the input data.
Making minor aesthetic and stylistic updates as I go through the code.
---
 .gitignore                  |   2 +
 tedana/cli/run.py           |   2 +
 tedana/interfaces/t2smap.py | 160 ++++++++++++++------------
 tedana/interfaces/tedana.py | 150 ++++++++++++++-----------
 tedana/tests/test_utils.py  |  60 ++++++++++
 tedana/utils/__init__.py    |   4 +-
 tedana/utils/utils.py       | 218 ++++++++++++++++++++++--------------
 7 files changed, 372 insertions(+), 224 deletions(-)
 create mode 100644 tedana/tests/test_utils.py

diff --git a/.gitignore b/.gitignore
index 7bbc71c09..02a7b22b3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,5 @@
+data/
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
diff --git a/tedana/cli/run.py b/tedana/cli/run.py
index 4448ea811..65d9f6919 100644
--- a/tedana/cli/run.py
+++ b/tedana/cli/run.py
@@ -5,10 +5,12 @@
 def get_parser():
     """
     Parses command line inputs for tedana
+
     Returns
     -------
     parser.parse_args() : argparse dict
     """
+
     parser = argparse.ArgumentParser()
     parser.add_argument('-d',
                         dest='data',
diff --git a/tedana/interfaces/t2smap.py b/tedana/interfaces/t2smap.py
index 68ae8ec2c..5a6f94646 100644
--- a/tedana/interfaces/t2smap.py
+++ b/tedana/interfaces/t2smap.py
@@ -7,102 +7,113 @@
 lgr = logging.getLogger(__name__)
 
 
-def t2sadmap(catd, mask, tes, masksum, start_echo):
+def t2sadmap(data, tes, mask, masksum, start_echo):
     """
-    t2sadmap(catd,mask,tes,masksum)
-
-    Input:
-
-    catd  has shape (nx,ny,nz,Ne,nt)
-    mask  has shape (nx,ny,nz)
-    tes   is a 1d numpy array
-    masksum
+    Parameters
+    ----------
+    data : (S x E x T) array_like
+        Multi-echo data array, where `S` is samples, `E` is echos, and `T` is
+        time
+    tes : (E, ) list
+        Echo times
+    mask : (S, ) array_like
+        Boolean array indicating samples that are consistently (i.e., across
+        time AND echoes) non-zero
+    masksum : (S, ) array_like
+        Valued array indicating number of echos that have sufficient signal in
+        given sample
+    start_echo : int
+        First echo to consider
+
+    Returns
+    -------
+    t2sa : (S, ) np.ndarray
+        Limited T2* map
+    s0va : (S, ) np.ndarray
+        Limited S0 map
+    t2ss : (S x E-1) np.ndarray
+        T2* estimates; column `i` is from the fit to the first `i + 2` echos
+    s0vs : (S x E-1) np.ndarray
+        S0 estimates; column `i` is from the fit to the first `i + 2` echos
+    t2saf : (S, ) np.ndarray
+        Full T2* map
+    s0vaf : (S, ) np.ndarray
+        Full S0 map
     """
-    nx, ny, nz, Ne, nt = catd.shape
-    echodata = fmask(catd, mask)
-    Nm = echodata.shape[0]
 
-    t2ss = np.zeros([nx, ny, nz, Ne - 1])
-    s0vs = t2ss.copy()
+    n_samp, n_echos, n_vols = data.shape
+    t2ss, s0vs = np.zeros([n_samp, n_echos - 1]), np.zeros([n_samp, n_echos - 1])
 
-    for ne in range(start_echo, Ne + 1):
-
-        # Do Log Linear fit
-        B = np.reshape(np.abs(echodata[:, :ne]) + 1, (Nm, ne * nt)).transpose()
+    for echo in range(start_echo, n_echos + 1):
+        # perform log linear fit of echo times against MR signal
+        B = np.reshape(np.abs(data[:, :echo, :]) + 1,
+                       (n_samp, echo * n_vols)).T
         B = np.log(B)
-        neg_tes = [-1 * te for te in tes[:ne]]
-        x = np.array([np.ones(ne), neg_tes])
-        X = np.tile(x, (1, nt))
-        X = np.sort(X)[:, ::-1].transpose()
+        neg_tes = [-1 * te for te in tes[:echo]]
+        x = np.array([np.ones(echo), neg_tes])
+        X = np.tile(x, (1, n_vols))
+        X = np.sort(X)[:, ::-1].T
 
         beta, res, rank, sing = np.linalg.lstsq(X, B)
-        t2s = 1 / beta[1, :].transpose()
-        s0 = np.exp(beta[0, :]).transpose()
-
-        t2s[np.isinf(t2s)] = 500.
-        s0[np.isnan(s0)] = 0.
-
-        t2ss[:, :, :, ne - 2] = np.squeeze(unmask(t2s, mask))
-        s0vs[:, :, :, ne - 2] = np.squeeze(unmask(s0, mask))
-
-    # Limited T2* and S0 maps
-    fl = np.zeros([nx, ny, nz, len(tes) - 2 + 1])
-    for ne in range(Ne - 1):
-        fl_ = np.squeeze(fl[:, :, :, ne])
-        fl_[masksum == ne + 2] = True
-        fl[:, :, :, ne] = fl_
-    fl = np.array(fl, dtype=bool)
-    t2sa = np.squeeze(unmask(t2ss[fl], masksum > 1))
-    s0va = np.squeeze(unmask(s0vs[fl], masksum > 1))
-
-    # Full T2* maps with S0 estimation errors
-    t2saf = t2sa.copy()
-    s0vaf = s0va.copy()
+        t2s = 1 / beta[1, :].T
+        s0 = np.exp(beta[0, :]).T
+
+        t2s[np.isinf(t2s)] = 500.  # why 500?
+        s0[np.isnan(s0)] = 0.      # why 0?
+
+        t2ss[..., echo - 2] = np.squeeze(t2s)
+        s0vs[..., echo - 2] = np.squeeze(s0)
+
+    # create limited T2* and S0 maps
+    fl = np.zeros([n_samp, len(tes) - 1], dtype=bool)
+    for echo in range(n_echos - 1):
+        fl_ = np.squeeze(fl[..., echo])
+        fl_[masksum == echo + 2] = True
+        fl[..., echo] = fl_
+    t2sa, s0va = masksum.copy(), masksum.copy()
+    t2sa[masksum > 1], s0va[masksum > 1] = t2ss[fl], s0vs[fl]
+
+    # create full T2* maps with S0 estimation errors
+    t2saf, s0vaf = t2sa.copy(), s0va.copy()
     t2saf[masksum == 1] = t2ss[masksum == 1, 0]
     s0vaf[masksum == 1] = s0vs[masksum == 1, 0]
 
     return t2sa, s0va, t2ss, s0vs, t2saf, s0vaf
 
 
-def optcom(data, t2, tes, mask, combmode, useG=False):
+def optcom(data, t2, tes, mask, combmode):
     """
-    out = optcom(data,t2s)
-
-
-    Input:
-
-    data.shape = (nx,ny,nz,Ne,Nt)
-    t2s.shape  = (nx,ny,nz)
-    tes.shape  = len(Ne)
-
-    Output:
-
-    out.shape = (nx,ny,nz,Nt)
+    Parameters
+    ----------
+    data : (S x E x T) array_like
+    t2 : (S, ) array_like
+    tes : (E, ) list
+    combmode : str
+        Must be in ['ste', 't2s']. Determines method for optimal combination
+
+    Returns
+    -------
+    comb_data : (S x T) np.ndarray
+        Optimally combined data
     """
-    nx, ny, nz, Ne, Nt = data.shape
 
-    if useG:
-        fdat = fmask(data, mask)
-        ft2s = fmask(t2, mask)
+    n_samp, n_echos, n_vols = data.shape
 
-    else:
-        fdat = fmask(data, mask)
-        ft2s = fmask(t2, mask)
+    tes = np.array(tes)[np.newaxis]  # (1 x E) array_like
+    t2s = t2[:, np.newaxis]  # (S x 1) array_like
 
-    tes = np.array(tes)
-    tes = tes[np.newaxis, :]
-    ft2s = ft2s[:, np.newaxis]
+    comb_data = np.zeros((data.shape[0], data.shape[-1]))
+    mdata = data[mask]
 
     if combmode == 'ste':
-        alpha = fdat.mean(-1) * tes
+        alpha = mdata.mean(axis=-1) * tes
     else:
-        alpha = tes * np.exp(-tes / ft2s)
+        alpha = tes * np.exp(-tes / t2s[mask])
 
-    alpha = np.tile(alpha[:, :, np.newaxis], (1, 1, Nt))
+    alpha = np.tile(alpha[:, :, np.newaxis], (1, 1, n_vols))
+    comb_data[mask] = np.average(mdata, axis=1, weights=alpha)
 
-    fout = np.average(fdat, axis=1, weights=alpha)
-    out = unmask(fout, mask)
-    return out
+    return comb_data
 
 
 def main(options):
@@ -115,6 +126,7 @@ def main(options):
 
     tes = [float(te) for te in options.tes]
     ne = len(tes)
+
     catim = nib.load(options.data[0])
     head = catim.get_header()
     head.extensions = []
@@ -127,7 +139,7 @@ def main(options):
     mask, masksum = makeadmask(catd, minimum=False, getsum=True)
 
     lgr.info('++ Computing Adaptive T2* map')
-    t2s, s0, t2ss, s0vs, t2saf, s0vaf = t2sadmap(catd, mask, tes, masksum, 2)
+    t2s, s0, t2ss, s0vs, t2saf, s0vaf = t2sadmap(catd, tes, mask, masksum, 2)
     niwrite(masksum, aff, 'masksum%s.nii' % suf)
     niwrite(t2ss, aff, 't2ss%s.nii' % suf)
     niwrite(s0vs, aff, 's0vs%s.nii' % suf)
diff --git a/tedana/interfaces/tedana.py b/tedana/interfaces/tedana.py
index b69b99dc3..175ae991a 100644
--- a/tedana/interfaces/tedana.py
+++ b/tedana/interfaces/tedana.py
@@ -6,9 +6,10 @@
 import numpy as np
 import nibabel as nib
 from sklearn import svm
+from scipy.special import lpmv
 import scipy.stats as stats
 from tedana.interfaces import (optcom, t2sadmap)
-from tedana.utils import (cat2echos, uncat2echos, make_mask,
+from tedana.utils import (cat2echos, uncat2echos, make_min_mask,
                           makeadmask, fmask, unmask,
                           fitgaussian, niwrite, dice, andb)
 
@@ -313,8 +314,8 @@ def eimask(dd, ees=None):
         hthr = 5 * stats.scoreatpercentile(dd[:, ee, :].flatten(),
                                            98, interpolation_method='lower')
         lgr.info(lthr, hthr)
-        imask[dd[:, ee, :].mean(1) > lthr, ee] = 1
-        imask[dd[:, ee, :].mean(1) > hthr, ee] = 0
+        imask[dd[:, ee, :].mean(axis=1) > lthr, ee] = 1
+        imask[dd[:, ee, :].mean(axis=1) > hthr, ee] = 0
     return imask
 
 
@@ -1022,31 +1023,29 @@ def selcomps(seldict, mmix, head, manacc, debug=False, olevel=2, oversion=99,
 
 
 def tedpca(combmode, mask, stabilize, head, ste=0, mlepca=True):
-    nx, ny, nz, n_echos, nt = catd.shape
+    n_samp, n_echos, n_vols = catd.shape
     ste = np.array([int(ee) for ee in str(ste).split(',')])
     if len(ste) == 1 and ste[0] == -1:
         lgr.info('-Computing PCA of optimally combined multi-echo data')
-        OCmask = make_mask(OCcatd[:, :, :, np.newaxis, :])
-        d = fmask(OCcatd, OCmask)
+        d = OCcatd[make_min_mask(OCcatd[:, np.newaxis, :])]
         eim = eimask(d[:, np.newaxis, :])
         eim = eim[:, 0] == 1
         d = d[eim, :]
     elif len(ste) == 1 and ste[0] == 0:
         lgr.info('-Computing PCA of spatially concatenated multi-echo data')
         ste = np.arange(n_echos)
-        d = np.float64(fmask(catd, mask))
+        d = catd[mask].astype('float64')
         eim = eimask(d) == 1
         d = d[eim]
     else:
         lgr.info('-Computing PCA of TE #%s' % ','.join([str(ee) for ee in ste]))
-        d = np.float64(np.concatenate([fmask(catd[:, :, :, ee, :],
-                                             mask)[:, np.newaxis, :] for ee in ste-1], axis=1))
-        eim = eimask(d) == 1
-        eim = np.squeeze(eim)
+        d = np.concatenate([catd[mask, ee, :][:, np.newaxis] for ee in ste - 1],
+                           axis=1).astype('float64')
+        eim = np.squeeze(eimask(d) == 1)
         d = np.squeeze(d[eim])
 
-    dz = ((d.T - d.T.mean(0)) / d.T.std(0)).T  # Variance normalize timeseries
-    dz = (dz - dz.mean()) / dz.std()  # Variance normalize everything
+    dz = ((d.T - d.T.mean(axis=0)) / d.T.std(axis=0)).T  # var normalize ts
+    dz = (dz - dz.mean()) / dz.std()  # var normalize everything
 
     if not os.path.exists('pcastate.pkl'):
 
@@ -1057,18 +1056,20 @@ def tedpca(combmode, mask, stabilize, head, ste=0, mlepca=True):
             ppca.fit(dz)
             v = ppca.components_
             s = ppca.explained_variance_
-            u = np.dot(np.dot(dz, v.T), np.diag(1./s))
+            u = np.dot(np.dot(dz, v.T), np.diag(1. / s))
         else:
             u, s, v = np.linalg.svd(dz, full_matrices=0)
 
         sp = s/s.sum()
         eigelb = sp[getelbow_mod(sp)]
 
-        spdif = np.abs(sp[1:]-sp[:-1])
+        spdif = np.abs(sp[1:] - sp[:-1])
         spdifh = spdif[(spdif.shape[0]//2):]
         spdmin = spdif.min()
         spdthr = np.mean([spdifh.max(), spdmin])
-        spmin = sp[(spdif.shape[0]//2)+(np.arange(spdifh.shape[0])[spdifh >= spdthr][0]) + 1]
+        spmin = sp[(spdif.shape[0]//2) +
+                   (np.arange(spdifh.shape[0])[spdifh >= spdthr][0]) +
+                   1]
         spcum = []
         spcumv = 0
         for sss in sp:
@@ -1079,8 +1080,11 @@ def tedpca(combmode, mask, stabilize, head, ste=0, mlepca=True):
         # Compute K and Rho for PCA comps
         eimum = np.atleast_2d(eim)
         eimum = np.transpose(eimum, np.argsort(np.atleast_2d(eim).shape)[::-1])
-        eimum = np.array(np.squeeze(unmask(eimum.prod(1), mask)),
-                         dtype=np.bool)
+        eimum = eimum.prod(axis=1)
+        o = np.zeros((mask.shape[0], *eimum.shape[1:]))
+        o[mask] = eimum
+        eimum = np.squeeze(o).astype(bool)
+
         vTmix = v.T
         vTmixN = ((vTmix.T - vTmix.T.mean(0)) / vTmix.T.std(0)).T
         _, ctb, betasv, v_T = fitmodels_direct(catd, v.T, eimum, t2s, t2sG,
@@ -1178,51 +1182,52 @@ def gscontrol_raw(OCcatd, head, n_echos, dtrank=4):
     modify catd (global variable) to removal global signal out of individual
     echo time series datasets. The spatial global signal is estimated
     from the optimally combined data after detrending with a Legendre
-    polynomial basis of order=0 and degree=dtrank.
+    polynomial basis of `order = 0` and `degree = dtrank`.
     """
 
     lgr.info('++ Applying amplitude-based T1 equilibration correction')
 
     # Legendre polynomial basis for denoising
-    from scipy.special import lpmv
-    Lmix = np.array([lpmv(0, vv, np.linspace(-1, 1, OCcatd.shape[-1])) for vv in range(dtrank)]).T
+    n_vols = OCcatd.shape[-1]
+    Lmix = np.array([lpmv(0, vv, np.linspace(-1, 1, n_vols))
+                     for vv in range(dtrank)]).T
 
-    # Compute mean, std, mask local to this function
+    # compute mean, std, mask local to this function
     # inefficient, but makes this function a bit more modular
-    Gmu = OCcatd.mean(-1)
+    Gmu = OCcatd.mean(axis=-1)  # temporal mean
     Gmask = Gmu != 0
 
     # Find spatial global signal
     dat = OCcatd[Gmask] - Gmu[Gmask][:, np.newaxis]
-    sol = np.linalg.lstsq(Lmix, dat.T)  # Legendre basis for detrending
-    detr = dat - np.dot(sol[0].T, Lmix.T)[0]
-    sphis = (detr).min(1)
+    sol = np.linalg.lstsq(Lmix, dat.T)[0]  # Legendre basis for detrending
+    detr = dat - np.dot(sol.T, Lmix.T)[0]
+    sphis = (detr).min(axis=1)
     sphis -= sphis.mean()
-    niwrite(unmask(sphis, Gmask), aff, 'T1gs.nii', head)
+    # niwrite(unmask(sphis, Gmask), aff, 'T1gs.nii', head)  # FIXME
 
-    # Find time course of the spatial global signal
+    # Find time course of the spatial global signal
     # make basis with the Legendre basis
     glsig = np.linalg.lstsq(np.atleast_2d(sphis).T, dat)[0]
-    glsig = (glsig-glsig.mean()) / glsig.std()
+    glsig = (glsig - glsig.mean()) / glsig.std()
     np.savetxt('glsig.1D', glsig)
     glbase = np.hstack([Lmix, glsig.T])
 
     # Project global signal out of optimally combined data
-    sol = np.linalg.lstsq(np.atleast_2d(glbase), dat.T)
-    tsoc_nogs = dat - np.dot(np.atleast_2d(sol[0][dtrank]).T,
+    sol = np.linalg.lstsq(np.atleast_2d(glbase), dat.T)[0]
+    tsoc_nogs = dat - np.dot(np.atleast_2d(sol[dtrank]).T,
                              np.atleast_2d(glbase.T[dtrank])) + Gmu[Gmask][:, np.newaxis]
 
-    niwrite(OCcatd, aff, 'tsoc_orig.nii', head)
-    OCcatd = unmask(tsoc_nogs, Gmask)
-    niwrite(OCcatd, aff, 'tsoc_nogs.nii', head)
+    # niwrite(OCcatd, aff, 'tsoc_orig.nii', head)  # FIXME
+    OCcatd[Gmask] = tsoc_nogs
+    # niwrite(OCcatd, aff, 'tsoc_nogs.nii', head)  # FIXME
 
     # Project glbase out of each echo
-    for ii in range(n_echos):
-        dat = catd[:, :, :, ii, :][Gmask]
-        sol = np.linalg.lstsq(np.atleast_2d(glbase), dat.T)
-        e_nogs = dat - np.dot(np.atleast_2d(sol[0][dtrank]).T,
+    for echo in range(n_echos):
+        dat = catd[Gmask, echo, :]
+        sol = np.linalg.lstsq(np.atleast_2d(glbase), dat.T)[0]
+        e_nogs = dat - np.dot(np.atleast_2d(sol[dtrank]).T,
                               np.atleast_2d(glbase.T[dtrank]))
-        catd[:, :, :, ii, :] = unmask(e_nogs, Gmask)
+        catd[Gmask, echo, :] = e_nogs
 
     return catd, OCcatd
 
@@ -1393,7 +1398,6 @@ def writeresults_echoes(acc, rej, midk, head, comptable, mmix):
 
 def main(options):
     """
-
     Args (and defaults):
     data, tes, mixm=None, ctab=None, manacc=None, strict=False,
              no_gscontrol=False, kdaw=10., rdaw=1., conv=2.5e-5, ste=-1,
@@ -1407,48 +1411,61 @@ def main(options):
     tes = [float(te) for te in options.tes]
     n_echos = len(tes)
 
-    # get some info on the input data
-    # TODO: only works on nifti
+    # TODO: attempt to derive input data format as soon as possible
+    # we'll need to carry this through to writing out all the resultant output
+    # files for the rest of the script; options should include .nii and .gii
+    #
+    # output_type = get_input_type(options.data)
+
+    # FIXME: only works on nifti
     catim = nib.load(options.data[0])
     head = catim.header
-    head.extensions = []  # clear extension info in header
-    head.set_sform(head.get_sform(), code=1)  # reset sform code
-    aff = catim.get_affine()  # TODO: gifti has no affine
+    head.extensions = []
+    head.set_sform(head.get_sform(), code=1)
+    aff = catim.affine
+
+    # coerce data to samples x echos x time array
     catd = cat2echos(options.data, n_echos=n_echos)
-    nx, ny, nz, n_echos, nt = catd.shape
+    n_samp, n_echos, n_vols = catd.shape
 
-    # parse options, prepare output directory
+    # FIXME: only works on nifti
     if options.fout:
         options.fout = aff
     else:
         options.fout = None
 
     global kdaw, rdaw
+    kdaw = float(options.kdaw)
+    rdaw = float(options.rdaw)
+
     if not options.stabilize:
         stabilize = False
     else:
         stabilize = True
-    kdaw = float(options.kdaw)
-    rdaw = float(options.rdaw)
 
+    # prepare output directory, copy over pre-generated outputs
+    dirname = 'TED'
     if options.label is not None:
-        dirname = '.'.join(['TED', options.label])
-    else:
-        dirname = 'TED'
-    os.mkdir(dirname)
+        dirname = '.'.join([dirname, options.label])
+    os.mkdir(dirname)  # should we check to see if this already exists?
     if options.mixm is not None:
         try:
-            shutil.copyfile(options.mixm, os.path.join(dirname, 'meica_mix.1D'))
-            shutil.copyfile(options.mixm, os.path.join(dirname, os.path.basename(options.mixm)))
+            shutil.copyfile(options.mixm,
+                            os.path.join(dirname, 'meica_mix.1D'))
+            shutil.copyfile(options.mixm,
+                            os.path.join(dirname,
+                                         os.path.basename(options.mixm)))
         except shutil.Error:
             pass
     if options.ctab is not None:
         try:
-            shutil.copyfile(options.mixm, os.path.join(dirname, 'comp_table.txt'))
-            shutil.copyfile(options.mixm, os.path.join(dirname, os.path.basename(options.mixm)))
+            shutil.copyfile(options.ctab,
+                            os.path.join(dirname, 'comp_table.txt'))
+            shutil.copyfile(options.ctab,
+                            os.path.join(dirname,
+                                         os.path.basename(options.ctab)))
         except shutil.Error:
             pass
-
     os.chdir(dirname)
 
     lgr.info('++ Computing Mask')
@@ -1457,12 +1474,16 @@ def main(options):
 
     lgr.info('++ Computing T2* map')
     global t2s, s0, t2ss, s0s, t2sG, s0G
-    t2s, s0, t2ss, s0s, t2sG, s0G = t2sadmap(catd, mask, tes, masksum, 1)
+    # TODO: can we maybe not do this? returning six things is a lot...
+    # also, WHAT ARE THEY?!?!?
+    t2s, s0, t2ss, s0s, t2sG, s0G = t2sadmap(catd, tes, mask, masksum, 1)
 
-    # Condition values
+    # set a hard cap for the T2* map
+    # values more than 10x the 99.5 %ile are reset to the 99.5 %ile
     cap_t2s = stats.scoreatpercentile(t2s.flatten(), 99.5,
                                       interpolation_method='lower')
-    t2s[t2s > cap_t2s*10] = cap_t2s
+    t2s[t2s > cap_t2s * 10] = cap_t2s
+    # FIXME: need to write the appropriate output file type!
     niwrite(s0, aff, 's0v.nii', head)
     niwrite(t2s, aff, 't2sv.nii', head)
     niwrite(t2ss, aff, 't2ss.nii', head)
@@ -1470,11 +1491,10 @@ def main(options):
     niwrite(s0G, aff, 's0vG.nii', head)
     niwrite(t2sG, aff, 't2svG.nii', head)
 
-    # Optimally combine data
+    # optimally combine data
     combmode = options.combmode
     global OCcatd
-    OCcatd = optcom(catd, t2sG, tes, mask,
-                    combmode, useG=True)
+    OCcatd = optcom(catd, t2sG, tes, mask, combmode)
     if not options.no_gscontrol:
         catd, OCcatd = gscontrol_raw(OCcatd, head, len(tes))
 
@@ -1522,7 +1542,7 @@ def main(options):
         lgr.info('** WARNING! No BOLD components detected!!! \n'
                  '** Please check data and results!')
 
-    writeresults(OCcatd, comptable, mmix, nt, acc, rej, midk, empty, head)
+    writeresults(OCcatd, comptable, mmix, n_vols, acc, rej, midk, empty, head)
     gscontrol_mmix(mmix, acc, rej, midk, empty, head)
     if options.dne:
         writeresults_echoes(acc, rej, midk, head, comptable, mmix)
diff --git a/tedana/tests/test_utils.py b/tedana/tests/test_utils.py
new file mode 100644
index 000000000..899f71018
--- /dev/null
+++ b/tedana/tests/test_utils.py
@@ -0,0 +1,60 @@
+"""
+Tests for tedana.utils
+"""
+
+import os.path
+from tedana import utils
+import nibabel as nb
+import numpy as np
+
+
+def test_load_image():
+    pass
+
+
+def test_cat2echos():
+    pass
+
+
+def test_makeadmask():
+    pass
+
+
+def test_make_min_mask():
+    pass
+
+
+def test_uncat2echos():
+    pass
+
+
+def test_fmask():
+    pass
+
+
+def test_unmask():
+    pass
+
+
+def test_moments():
+    pass
+
+
+def test_gaussian():
+    pass
+
+
+def test_fitgaussian():
+    pass
+
+
+def test_niwrite():
+    pass
+
+
+def test_dice():
+    pass
+
+
+def test_andb():
+    pass
diff --git a/tedana/utils/__init__.py b/tedana/utils/__init__.py
index 35925964d..4059ccb24 100644
--- a/tedana/utils/__init__.py
+++ b/tedana/utils/__init__.py
@@ -2,13 +2,13 @@
 # ex: set sts=4 ts=4 sw=4 et:
 
 from .utils import (
-    cat2echos, uncat2echos, make_mask,
+    cat2echos, uncat2echos, make_min_mask,
     makeadmask, fmask, unmask,
     fitgaussian, niwrite, dice, andb,
 )
 
 
 __all__ = [
-    'cat2echos', 'uncat2echos', 'make_mask',
+    'cat2echos', 'uncat2echos', 'make_min_mask',
     'makeadmask', 'fmask', 'unmask',
     'fitgaussian', 'niwrite', 'dice', 'andb']
diff --git a/tedana/utils/utils.py b/tedana/utils/utils.py
index d853b790b..e8b25af59 100644
--- a/tedana/utils/utils.py
+++ b/tedana/utils/utils.py
@@ -9,71 +9,86 @@
 from ..due import due, BibTeX
 
 
-# TODO: Currently only accepts niftis -- do we need it to accept giftis?
-def cat2echos(data, n_echos=None):
+def load_image(data):
     """
-    Coerces input `data` files to required array output
+    Takes input `data` and returns a sample x time array
 
     Parameters
     ----------
-    data : (X x Y x M x T) array_like or list-of-niimg-like
-        Input multi-echo data array or independent echo files, where M is Z *
-        the number of echos
-    n_echos : int
-        Number of echos
+    data : (X x Y x Z [x T]) array_like or niimg-like object
+        Data array or data file to be loaded / reshaped
 
     Returns
     -------
-    fdata : (X x Y x Z x E x T) np.ndarray
-        Where `X`, `Y`, `Z` are spatial dims, `E` is echos, and `T` is time
+    fdata : (S x T) np.ndarray
+        Reshaped `data`, where `S` is samples and `T` is time
     """
 
-    if isinstance(data, list):
-        # the individual echo files were provided
-        if len(data) > 2:
-            fdata = np.stack([nib.load(f).get_data() for f in data], axis=3)
-            # ensure we have a time dimension
-            if fdata.ndim < 5:
-                fdata = fdata[..., np.newaxis]
+    if isinstance(data, str):
+        root, ext, addext = splitext_addext(data)
+        if ext == '.gii':
+            fdata = np.column_stack([f.data for f in nib.load(data).darrays])
             return fdata
-        # a z-concatenated file was provided (hopefully)
-        elif len(data) == 1:
-            if n_echos is None:
-                raise ValueError('Number of echos `n_echos` must be specified '
-                                 'if z-concatenated data file provided.')
-            data = nib.load(data[0]).get_data()
         else:
-            raise ValueError('Cannot run `tedana` with only two echos: '
-                             '{}'.format(data))
+            data = check_niimg(data).get_data()
 
-    # either an array or a z-concatenated file was provided
-    nx, ny, nz = data.shape[:2], data.shape[2] // n_echos
-    fdata = data.reshape(nx, ny, nz, n_echos, -1, order='F')
+    fdata = data.reshape((-1,) + data.shape[3:])
 
-    return fdata
+    return fdata.squeeze()
 
 
-def uncat2echos(data):
+def cat2echos(data, n_echos=None):
     """
-    Combines Z- and echo-axis in `data`
+    Coerces input `data` files to required 3D array output
 
     Parameters
     ----------
-    data : (X x Y x Z x E x T) array_like
-        Multi-echo data array
+    data : (X x Y x M x T) array_like or list-of-img-like
+        Input multi-echo data array, where `X` and `Y` are spatial dimensions,
+        `M` is the Z-spatial dimensions with all the input echos concatenated,
+        and `T` is time. A list of image-like objects (e.g., .nii or .gii) is
+        accepted, as well
+    n_echos : int, optional
+        Number of echos in provided data. Required if `data` is array_like
+        or a single z-concatenated file. Default: None
 
     Returns
     -------
-    fdata : (X x Y x M x T) np.ndarray
-        Z-concatenated multi-echo data array, where M is Z * number of echos
+    fdata : (S x E x T) np.ndarray
+        Output data where `S` is samples, `E` is echos, and `T` is time
     """
 
-    if data.ndim < 4:
-        raise ValueError('Input data must have at least four dimensions; '
-                         'provided data has only {0}'.format(data.ndim))
+    # data files were provided
+    if isinstance(data, list):
+        # individual echo files were provided
+        if len(data) > 2:
+            fdata = np.stack([load_image(f) for f in data], axis=1)
+        # a z-concatenated file was provided
+        elif len(data) == 1:
+            if n_echos is None:
+                raise ValueError('Number of echos `n_echos` must be specified '
+                                 'if z-concatenated data file provided.')
+            fdata = load_image(data[0])
+        # only two echo files were provided, which doesn't fly
+        else:
+            raise ValueError('Cannot run `tedana` with only two echos: '
+                             '{}'.format(data))
+        # ensure data has a time axis
+        if fdata.ndim < 3:
+            fdata = fdata[..., np.newaxis]
+    # data array was provided (is this necessary?)
+    elif isinstance(data, np.ndarray):
+        if data.ndim != 4:
+            raise ValueError('Data must be 4-dimensional, where the '
+                             'dimensions correspond to: (1) first spatial '
+                             'dimensions, (2) second spatial dimension, (3) '
+                             'third spatial dimension x number of echos, and '
+                             '(4) time. Provided data dimensions were: '
+                             '{}'.format(data.shape))
+        nx, ny, nz = data.shape[:2], data.shape[2] // n_echos
+        fdata = load_image(data.reshape(nx, ny, nz, n_echos, -1, order='F'))
 
-    (nx, ny), nz = data.shape[:2], np.prod(data.shape[2:4])
-    return data.reshape(nx, ny, nz, -1, order='F')
+    return fdata
 
 
 def makeadmask(data, minimum=True, getsum=False):
@@ -82,20 +97,21 @@ def makeadmask(data, minimum=True, getsum=False):
 
     Parameters
     ----------
-    data : (X x Y x Z x E x T) array_like
-        Where `X`, `Y`, `Z` are spatial dims, `E` is echos, and `T` is time
+    data : (S x E x T) array_like
+        Multi-echo data array, where `S` is samples, `E` is echos, and `T` is
+        time
     minimum : bool, optional
-        Use `make_min_mask` instead of generating a map with echo-specific.
-        Default: True
+        Use `make_min_mask()` instead of generating a map with echo-specific
+        times. Default: True
     getsum : bool, optional
-        Return `masksum` in addition to mask. Default: False
+        Return `masksum` in addition to `mask`. Default: False
 
     Returns
     -------
-    mask : (X x Y x Z) np.ndarray
+    mask : (S, ) np.ndarray
         Boolean array of voxels that have sufficient signal in at least one
         echo
-    masksum : (X x Y x Z) np.ndarray
+    masksum : (S, ) np.ndarray
         Valued array indicating the number of echos with sufficient signal in a
         given voxel. Only returned if `getsum = True`
     """
@@ -103,23 +119,23 @@ def makeadmask(data, minimum=True, getsum=False):
     if minimum:
         return make_min_mask(data)
 
-    x, y, z, n_echos, _ = data.shape
-    emeans = data.mean(axis=-1)
-    first_echo = emeans[:, :, :, 0]
+    n_samp, n_echos, n_vols = data.shape
+    echo_means = data.mean(axis=-1)  # temporal mean of echos
+    first_echo = echo_means[..., 0]
     # make a map of longest echo with which a voxel can be sampled, with min
     # value of map as X value of voxel that has median value in the 1st echo
     # N.B. larger factor (%ile??) leads to bias to lower TEs
-    perc33 = np.percentile(first_echo[first_echo.nonzero()], 33,
-                           interpolation='higher')  # why take 33rd %ile?
-    medv = (first_echo == perc33)
-    lthrs = np.vstack([emeans[:, :, :, echo][medv] / 3 for echo in
+    perc_33 = np.percentile(first_echo[first_echo.nonzero()], 33,
+                            interpolation='higher')  # why take 33rd %ile?
+    med_val = (first_echo == perc_33)
+    lthrs = np.vstack([echo_means[..., echo][med_val] / 3 for echo in
                        range(n_echos)])  # why divide by three?
     lthrs = lthrs[:, lthrs.sum(0).argmax()]
     mthr = np.ones(data.shape[:-1])
     for echo in range(n_echos):
-        mthr[:, :, :, echo] *= lthrs[echo]
+        mthr[..., echo] *= lthrs[echo]
 
-    masksum = (np.abs(emeans) > mthr).astype('int').sum(axis=-1)
+    masksum = (np.abs(echo_means) > mthr).astype('int').sum(axis=-1)
     mask = (masksum != 0)
 
     if getsum:
@@ -137,18 +153,71 @@ def make_min_mask(data):
 
     Parameters
     ----------
-    data : (X x Y x Z x E x T) array_like
-        Multi-echo data array, where X, Y, Z are spatial dimensions, E
-        corresponds to individual echo data, and T is time
+    data : (S x E x T) array_like
+        Multi-echo data array, where `S` is samples, `E` is echos, and `T` is
+        time
 
     Returns
     -------
-    mask : (X x Y x Z) np.ndarray
+    mask : (S, ) np.ndarray
         Boolean array
     """
 
-    data = np.asarray(data)
-    return data.prod(axis=-1).prod(axis=-1).astype('bool')
+    data = np.asarray(data).astype(bool)
+    return data.prod(axis=-1).prod(axis=-1).astype(bool)
+
+
+def get_input_type(input):
+    pass
+
+
+def niwrite(data, affine, name, head, outtype='.nii.gz'):
+    """
+    Write out nifti file.
+
+    Parameters
+    ----------
+    data : array_like
+    affine : (4 x 4) array_like
+        Affine for output file
+    name : str
+        Name to save output file to
+    head : object
+    outtype : str, optional
+        Output type of file. Default: '.nii.gz'
+    """
+
+    # get rid of NaN
+    data[np.isnan(data)] = 0
+    # set header info
+    header = head.copy()
+    header.set_data_shape(list(data.shape))
+    outni = nib.Nifti1Image(data, affine, header=header)
+    outni.set_data_dtype('float64')
+    outni.to_filename(name)
+
+
+def uncat2echos(data):
+    """
+    Combines Z- and echo-axis in `data`
+
+    Parameters
+    ----------
+    data : (X x Y x Z x E x T) array_like
+        Multi-echo data array
+
+    Returns
+    -------
+    fdata : (X x Y x M x T) np.ndarray
+        Z-concatenated multi-echo data array, where M is Z * number of echos
+    """
+
+    if data.ndim < 4:
+        raise ValueError('Input data must have at least four dimensions; '
+                         'provided data has only {0}'.format(data.ndim))
+
+    (nx, ny), nz = data.shape[:2], np.prod(data.shape[2:4])
+    return data.reshape(nx, ny, nz, -1, order='F')
 
 
 def fmask(data, mask=None):
@@ -164,9 +233,8 @@ def fmask(data, mask=None):
 
     Returns
     -------
-    fdata : (V [x E] x T) np.ndarray
-        Masked `data`, where `V` is voxels/vertices, `E` is echoes, and `T` is
-        time
+    fdata : (S x E x T) np.ndarray
+        Masked `data`, where `S` is samples, `E` is echoes, and `T` is time
     """
 
     if mask is not None and not type(data) == type(mask):
@@ -200,8 +268,8 @@ def unmask(data, mask):
 
     Parameters
     ----------
-    data : (V x E x T) array_like
-        Masked array, where V is voxels flattened across spatial dimensions
+    data : (S x E x T) array_like
+        Masked array, where S is samples flattened across spatial dimensions
     mask : (X x Y x Z) array_like
         Boolean array that was used to mask `data`
 
@@ -323,22 +391,6 @@ def errorfunction(p, data):
     return p
 
 
-def niwrite(data, affine, name, head, header=None):
-    """
-    Write out nifti file.
-    """
-    data[np.isnan(data)] = 0
-    if header is None:
-        this_header = head.copy()
-        this_header.set_data_shape(list(data.shape))
-    else:
-        this_header = header
-
-    outni = nib.Nifti1Image(data, affine, header=this_header)
-    outni.set_data_dtype('float64')
-    outni.to_filename(name)
-
-
 @due.dcite(BibTeX('@article{dice1945measures,'
                   'author={Dice, Lee R},'
                   'title={Measures of the amount of ecologic association between species},'

From 4e735bb97268688a7613ad8745f537b31f145940 Mon Sep 17 00:00:00 2001
From: Ross Markello <rossmarkello@gmail.com>
Date: Fri, 4 May 2018 01:04:47 -0400
Subject: [PATCH 04/18] [RF] Still very broken fixing fitmodels_direct

Everything is still very broken as I'm working through fixing all the
functions to work with 3D arrays (samples x echos x time). I found the place
where things start to diverge from the previous versions, but it seems to be
due to numerical instabilities? Unclear. Will figure out later!
---
 tedana/interfaces/t2smap.py |  23 ++--
 tedana/interfaces/tedana.py | 225 ++++++++++++++++++------------------
 tedana/utils/utils.py       |  55 +++------
 3 files changed, 141 insertions(+), 162 deletions(-)

diff --git a/tedana/interfaces/t2smap.py b/tedana/interfaces/t2smap.py
index 5a6f94646..f83d193a6 100644
--- a/tedana/interfaces/t2smap.py
+++ b/tedana/interfaces/t2smap.py
@@ -42,17 +42,16 @@ def t2sadmap(data, tes, mask, masksum, start_echo):
     """
 
     n_samp, n_echos, n_vols = data.shape
+    data = data[mask]
     t2ss, s0vs = np.zeros([n_samp, n_echos - 1]), np.zeros([n_samp, n_echos - 1])
 
     for echo in range(start_echo, n_echos + 1):
         # perform log linear fit of echo times against MR signal
-        B = np.reshape(np.abs(data[:, :echo, :]) + 1,
-                       (n_samp, echo * n_vols)).T
-        B = np.log(B)
-        neg_tes = [-1 * te for te in tes[:echo]]
-        x = np.array([np.ones(echo), neg_tes])
-        X = np.tile(x, (1, n_vols))
-        X = np.sort(X)[:, ::-1].T
+        # make DV matrix: samples x (time series * echos)
+        B = np.log((np.abs(data[:, :echo, :]) + 1).reshape(len(data), -1).T)
+        # make IV matrix: intercept/TEs x (time series * echos)
+        x = np.column_stack([np.ones(echo), [-te for te in tes[:echo]]])
+        X = np.repeat(x, n_vols, axis=0)
 
         beta, res, rank, sing = np.linalg.lstsq(X, B)
         t2s = 1 / beta[1, :].T
@@ -61,8 +60,8 @@ def t2sadmap(data, tes, mask, masksum, start_echo):
         t2s[np.isinf(t2s)] = 500.  # why 500?
         s0[np.isnan(s0)] = 0.      # why 0?
 
-        t2ss[..., echo - 2] = np.squeeze(t2s)
-        s0vs[..., echo - 2] = np.squeeze(s0)
+        t2ss[..., echo - 2] = np.squeeze(unmask(t2s, mask))
+        s0vs[..., echo - 2] = np.squeeze(unmask(s0, mask))
 
     # create limited T2* and S0 maps
     fl = np.zeros([n_samp, len(tes) - 1], dtype=bool)
@@ -70,8 +69,8 @@ def t2sadmap(data, tes, mask, masksum, start_echo):
         fl_ = np.squeeze(fl[..., echo])
         fl_[masksum == echo + 2] = True
         fl[..., echo] = fl_
-    t2sa, s0va = masksum.copy(), masksum.copy()
-    t2sa[masksum > 1], s0va[masksum > 1] = t2ss[fl], s0vs[fl]
+    t2sa, s0va = unmask(t2ss[fl], masksum > 1), unmask(s0vs[fl], masksum > 1)
+    # t2sa[masksum > 1], s0va[masksum > 1] = t2ss[fl], s0vs[fl]
 
     # create full T2* maps with S0 estimation errors
     t2saf, s0vaf = t2sa.copy(), s0va.copy()
@@ -89,7 +88,7 @@ def optcom(data, t2, tes, mask, combmode):
     t2 : (S, ) array_like
     tes : (E, ) list
     combmode : str
-        Must be in ['ste', 't2s']. Determines method for optimal combination
+        Must be in ['ste', 't2s']. Determines method for optimal combination.
 
     Returns
     -------
diff --git a/tedana/interfaces/tedana.py b/tedana/interfaces/tedana.py
index 175ae991a..b903e361f 100644
--- a/tedana/interfaces/tedana.py
+++ b/tedana/interfaces/tedana.py
@@ -8,6 +8,7 @@
 from sklearn import svm
 from scipy.special import lpmv
 import scipy.stats as stats
+from scipy.stats import skew
 from tedana.interfaces import (optcom, t2sadmap)
 from tedana.utils import (cat2echos, uncat2echos, make_min_mask,
                           makeadmask, fmask, unmask,
@@ -147,37 +148,38 @@ def rankvec(vals):
 
 def get_coeffs(data, mask, X, add_const=False):
     """
-    get_coeffs(data,X)
+    get_coeffs(data, X)
 
     Parameters
     ----------
-    data : array-like
-        Array of shape (nx, ny, nz, nt)
-    mask : array-like
-        Array of shape (nx, ny, nz)
-    X : array-like
-        Array of shape (nt, nc)
+    data : (S x T) array-like
+        Array where `S` is samples and `T` is time
+    mask : (S,) array-like
+        Boolean mask array
+    X : (T x C) array-like
+        Array where `T` is time and `C` is components
     add_const : bool, optional
-        Default is False.
+        Add intercept column to `X` before fitting. Default: False
 
     Returns
     -------
-    out : array_like
-        Array of shape (nx, ny, nz, nc)
+    out : (S x C) np.ndarray
+        Array of betas for all samples `S`
     """
-    mdata = fmask(data, mask).transpose()
+
+    mdata = data[mask].T
 
     # Coerce X to >=2d
     X = np.atleast_2d(X)
 
     if X.shape[0] == 1:
         X = X.T
-    Xones = np.atleast_2d(np.ones(np.min(mdata.shape))).T
-    if add_const:
-        X = np.hstack([X, Xones])
+    if add_const:  # add intercept
+        Xones = np.ones((np.min(mdata.shape), 1))
+        X = np.column_stack([X, Xones])
 
-    tmpbetas = np.linalg.lstsq(X, mdata)[0].transpose()
-    if add_const:
+    tmpbetas = np.linalg.lstsq(X, mdata)[0].T
+    if add_const:  # drop beta for intercept
         tmpbetas = tmpbetas[:, :-1]
     out = unmask(tmpbetas, mask)
 
@@ -235,10 +237,11 @@ def getelbow_mod(ks, val=False):
         Either the elbow index (if val is True) or the values at the elbow
         index (if val is False)
     """
+
     ks = np.sort(ks)[::-1]
     nc = ks.shape[0]
     coords = np.array([np.arange(nc), ks])
-    p = coords - np.tile(np.reshape(coords[:, 0], (2, 1)), (1, nc))
+    p = coords - coords[:, 0].reshape(2, 1)
     b = p[:, -1]
     b_hat = np.reshape(b / np.sqrt((b ** 2).sum()), (2, 1))
     proj_p_b = p - np.dot(b_hat.T, p) * np.tile(b_hat, (1, nc))
@@ -304,18 +307,21 @@ def getfbounds(n_echos):
 
 
 def eimask(dd, ees=None):
+    """
+    Returns mask for data between [0.001, 5] * 98th percentile of dd
+    """
     if ees is None:
         ees = range(dd.shape[1])
-    imask = np.zeros([dd.shape[0], len(ees)])
+    imask = np.zeros([dd.shape[0], len(ees)], dtype=bool)
     for ee in ees:
         lgr.info(ee)
-        lthr = 0.001 * stats.scoreatpercentile(dd[:, ee, :].flatten(),
-                                               98, interpolation_method='lower')
-        hthr = 5 * stats.scoreatpercentile(dd[:, ee, :].flatten(),
-                                           98, interpolation_method='lower')
+        perc98 = stats.scoreatpercentile(dd[:, ee, :].flatten(), 98,
+                                         interpolation_method='lower')
+        lthr, hthr = 0.001 * perc98, 5 * perc98
         lgr.info(lthr, hthr)
-        imask[dd[:, ee, :].mean(axis=1) > lthr, ee] = 1
-        imask[dd[:, ee, :].mean(axis=1) > hthr, ee] = 0
+        m = dd[:, ee, :].mean(axis=1)
+        imask[np.logical_and(m > lthr, m < hthr), ee] = True
+
     return imask
 
 
@@ -332,16 +338,22 @@ def split_ts(data, comptable, mmix, acc, rej, midk):
 def computefeats2(data, mmix, mask, normalize=True):
     # Write feature versions of components
     data = data[mask]
-    data_vn = (data-data.mean(axis=-1)[:, np.newaxis])/data.std(axis=-1)[:, np.newaxis]
+    # demean data
+    data_vn = (data - data.mean(axis=-1, keepdims=True)) / data.std(axis=-1, keepdims=True)
+    # get betas for demeaned data against `mmix`
     data_R = get_coeffs(unmask(data_vn, mask), mask, mmix)[mask]
-    data_R[data_R < -.999] = -0.999
-    data_R[data_R > .999] = .999
+    # cap betas to range [-0.999, 0.999]
+    data_R[data_R < -0.999] = -0.999
+    data_R[data_R > 0.999] = 0.999
+    # R-to-Z transform?
     data_Z = np.arctanh(data_R)
     if len(data_Z.shape) == 1:
         data_Z = np.atleast_2d(data_Z).T
     if normalize:
-        data_Z = (((data_Z.T - data_Z.mean(0)[:, np.newaxis]) /
-                  data_Z.std(0)[:, np.newaxis]) + (data_Z.mean(0)/data_Z.std(0))[:, np.newaxis]).T
+        # standardize
+        data_Zm = (data_Z - data_Z.mean(axis=0, keepdims=True)) / data_Z.std(axis=0, keepdims=True)
+        # add back mean / stdev
+        data_Z = data_Zm + (data_Z.mean(axis=0, keepdims=True) / data_Z.std(axis=0, keepdims=True))
     return data_Z
 
 
@@ -359,36 +371,31 @@ def ctabsel(ctabfile):
 def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, head,
                      fout=None, reindex=False, mmixN=None, full_sel=True):
     """
-    Usage:
-
-    fitmodels_direct(fout)
-
     Input:
     fout is flag for output of per-component TE-dependence maps
     t2s is a (nx,ny,nz) ndarray
     tes is a 1d array
     """
 
-    # Compute opt. com. raw data
-    tsoc = np.array(optcom(catd, t2sG, tes, mask, combmode, useG=True),
+    # compute optimal combination of raw data
+    tsoc = np.array(optcom(catd, t2sG, tes, mask, combmode),
                     dtype=float)[mask]
-    tsoc_mean = tsoc.mean(axis=-1)
-    tsoc_dm = tsoc - tsoc_mean[:, np.newaxis]
+    # demean optimal combination
+    tsoc_dm = tsoc - tsoc.mean(axis=-1, keepdims=True)
 
-    # Compute un-normalized weight dataset (features)
+    # compute un-normalized weight dataset (features)
     if mmixN is None:
         mmixN = mmix
     WTS = computefeats2(unmask(tsoc, mask), mmixN, mask, normalize=False)
 
-    # Compute PSC dataset - shouldn't have to refit data
+    # compute PSC dataset - shouldn't have to refit data
     global tsoc_B
     tsoc_B = get_coeffs(unmask(tsoc_dm, mask), mask, mmix)[mask]
     tsoc_Babs = np.abs(tsoc_B)
-    PSC = tsoc_B/tsoc.mean(axis=-1)[:, np.newaxis]*100
+    PSC = tsoc_B / tsoc.mean(axis=-1, keepdims=True) * 100
 
-    # Compute skews to determine signs based on unnormalized weights,
+    # compute skews to determine signs based on unnormalized weights,
     # correct mmix & WTS signs based on spatial distribution tails
-    from scipy.stats import skew
     signs = skew(WTS, axis=0)
     signs /= np.abs(signs)
     mmix = mmix.copy()
@@ -398,53 +405,52 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, head,
     totvar = (tsoc_B**2).sum()
     totvar_norm = (WTS**2).sum()
 
-    # Compute Betas and means over TEs for TE-dependence analysis
+    # compute Betas and means over TEs for TE-dependence analysis
     n_echos = len(tes)
-    betas = cat2echos(get_coeffs(uncat2echos(catd),
-                                 np.tile(mask, (1, 1, n_echos)),
-                                 mmix), n_echos)
-    nx, ny, nz, n_echos, nc = betas.shape
+    betas = get_coeffs(catd,
+                       np.repeat(mask[:, np.newaxis], n_echos, axis=1),
+                       mmix)
+    n_samp, n_echos, n_components = betas.shape
     Nm = mask.sum()
     NmD = (t2s != 0).sum()
     mu = catd.mean(axis=-1)
     tes = np.reshape(tes, (n_echos, 1))
     fmin, fmid, fmax = getfbounds(n_echos)
 
-    # Mask arrays
-    mumask = fmask(mu, t2s != 0)
-    t2smask = fmask(t2s, t2s != 0)
-    betamask = fmask(betas, t2s != 0)
+    # mask arrays
+    mumask = mu[t2s != 0]
+    t2smask = t2s[t2s != 0]
+    betamask = betas[t2s != 0]
 
-    # Setup Xmats
-    X1 = mumask.transpose()  # Model 1
-    X2 = np.tile(tes,
-                 (1, NmD)) * mumask.transpose() / t2smask.transpose()  # Model 2
+    # setup Xmats
+    X1 = mumask.T  # Model 1
+    X2 = np.tile(tes, (1, NmD)) * mumask.T / t2smask.T  # Model 2
 
-    # Tables for component selection
+    # tables for component selection
     global Kappas, Rhos, varex, varex_norm
     global Z_maps, F_R2_maps, F_S0_maps
     global Z_clmaps, F_R2_clmaps, F_S0_clmaps
     global Br_clmaps_R2, Br_clmaps_S0
-    Kappas = np.zeros([nc])
-    Rhos = np.zeros([nc])
-    varex = np.zeros([nc])
-    varex_norm = np.zeros([nc])
-    Z_maps = np.zeros([Nm, nc])
-    F_R2_maps = np.zeros([NmD, nc])
-    F_S0_maps = np.zeros([NmD, nc])
-    Z_clmaps = np.zeros([Nm, nc])
-    F_R2_clmaps = np.zeros([NmD, nc])
-    F_S0_clmaps = np.zeros([NmD, nc])
-    Br_clmaps_R2 = np.zeros([Nm, nc])
-    Br_clmaps_S0 = np.zeros([Nm, nc])
-
-    for i in range(nc):
-
-        # size of B is (nc, nx*ny*nz)
-        B = np.atleast_3d(betamask)[:, :, i].transpose()
+    Kappas = np.zeros([n_components])
+    Rhos = np.zeros([n_components])
+    varex = np.zeros([n_components])
+    varex_norm = np.zeros([n_components])
+    Z_maps = np.zeros([Nm, n_components])
+    F_R2_maps = np.zeros([NmD, n_components])
+    F_S0_maps = np.zeros([NmD, n_components])
+    Z_clmaps = np.zeros([Nm, n_components])
+    F_R2_clmaps = np.zeros([NmD, n_components])
+    F_S0_clmaps = np.zeros([NmD, n_components])
+    Br_clmaps_R2 = np.zeros([Nm, n_components])
+    Br_clmaps_S0 = np.zeros([Nm, n_components])
+
+    for i in range(n_components):
+
+        # size of B is (n_components, n_samp)
+        B = np.atleast_3d(betamask)[:, :, i].T
         alpha = (np.abs(B)**2).sum(axis=0)
-        varex[i] = (tsoc_B[:, i]**2).sum()/totvar*100.
-        varex_norm[i] = (unmask(WTS, mask)[t2s != 0][:, i]**2).sum()/totvar_norm * 100.
+        varex[i] = (tsoc_B[:, i]**2).sum() / totvar * 100.
+        varex_norm[i] = (unmask(WTS, mask)[t2s != 0][:, i]**2).sum() / totvar_norm * 100.
 
         # S0 Model
         coeffs_S0 = (B * X1).sum(axis=0) / (X1**2).sum(axis=0)
@@ -457,26 +463,23 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, head,
         coeffs_R2 = (B * X2).sum(axis=0) / (X2**2).sum(axis=0)
         SSE_R2 = (B - X2 * np.tile(coeffs_R2, (n_echos, 1)))**2
         SSE_R2 = SSE_R2.sum(axis=0)
-        F_R2 = (alpha - SSE_R2)*2/(SSE_R2)
+        F_R2 = (alpha - SSE_R2) * 2 / (SSE_R2)
         F_R2_maps[:, i] = F_R2
 
-        # Compute weights as Z-values
+        # compute weights as Z-values
         wtsZ = (WTS[:, i] - WTS[:, i].mean()) / WTS[:, i].std()
         wtsZ[np.abs(wtsZ) > Z_MAX] = (Z_MAX * (np.abs(wtsZ) / wtsZ))[np.abs(wtsZ) > Z_MAX]
         Z_maps[:, i] = wtsZ
 
-        # Compute Kappa and Rho
+        # compute Kappa and Rho
         F_S0[F_S0 > F_MAX] = F_MAX
         F_R2[F_R2 > F_MAX] = F_MAX
-        Kappas[i] = np.average(F_R2,
-                               weights=np.abs(np.squeeze(unmask(wtsZ,
-                                                                mask)[t2s != 0]**2.)))
-        Rhos[i] = np.average(F_S0,
-                             weights=np.abs(np.squeeze(unmask(wtsZ,
-                                                              mask)[t2s != 0]**2.)))
+        norm_weights = np.abs(np.squeeze(unmask(wtsZ, mask)[t2s != 0]**2.))
+        Kappas[i] = np.average(F_R2, weights=norm_weights)
+        Rhos[i] = np.average(F_S0, weights=norm_weights)
 
     # Tabulate component values
-    comptab_pre = np.vstack([np.arange(nc), Kappas, Rhos, varex, varex_norm]).T
+    comptab_pre = np.vstack([np.arange(n_components), Kappas, Rhos, varex, varex_norm]).T
     if reindex:
         # Re-index all components in Kappa order
         comptab = comptab_pre[comptab_pre[:, 1].argsort()[::-1], :]
@@ -501,10 +504,10 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, head,
     # Full selection including clustering criteria
     seldict = None
     if full_sel:
-        for i in range(nc):
+        for i in range(n_components):
 
             # Save out files
-            out = np.zeros((nx, ny, nz, 4))
+            out = np.zeros((n_samp, 4))
             if fout is not None:
                 ccname = "cc%.3d.nii" % i
             else:
@@ -1023,33 +1026,28 @@ def selcomps(seldict, mmix, head, manacc, debug=False, olevel=2, oversion=99,
 
 
 def tedpca(combmode, mask, stabilize, head, ste=0, mlepca=True):
+
     n_samp, n_echos, n_vols = catd.shape
     ste = np.array([int(ee) for ee in str(ste).split(',')])
+
     if len(ste) == 1 and ste[0] == -1:
         lgr.info('-Computing PCA of optimally combined multi-echo data')
-        d = OCcatd[make_min_mask(OCcatd[:, np.newaxis, :])]
-        eim = eimask(d[:, np.newaxis, :])
-        eim = eim[:, 0] == 1
-        d = d[eim, :]
+        d = OCcatd[make_min_mask(OCcatd[:, np.newaxis, :])][:, np.newaxis, :]
     elif len(ste) == 1 and ste[0] == 0:
         lgr.info('-Computing PCA of spatially concatenated multi-echo data')
-        ste = np.arange(n_echos)
         d = catd[mask].astype('float64')
-        eim = eimask(d) == 1
-        d = d[eim]
     else:
         lgr.info('-Computing PCA of TE #%s' % ','.join([str(ee) for ee in ste]))
-        d = np.concatenate([catd[mask, ee, :][:, np.newaxis] for ee in ste - 1],
-                           axis=1).astype('float64')
-        eim = np.squeeze(eimask(d) == 1)
-        d = np.squeeze(d[eim])
+        d = np.stack([catd[mask, ee] for ee in ste - 1], axis=1).astype('float64')
+
+    eim = np.squeeze(eimask(d))
+    d = np.squeeze(d[eim])
 
     dz = ((d.T - d.T.mean(axis=0)) / d.T.std(axis=0)).T  # var normalize ts
     dz = (dz - dz.mean()) / dz.std()  # var normalize everything
 
     if not os.path.exists('pcastate.pkl'):
-
-        # Do PC dimension selection and get eigenvalue cutoff
+        # do PC dimension selection and get eigenvalue cutoff
         if mlepca:
             from sklearn.decomposition import PCA
             ppca = PCA(n_components='mle', svd_solver='full')
@@ -1060,8 +1058,9 @@ def tedpca(combmode, mask, stabilize, head, ste=0, mlepca=True):
         else:
             u, s, v = np.linalg.svd(dz, full_matrices=0)
 
-        sp = s/s.sum()
-        eigelb = sp[getelbow_mod(sp)]
+        # actual variance explained (normalized)
+        sp = s / s.sum()
+        eigelb = getelbow_mod(sp, val=True)
 
         spdif = np.abs(sp[1:] - sp[:-1])
         spdifh = spdif[(spdif.shape[0]//2):]
@@ -1079,7 +1078,7 @@ def tedpca(combmode, mask, stabilize, head, ste=0, mlepca=True):
 
         # Compute K and Rho for PCA comps
         eimum = np.atleast_2d(eim)
-        eimum = np.transpose(eimum, np.argsort(np.atleast_2d(eim).shape)[::-1])
+        eimum = np.transpose(eimum, np.argsort(eimum.shape)[::-1])
         eimum = eimum.prod(axis=1)
         o = np.zeros((mask.shape[0], *eimum.shape[1:]))
         o[mask] = eimum
@@ -1160,7 +1159,7 @@ def tedpca(combmode, mask, stabilize, head, ste=0, mlepca=True):
 def tedica(nc, dd, conv, fixed_seed, cost, final_cost):
     """
     Input is dimensionally reduced spatially concatenated multi-echo
-    time series dataset from tedpca(). Output is comptable, mmix, smaps
+    time series dataset from `tedpca`. Output is comptable, mmix, smaps
     from ICA, and betas from fitting catd to mmix.
     """
     import mdp
@@ -1176,7 +1175,7 @@ def tedica(nc, dd, conv, fixed_seed, cost, final_cost):
     return mmix
 
 
-def gscontrol_raw(OCcatd, head, n_echos, dtrank=4):
+def gscontrol_raw(catd, OCcatd, head, n_echos, dtrank=4):
     """
     This function uses the spatial global signal estimation approach to
     modify catd (global variable) to removal global signal out of individual
@@ -1188,9 +1187,7 @@ def gscontrol_raw(OCcatd, head, n_echos, dtrank=4):
     lgr.info('++ Applying amplitude-based T1 equilibration correction')
 
     # Legendre polynomial basis for denoising
-    n_vols = OCcatd.shape[-1]
-    Lmix = np.array([lpmv(0, vv, np.linspace(-1, 1, n_vols))
-                     for vv in range(dtrank)]).T
+    Lmix = np.array([lpmv(0, vv, np.linspace(-1, 1, OCcatd.shape[-1])) for vv in range(dtrank)]).T
 
     # compute mean, std, mask local to this function
     # inefficient, but makes this function a bit more modular
@@ -1198,7 +1195,9 @@ def gscontrol_raw(OCcatd, head, n_echos, dtrank=4):
     Gmask = Gmu != 0
 
     # Find spatial global signal
+    # BUG: this is indexing differently!!!!! and the subtraction is causing differences
     dat = OCcatd[Gmask] - Gmu[Gmask][:, np.newaxis]
+    # ^^^ THIS IS THE BAD PLACE
     sol = np.linalg.lstsq(Lmix, dat.T)[0]  # Legendre basis for detrending
     detr = dat - np.dot(sol.T, Lmix.T)[0]
     sphis = (detr).min(axis=1)
@@ -1218,16 +1217,16 @@ def gscontrol_raw(OCcatd, head, n_echos, dtrank=4):
                              np.atleast_2d(glbase.T[dtrank])) + Gmu[Gmask][:, np.newaxis]
 
     # niwrite(OCcatd, aff, 'tsoc_orig.nii', head)  # FIXME
-    OCcatd[Gmask] = tsoc_nogs
+    OCcatd = unmask(tsoc_nogs, Gmask)
     # niwrite(OCcatd, aff, 'tsoc_nogs.nii', head)  # FIXME
 
     # Project glbase out of each echo
     for echo in range(n_echos):
-        dat = catd[Gmask, echo, :]
+        dat = catd[:, echo, :][Gmask]
         sol = np.linalg.lstsq(np.atleast_2d(glbase), dat.T)[0]
         e_nogs = dat - np.dot(np.atleast_2d(sol[dtrank]).T,
                               np.atleast_2d(glbase.T[dtrank]))
-        catd[Gmask, echo, :] = e_nogs
+        catd[:, echo, :] = unmask(e_nogs, Gmask)
 
     return catd, OCcatd
 
@@ -1496,12 +1495,12 @@ def main(options):
     global OCcatd
     OCcatd = optcom(catd, t2sG, tes, mask, combmode)
     if not options.no_gscontrol:
-        catd, OCcatd = gscontrol_raw(OCcatd, head, len(tes))
+        catd, OCcatd = gscontrol_raw(catd, OCcatd, head, len(tes))
 
     if options.mixm is None:
         lgr.info('++ Doing ME-PCA and ME-ICA')
 
-        nc, dd = tedpca(combmode, mask, stabilize, head, ste=options.ste)
+        nc, dd = tedpca(catd, combmode, mask, stabilize, head, ste=options.ste)
 
         mmix_orig = tedica(nc, dd, options.conv, options.fixed_seed,
                            cost=options.initcost,
diff --git a/tedana/utils/utils.py b/tedana/utils/utils.py
index e8b25af59..08af43602 100644
--- a/tedana/utils/utils.py
+++ b/tedana/utils/utils.py
@@ -32,7 +32,7 @@ def load_image(data):
         else:
             data = check_niimg(data).get_data()
 
-    fdata = data.reshape((-1,) + data.shape[3:])
+    fdata = data.reshape((-1,) + data.shape[3:], order='F')
 
     return fdata.squeeze()
 
@@ -63,30 +63,22 @@ def cat2echos(data, n_echos=None):
         # individual echo files were provided
         if len(data) > 2:
             fdata = np.stack([load_image(f) for f in data], axis=1)
-        # a z-concatenated file was provided
+            if fdata.ndim < 3:
+                fdata = fdata[..., np.newaxis]
+            return fdata
+        # a z-concatenated file was provided; load data and pipe it down
         elif len(data) == 1:
             if n_echos is None:
                 raise ValueError('Number of echos `n_echos` must be specified '
                                  'if z-concatenated data file provided.')
-            fdata = load_image(data[0])
+            data = check_niimg(data[0]).get_data()
         # only two echo files were provided, which doesn't fly
         else:
             raise ValueError('Cannot run `tedana` with only two echos: '
                              '{}'.format(data))
-        # ensure data has a time axis
-        if fdata.ndim < 3:
-            fdata = fdata[..., np.newaxis]
-    # data array was provided (is this necessary?)
-    elif isinstance(data, np.ndarray):
-        if data.ndim != 4:
-            raise ValueError('Data must be 4-dimensional, where the '
-                             'dimensions correspond to: (1) first spatial '
-                             'dimensions, (2) second spatial dimension, (3) '
-                             'third spatial dimension x number of echos, and '
-                             '(4) time. Provided data dimensions were: '
-                             '{}'.format(data.shape))
-        nx, ny, nz = data.shape[:2], data.shape[2] // n_echos
-        fdata = load_image(data.reshape(nx, ny, nz, n_echos, -1, order='F'))
+
+    (nx, ny), nz = data.shape[:2], data.shape[2] // n_echos
+    fdata = load_image(data.reshape(nx, ny, nz, n_echos, -1, order='F'))
 
     return fdata
 
@@ -268,31 +260,20 @@ def unmask(data, mask):
 
     Parameters
     ----------
-    data : (S x E x T) array_like
-        Masked array, where S is samples flattened across spatial dimensions
-    mask : (X x Y x Z) array_like
-        Boolean array that was used to mask `data`
+    data : (M x E x T) array_like
+        Masked array, where `M` is the number of samples
+    mask : (S,) array_like
+        Boolean array of `S` samples that was used to mask `data`
 
     Returns
     -------
-    fdata : (X x Y x Z x E x T) np.ndarray
-        Unmasked `data` array with spatial dimensions intact
+    out : (S x E x T) np.ndarray
+        Unmasked `data` array
     """
 
-    M = (mask != 0).ravel()
-    Nm = M.sum()
-
-    nx, ny, nz = mask.shape
-
-    if len(data.shape) > 1:
-        nt = data.shape[1]
-    else:
-        nt = 1
-
-    out = np.zeros((nx * ny * nz, nt), dtype=data.dtype)
-    out[M, :] = np.reshape(data, (Nm, nt))
-
-    return np.squeeze(np.reshape(out, (nx, ny, nz, nt)))
+    out = np.zeros((mask.shape + data.shape[1:]))
+    out[mask] = data
+    return out
 
 
 def moments(data):

From 10ef6d73e836b67df4d23b9e3e185ea2044c67d7 Mon Sep 17 00:00:00 2001
From: Ross Markello <rossmarkello@gmail.com>
Date: Mon, 7 May 2018 00:33:35 -0400
Subject: [PATCH 05/18] [RF] niwrite --> filewrite, better gifti support

More major overhauls in the process of supporting GIFTI files. The `niwrite`
function has been killed to make way for `filewrite`. Lots of stylistic changes
in the process of integrating this throughout `tedana.interfaces.tedana`, but
mostly just significant new functionality in `tedana.utils`.
---
 tedana/cli/run.py             |   2 +-
 tedana/interfaces/__init__.py |   4 +-
 tedana/interfaces/t2smap.py   |  67 +++---
 tedana/interfaces/tedana.py   | 408 +++++++++++++++-------------------
 tedana/tests/test_utils.py    |   4 -
 tedana/utils/__init__.py      |  14 +-
 tedana/utils/utils.py         | 265 ++++++++++++++++++----
 7 files changed, 446 insertions(+), 318 deletions(-)

diff --git a/tedana/cli/run.py b/tedana/cli/run.py
index 092d984ad..7ef7cc4f6 100644
--- a/tedana/cli/run.py
+++ b/tedana/cli/run.py
@@ -20,7 +20,7 @@ def get_parser():
     parser.add_argument('-e',
                         dest='tes',
                         nargs='+',
-                        help='Echo times (in ms) ex: 15,39,63',
+                        help='Echo times (in ms) ex: 15.0 39.0 63.0',
                         required=True)
     parser.add_argument('--mix',
                         dest='mixm',
diff --git a/tedana/interfaces/__init__.py b/tedana/interfaces/__init__.py
index de85773df..94e7677f0 100644
--- a/tedana/interfaces/__init__.py
+++ b/tedana/interfaces/__init__.py
@@ -1,7 +1,7 @@
 # emacs: -*- mode: python-mode; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*-
 # ex: set sts=4 ts=4 sw=4 et:
 
-from .t2smap import (t2sadmap, optcom)
+from .t2smap import (t2sadmap, make_optcom)
 
 __all__ = [
-    't2sadmap', 'optcom']
+    't2sadmap', 'make_optcom']
diff --git a/tedana/interfaces/t2smap.py b/tedana/interfaces/t2smap.py
index d78feb8c1..61020e168 100644
--- a/tedana/interfaces/t2smap.py
+++ b/tedana/interfaces/t2smap.py
@@ -1,8 +1,8 @@
 import numpy as np
-import nibabel as nib
-from tedana.utils import (niwrite, cat2echos, makeadmask, unmask, fmask)
+from tedana.utils import (filewrite, load_data, makeadmask, unmask, fmask)
 
 import logging
+logging.basicConfig(format='[%(levelname)s]: %(message)s', level=logging.INFO)
 lgr = logging.getLogger(__name__)
 
 
@@ -115,7 +115,7 @@ def t2sadmap(data, tes, mask, masksum, start_echo):
         X = np.repeat(x, n_vols, axis=0)
 
         beta, res, rank, sing = np.linalg.lstsq(X, B)
-        t2s = 1 / beta[1, :].T
+        t2s = 1. / beta[1, :].T
         s0 = np.exp(beta[0, :]).T
 
         t2s[np.isinf(t2s)] = 500.  # why 500?
@@ -141,18 +141,18 @@ def t2sadmap(data, tes, mask, masksum, start_echo):
     return t2sa, s0va, t2ss, s0vs, t2saf, s0vaf
 
 
-def optcom(data, t2s, tes, mask, combmode):
+def make_optcom(data, t2s, tes, mask, combmode):
     """
     Optimally combine BOLD data across TEs.
 
-    out = optcom(data,t2s)
+    out = make_optcom(data,t2s)
 
     Parameters
     ----------
     data : (S x E x T) :obj:`numpy.ndarray`
         Concatenated BOLD data.
     t2 : (S,) :obj:`numpy.ndarray`
-        3D map of estimated T2* values.
+        Estimated T2* values.
     tes : :obj:`numpy.ndarray`
         Array of TEs, in seconds.
     mask : (S,) :obj:`numpy.ndarray`
@@ -172,31 +172,27 @@ def optcom(data, t2s, tes, mask, combmode):
     mdata = data[mask]
     tes = np.array(tes)[np.newaxis]  # (1 x E) array_like
 
-    if len(t2s.shape) == 3:
-        print('Optimally combining with voxel-wise T2 estimates')
-        ft2s = t2s[:, np.newaxis]
+    if t2s.ndim == 1:
+        lgr.info('Optimally combining with voxel-wise T2 estimates')
+        ft2s = t2s[mask, np.newaxis]
     else:
-        print('Optimally combining with voxel- and volume-wise T2 estimates')
-        ft2s = t2s[:, :, np.newaxis]
-
-    if combmode == 'ste':
-        alpha = mdata.mean(-1) * tes
-    else:
-        alpha = tes * np.exp(-tes / ft2s)
+        lgr.info('Optimally combining with voxel- and volume-wise T2 estimates')
+        ft2s = t2s[mask, :, np.newaxis]
 
     if combmode == 'ste':
         alpha = mdata.mean(axis=-1) * tes
     else:
-        alpha = tes * np.exp(-tes / t2s[mask])
+        alpha = tes * np.exp(-tes / ft2s)
 
-    if len(t2s.shape) == 3:
+    if t2s.ndim == 1:
         alpha = np.tile(alpha[:, :, np.newaxis], (1, 1, n_vols))
     else:
         alpha = np.swapaxes(alpha, 1, 2)
         ax0_idx, ax2_idx = np.where(np.all(alpha == 0, axis=1))
         alpha[ax0_idx, :, ax2_idx] = 1.
 
-    fout = unmask(np.average(mdata, axis=1, weights=alpha), mask)
+    fout = np.average(mdata, axis=1, weights=alpha)
+    fout = unmask(fout, mask)
 
     return fout
 
@@ -216,28 +212,27 @@ def main(options):
         suf = '_%s' % str(options.label)
     else:
         suf = ''
+    tes, data, combmode = options.tes, options.data, options.combmode
 
-    tes = [float(te) for te in options.tes]
+    tes = [float(te) for te in tes]
     n_echos = len(tes)
-    catim = nib.load(options.data[0])
-    head = catim.get_header()
-    head.extensions = []
-    head.set_sform(head.get_sform(), code=1)
-    aff = catim.get_affine()
-    catd = cat2echos(catim.get_data(), n_echos)
-    nx, ny, nz, n_echos, n_trs = catd.shape
+
+    catd = load_data(data, n_echos=n_echos)
+    n_samp, n_echos, n_trs = catd.shape
+
+    ref_img = data[0] if isinstance(data, list) else data
 
     lgr.info("++ Computing Mask")
     mask, masksum = makeadmask(catd, minimum=False, getsum=True)
-    niwrite(masksum, aff, 'masksum%s.nii' % suf)
+    filewrite(masksum, 'masksum%s' % suf, ref_img, copy_header=False)
 
     lgr.info("++ Computing Adaptive T2* map")
-    t2s, s0, t2ss, s0vs, t2saf, s0vaf = t2sadmap(catd, mask, tes, masksum, 2)
-    niwrite(t2ss, aff, 't2ss%s.nii' % suf)
-    niwrite(s0vs, aff, 's0vs%s.nii' % suf)
+    t2s, s0, t2ss, s0vs, t2saf, s0vaf = t2sadmap(catd, tes, mask, masksum, 2)
+    filewrite(t2ss, 't2ss%s' % suf, ref_img, copy_header=False)
+    filewrite(s0vs, 's0vs%s' % suf, ref_img, copy_header=False)
 
     lgr.info("++ Computing optimal combination")
-    tsoc = np.array(optcom(catd, t2s, tes, mask, options.combmode),
+    tsoc = np.array(make_optcom(catd, t2s, tes, mask, combmode),
                     dtype=float)
 
     # Clean up numerical errors
@@ -249,7 +244,7 @@ def main(options):
     t2s[t2s < 0] = 0
     t2sm[t2sm < 0] = 0
 
-    niwrite(tsoc, aff, 'ocv%s.nii' % suf)
-    niwrite(s0, aff, 's0v%s.nii' % suf)
-    niwrite(t2s, aff, 't2sv%s.nii' % suf)
-    niwrite(t2sm, aff, 't2svm%s.nii' % suf)
+    filewrite(tsoc, 'ocv%s' % suf, ref_img, copy_header=False)
+    filewrite(s0, 's0v%s' % suf, ref_img, copy_header=False)
+    filewrite(t2s, 't2sv%s' % suf, ref_img, copy_header=False)
+    filewrite(t2sm, 't2svm%s' % suf, ref_img, copy_header=False)
diff --git a/tedana/interfaces/tedana.py b/tedana/interfaces/tedana.py
index 09ced5ac3..b39d32efe 100644
--- a/tedana/interfaces/tedana.py
+++ b/tedana/interfaces/tedana.py
@@ -1,21 +1,21 @@
-
 import os
 import os.path as op
 import shutil
 import pickle
 import textwrap
 import numpy as np
-import nibabel as nib
 from scipy import stats
 from sklearn import svm
 from scipy.special import lpmv
 from sklearn.cluster import DBSCAN
-from tedana.interfaces import (optcom, t2sadmap)
-from tedana.utils import (cat2echos, make_min_mask,
-                          makeadmask, fmask, unmask,
-                          fitgaussian, niwrite, dice, andb)
+from tedana.interfaces import (make_optcom, t2sadmap)
+from tedana.utils import (load_image, load_data, get_dtype,
+                          make_min_mask, makeadmask,
+                          fmask, unmask, filewrite,
+                          fitgaussian, dice, andb)
 
 import logging
+logging.basicConfig(format='[%(levelname)s]: %(message)s', level=logging.INFO)
 lgr = logging.getLogger(__name__)
 
 """
@@ -76,7 +76,7 @@ def do_svm(X_train, y_train, X_test, svmtype=0):
     return y_pred, clf
 
 
-def spatclust(data, mask, csize, thr, header, aff, infile=None, dindex=0,
+def spatclust(data, mask, csize, thr, ref_img, infile=None, dindex=0,
               tindex=0):
     """
     Thresholds and spatially clusters `data`
@@ -98,53 +98,24 @@ def spatclust(data, mask, csize, thr, header, aff, infile=None, dindex=0,
     clustered :
     """
 
-    # threshold image
-
     if infile is None:
         data = data.copy()
         data[data < thr] = 0
-        niwrite(unmask(data, mask), aff, '__clin.nii.gz', header)
-        infile = '__clin.nii.gz'
+        infile = filewrite(unmask(data, mask), '__clin', ref_img, gzip=True)
+
+    # FIXME: ideally no calls to os.system!!! (or AFNI, for that matter)
     addopts = ''
-    if data is not None and len(np.squeeze(data).shape) > 1 and dindex + tindex == 0:
+    if data is not None and data.squeeze().ndim > 1 and dindex + tindex == 0:
         addopts = '-doall'
     else:
         addopts = '-1dindex {0} -1tindex {1}'.format(str(dindex), str(tindex))
 
-    #
     cmd_str = '3dmerge -overwrite {0} -dxyz=1 -1clust 1 {1:d} ' \
               '-1thresh {2:.02f} -prefix __clout.nii.gz {3}'
     os.system(cmd_str.format(addopts, int(csize), float(thr), infile))
-    clustered = fmask(nib.load('__clout.nii.gz').get_data(), mask) != 0
-    return clustered
-
 
-def rankvec(vals):
-    """
-    Returns ranks of array
-
-    Parameters
-    ----------
-    vals : array-like
-        1d array from which to determine ranks.
-
-    Returns
-    -------
-    ranks : array-like
-        1d array of ranks for values in input vals.
-    """
-    try:
-        vals = np.array(vals)
-    except Exception:  # would this ever happen????
-        raise IOError('Input vals is not array_like')
-
-    if len(vals.shape) != 1:
-        raise ValueError('Input vals is not 1d array')
-
-    asort = np.argsort(vals)
-    ranks = np.zeros(vals.shape[0])
-    ranks[asort] = np.arange(vals.shape[0]) + 1
-    return ranks
+    clustered = load_image('__clout.nii.gz')[mask] != 0
+    return clustered
 
 
 def get_coeffs(data, mask, X, add_const=False):
@@ -188,14 +159,14 @@ def get_coeffs(data, mask, X, add_const=False):
 
 
 def getelbow_cons(ks, val=False):
-    """Elbow using mean/variance method - conservative
+    """
+    Elbow using mean/variance method - conservative
 
     Parameters
     ----------
     ks : array-like
-
     val : bool, optional
-        Default is False
+        Return the value of the elbow instead of the index. Default: False
 
     Returns
     -------
@@ -203,6 +174,7 @@ def getelbow_cons(ks, val=False):
         Either the elbow index (if val is True) or the values at the elbow
         index (if val is False)
     """
+
     ks = np.sort(ks)[::-1]
     nk = len(ks)
     temp1 = [(ks[nk - 5 - ii - 1] > ks[nk - 5 - ii:nk].mean() + 2 * ks[nk - 5 - ii:nk].std())
@@ -223,14 +195,14 @@ def getelbow_cons(ks, val=False):
 
 
 def getelbow_mod(ks, val=False):
-    """Elbow using linear projection method - moderate
+    """
+    Elbow using linear projection method - moderate
 
     Parameters
     ----------
     ks : array-like
-
     val : bool, optional
-        Default is False
+        Return the value of the elbow instead of the index. Default: False
 
     Returns
     -------
@@ -256,7 +228,8 @@ def getelbow_mod(ks, val=False):
 
 
 def getelbow_aggr(ks, val=False):
-    """Elbow using curvature - aggressive
+    """
+    Elbow using curvature - aggressive
 
     Parameters
     ----------
@@ -271,6 +244,7 @@ def getelbow_aggr(ks, val=False):
         Either the elbow index (if val is True) or the values at the elbow
         index (if val is False)
     """
+
     ks = np.sort(ks)[::-1]
     dKdt = ks[:-1] - ks[1:]
     dKdt2 = dKdt[:-1] - dKdt[1:]
@@ -286,15 +260,17 @@ def getelbow_aggr(ks, val=False):
 
 def getfbounds(n_echos):
     """
-
     Parameters
     ----------
     n_echos : int
-        Number of echoes.
+        Number of echoes
 
     Returns
     -------
+    fmin, fmid, fmax : float
+        Minimum, mid, and max F bounds
     """
+
     if not isinstance(n_echos, int):
         raise IOError('Input n_echos must be int')
     elif n_echos <= 0:
@@ -311,6 +287,7 @@ def eimask(dd, ees=None):
     """
     Returns mask for data between [0.001, 5] * 98th percentile of dd
     """
+
     if ees is None:
         ees = range(dd.shape[1])
     imask = np.zeros([dd.shape[0], len(ees)], dtype=bool)
@@ -340,7 +317,7 @@ def computefeats2(data, mmix, mask, normalize=True):
     # Write feature versions of components
     data = data[mask]
     # demean data
-    data_vn = (data - data.mean(axis=-1, keepdims=True)) / data.std(axis=-1, keepdims=True)
+    data_vn = stats.zscore(data, axis=-1)
     # get betas for demeaned data against `mmix`
     data_R = get_coeffs(unmask(data_vn, mask), mask, mmix)[mask]
     # cap betas to range [-0.999, 0.999]
@@ -352,8 +329,8 @@ def computefeats2(data, mmix, mask, normalize=True):
         data_Z = np.atleast_2d(data_Z).T
     if normalize:
         # standardize
-        data_Zm = (data_Z - data_Z.mean(axis=0, keepdims=True)) / data_Z.std(axis=0, keepdims=True)
-        # add back mean / stdev
+        data_Zm = stats.zscore(data_Z, axis=0)
+        # add back (mean / stdev)
         data_Z = data_Zm + (data_Z.mean(axis=0, keepdims=True) / data_Z.std(axis=0, keepdims=True))
     return data_Z
 
@@ -369,17 +346,31 @@ def ctabsel(ctabfile):
     return tuple([np.array(class_dict[kk], dtype=int) for kk in class_tags])
 
 
-def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, head,
+def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, ref_img,
                      fout=None, reindex=False, mmixN=None, full_sel=True):
     """
-    Input:
-    fout is flag for output of per-component TE-dependence maps
-    t2s is a (nx,ny,nz) ndarray
-    tes is a 1d array
+    Parameters
+    ----------
+    catd : (S x E x T) array_like
+    mmix : (T x C) array_like
+    mask : (S,) array_like
+    t2s : (S,) array_like
+    t2sG : (S,) array_like
+    tes : (E,) list
+    combmode : str
+    ref_img : str or img_like
+    fout : bool
+        Whether to output per-component TE-dependence maps. Default: None
+    reindex : bool, optional
+        Default: False
+    mmixN : array_like, optional
+        Default: None
+    full_sel : bool, optional
+        Default: True
     """
 
     # compute optimal combination of raw data
-    tsoc = np.array(optcom(catd, t2sG, tes, mask, combmode),
+    tsoc = np.array(make_optcom(catd, t2sG, tes, mask, combmode),
                     dtype=float)[mask]
     # demean optimal combination
     tsoc_dm = tsoc - tsoc.mean(axis=-1, keepdims=True)
@@ -407,10 +398,7 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, head,
     totvar_norm = (WTS**2).sum()
 
     # compute Betas and means over TEs for TE-dependence analysis
-    n_echos = len(tes)
-    betas = get_coeffs(catd,
-                       np.repeat(mask[:, np.newaxis], n_echos, axis=1),
-                       mmix)
+    betas = get_coeffs(catd, np.repeat(mask[:, np.newaxis], len(tes), axis=1), mmix)
     n_samp, n_echos, n_components = betas.shape
     n_voxels = mask.sum()
     n_data_voxels = (t2s != 0).sum()
@@ -423,9 +411,9 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, head,
     t2smask = t2s[t2s != 0]
     betamask = betas[t2s != 0]
 
-    # setup Xmats
+    # set up Xmats
     X1 = mumask.T  # Model 1
-    X2 = np.tile(tes, (1, n_voxels)) * mumask.T / t2smask.T  # Model 2
+    X2 = np.tile(tes, (1, n_data_voxels)) * mumask.T / t2smask.T  # Model 2
 
     # tables for component selection
     global Kappas, Rhos, varex, varex_norm
@@ -478,10 +466,10 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, head,
         Kappas[i] = np.average(F_R2, weights=norm_weights)
         Rhos[i] = np.average(F_S0, weights=norm_weights)
 
-    # Tabulate component values
+    # tabulate component values
     comptab_pre = np.vstack([np.arange(n_components), Kappas, Rhos, varex, varex_norm]).T
     if reindex:
-        # Re-index all components in Kappa order
+        # re-index all components in Kappa order
         comptab = comptab_pre[comptab_pre[:, 1].argsort()[::-1], :]
         Kappas = comptab[:, 1]
         Rhos = comptab[:, 2]
@@ -501,53 +489,57 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, head,
         comptab = comptab_pre
         mmix_new = mmix
 
-    # Full selection including clustering criteria
+    # full selection including clustering criteria
     seldict = None
     if full_sel:
         for i in range(n_components):
-            # Save out files
+            # save out files
             out = np.zeros((n_samp, 4))
             if fout is not None:
-                ccname = "cc%.3d.nii" % i
+                ccname, gzip = 'cc{:03d}'.format(i), False
             else:
-                ccname = ".cc_temp.nii.gz"
+                ccname, gzip = '.cc_temp', True
 
-            out[:, :, :, 0] = np.squeeze(unmask(PSC[:, i], mask))
-            out[:, :, :, 1] = np.squeeze(unmask(F_R2_maps[:, i], t2s != 0))
-            out[:, :, :, 2] = np.squeeze(unmask(F_S0_maps[:, i], t2s != 0))
-            out[:, :, :, 3] = np.squeeze(unmask(Z_maps[:, i], mask))
+            out[:, 0] = np.squeeze(unmask(PSC[:, i], mask))
+            out[:, 1] = np.squeeze(unmask(F_R2_maps[:, i], t2s != 0))
+            out[:, 2] = np.squeeze(unmask(F_S0_maps[:, i], t2s != 0))
+            out[:, 3] = np.squeeze(unmask(Z_maps[:, i], mask))
+
+            filewrite(out, ccname, ref_img, gzip=gzip)
+
+            if get_dtype(ref_img) == 'GIFTI':
+                continue  # TODO: pass through GIFTI file data as below
 
-            niwrite(out, fout, ccname, head)
             os.system('3drefit -sublabel 0 PSC -sublabel 1 F_R2 -sublabel 2 F_SO '
-                      '-sublabel 3 Z_sn %s 2> /dev/null > /dev/null' % ccname)
+                      '-sublabel 3 Z_sn {} 2> /dev/null > /dev/null'.format(ccname))
 
             csize = np.max([int(n_voxels * 0.0005) + 5, 20])
 
             # Do simple clustering on F
             # TODO: can be replaced with nilearn.image.threshold_img
-            os.system("3dcalc -overwrite -a %s[1..2] -expr 'a*step(a-%i)' -prefix .fcl_in.nii.gz "
-                      "-overwrite" % (ccname, fmin))
+            os.system('3dcalc -overwrite -a {}[1..2] -expr \'a*step(a-{})\' -prefix '
+                      '.fcl_in.nii.gz -overwrite'.format(ccname, fmin))
             # TODO: can be replaced with nilearn.regions.connected_regions
-            os.system('3dmerge -overwrite -dxyz=1 -1clust 1 %i -doall '
-                      '-prefix .fcl_out.nii.gz .fcl_in.nii.gz' % (csize))
-            sel = fmask(nib.load('.fcl_out.nii.gz').get_data(), t2s != 0) != 0
-            sel = np.array(sel, dtype=np.int)
+            os.system('3dmerge -overwrite -dxyz=1 -1clust 1 {} -doall '
+                      '-prefix .fcl_out.nii.gz .fcl_in.nii.gz'.format(csize))
+            sel = load_image('.fcl_out.nii.gz')[t2s != 0]
+            sel = np.array(sel != 0, dtype=np.int)
             F_R2_clmaps[:, i] = sel[:, 0]
             F_S0_clmaps[:, i] = sel[:, 1]
 
             # Do simple clustering on Z at p<0.05
-            sel = spatclust(None, mask, csize, 1.95, head, aff,
+            sel = spatclust(None, mask, csize, 1.95, ref_img,
                             infile=ccname, dindex=3, tindex=3)
             Z_clmaps[:, i] = sel
 
             # Do simple clustering on ranked signal-change map
             countsigFR2 = F_R2_clmaps[:, i].sum()
             countsigFS0 = F_S0_clmaps[:, i].sum()
-            Br_clmaps_R2[:, i] = spatclust(rankvec(tsoc_Babs[:, i]), mask,
-                                           csize, max(tsoc_Babs.shape)-countsigFR2, head, aff)
-            Br_clmaps_S0[:, i] = spatclust(rankvec(tsoc_Babs[:, i]), mask,
+            Br_clmaps_R2[:, i] = spatclust(stats.rankdata(tsoc_Babs[:, i]), mask,
+                                           csize, max(tsoc_Babs.shape)-countsigFR2, ref_img)
+            Br_clmaps_S0[:, i] = spatclust(stats.rankdata(tsoc_Babs[:, i]), mask,
                                            csize, max(tsoc_Babs.shape)-countsigFS0,
-                                           head, aff)
+                                           ref_img)
 
         seldict = {}
         selvars = ['Kappas', 'Rhos', 'WTS', 'varex', 'Z_maps', 'F_R2_maps',
@@ -667,7 +659,7 @@ def selcomps(seldict, mmix, head, manacc, n_echos, debug=False, olevel=2, oversi
                                                            mask)[:, :, :, ii])))
         fproj_z = fproj.max(2)
         fproj[fproj == fproj.max()] = 0
-        fproj_arr[:, ii] = rankvec(fproj_z.flatten())
+        fproj_arr[:, ii] = stats.rankdata(fproj_z.flatten())
         fproj_arr_val[:, ii] = fproj_z.flatten()
         spr.append(np.array(fproj_z > fproj_z.max() / 4, dtype=np.int).sum())
         fprojr = np.array([fproj, fproj[:, :, ::-1]]).max(0)
@@ -882,9 +874,9 @@ def selcomps(seldict, mmix, head, manacc, n_echos, debug=False, olevel=2, oversi
     # To write out veinmask
     veinout = np.zeros(t2s.shape)
     veinout[t2s!=0] = veinmaskf
-    niwrite(veinout,aff,'veinmaskf.nii',head)
-    veinBout = unmask(veinmaskB,mask)
-    niwrite(veinBout,aff,'veins50.nii',head)
+    filewrite(veinout, 'veinmaskf', ref_img)
+    veinBout = unmask(veinmaskB, mask)
+    filewrite(veinBout, 'veins50', ref_img)
     """
 
     tsoc_B_Zcl = np.zeros(tsoc_B.shape)
@@ -928,7 +920,7 @@ def selcomps(seldict, mmix, head, manacc, n_echos, debug=False, olevel=2, oversi
         group0_res = np.intersect1d(KRguess, group0)
         phys_var_zs.append((vvex - vvex[group0_res].mean()) / vvex[group0_res].std())
         veinBout = unmask(veinmaskB, mask)
-        niwrite(veinBout, aff, 'veins_l%i.nii' % t2sl_i, head)
+        filewrite(veinBout, 'veins_l%i' % t2sl_i, ref_img)
 
     # Mask to sample veins
     phys_var_z = np.array(phys_var_zs).max(0)
@@ -946,8 +938,8 @@ def selcomps(seldict, mmix, head, manacc, n_echos, debug=False, olevel=2, oversi
     phys_art = np.setdiff1d(nc[andb([phys_var_z > 3.5,
                                      Kappas < minK_ign]) == 2], group0)
     phys_art = np.union1d(np.setdiff1d(nc[andb([phys_var_z > 2,
-                                                (rankvec(phys_var_z) -
-                                                 rankvec(Kappas)) > newcest / 2,
+                                                (stats.rankdata(phys_var_z) -
+                                                 stats.rankdata(Kappas)) > newcest / 2,
                                                 Vz2 > -1]) == 3],
                                        group0), phys_art)
     # Want to replace field_art with an acf/SVM based approach
@@ -955,8 +947,8 @@ def selcomps(seldict, mmix, head, manacc, n_echos, debug=False, olevel=2, oversi
     field_art = np.setdiff1d(nc[andb([mmix_kurt_z_max > 5,
                                       Kappas < minK_ign]) == 2], group0)
     field_art = np.union1d(np.setdiff1d(nc[andb([mmix_kurt_z_max > 2,
-                                           (rankvec(mmix_kurt_z_max) -
-                                            rankvec(Kappas)) > newcest / 2,
+                                           (stats.rankdata(mmix_kurt_z_max) -
+                                            stats.rankdata(Kappas)) > newcest / 2,
                                            Vz2 > 1, Kappas < F01]) == 4],
                                         group0), field_art)
     field_art = np.union1d(np.setdiff1d(nc[andb([mmix_kurt_z_max > 3, Vz2 > 3,
@@ -964,7 +956,7 @@ def selcomps(seldict, mmix, head, manacc, n_echos, debug=False, olevel=2, oversi
                                         group0), field_art)
     field_art = np.union1d(np.setdiff1d(nc[andb([mmix_kurt_z_max > 5, Vz2 > 5]) == 2],
                                         group0), field_art)
-    misc_art = np.setdiff1d(nc[andb([(rankvec(Vz) - rankvec(Ktz)) > newcest / 2,
+    misc_art = np.setdiff1d(nc[andb([(stats.rankdata(Vz) - stats.rankdata(Ktz)) > newcest / 2,
                             Kappas < Khighelbowval]) == 2], group0)
     ign_cand = np.unique(list(field_art)+list(phys_art)+list(misc_art))
     midkrej = np.union1d(midk, rej)
@@ -1002,7 +994,7 @@ def selcomps(seldict, mmix, head, manacc, n_echos, debug=False, olevel=2, oversi
     return list(sorted(ncl)), list(sorted(rej)), list(sorted(midk)), list(sorted(ign))
 
 
-def tedpca(combmode, mask, stabilize, head, tes, kdaw, rdaw, ste=0,
+def tedpca(catd, combmode, mask, stabilize, ref_img, tes, kdaw, rdaw, ste=0,
            mlepca=True):
     n_samp, n_echos, n_vols = catd.shape
     ste = np.array([int(ee) for ee in str(ste).split(',')])
@@ -1039,19 +1031,11 @@ def tedpca(combmode, mask, stabilize, head, tes, kdaw, rdaw, ste=0,
         sp = s / s.sum()
         eigelb = getelbow_mod(sp, val=True)
 
-        spdif = np.abs(sp[1:] - sp[:-1])
-        spdifh = spdif[(spdif.shape[0]//2):]
-        spdmin = spdif.min()
-        spdthr = np.mean([spdifh.max(), spdmin])
-        spmin = sp[(spdif.shape[0]//2) +
-                   (np.arange(spdifh.shape[0])[spdifh >= spdthr][0]) +
-                   1]
-        spcum = []
-        spcumv = 0
-        for sss in sp:
-            spcumv += sss
-            spcum.append(spcumv)
-        spcum = np.array(spcum)
+        spdif = np.abs(np.diff(sp))
+        spdifh = spdif[(len(spdif)//2):]
+        spdthr = np.mean([spdifh.max(), spdif.min()])
+        spmin = sp[(len(spdif)//2) + np.arange(len(spdifh))[spdifh >= spdthr][0] + 1]
+        spcum = np.cumsum(sp)
 
         # Compute K and Rho for PCA comps
         eimum = np.atleast_2d(eim)
@@ -1064,7 +1048,7 @@ def tedpca(combmode, mask, stabilize, head, tes, kdaw, rdaw, ste=0,
         vTmix = v.T
         vTmixN = ((vTmix.T - vTmix.T.mean(0)) / vTmix.T.std(0)).T
         _, ctb, betasv, v_T = fitmodels_direct(catd, v.T, eimum, t2s, t2sG,
-                                               tes, combmode, head,
+                                               tes, combmode, ref_img,
                                                mmixN=vTmixN, full_sel=False)
         ctb = ctb[ctb[:, 0].argsort(), :]
         ctb = np.vstack([ctb.T[:3], sp]).T
@@ -1127,8 +1111,8 @@ def tedpca(combmode, mask, stabilize, head, tes, kdaw, rdaw, ste=0,
     lgr.info('--Selected {0} components. Minimum Kappa={1:.02f} '
              'Rho={2:.02f}'.format(n_components, kappa_thr, rho_thr))
 
-    dd = ((dd.T - dd.T.mean(0)) / dd.T.std(0)).T  # Variance normalize timeseries
-    dd = (dd - dd.mean()) / dd.std()  # Variance normalize everything
+    dd = stats.zscore(dd.T, axis=0).T  # variance normalize timeseries
+    dd = stats.zscore(dd, axis=None)  # variance normalize everything
 
     return n_components, dd
 
@@ -1139,6 +1123,7 @@ def tedica(n_components, dd, conv, fixed_seed, cost, final_cost):
     time series dataset from `tedpca`. Output is comptable, mmix, smaps
     from ICA, and betas from fitting catd to mmix.
     """
+
     import mdp
     climit = float(conv)
     mdp.numx_rand.seed(fixed_seed)
@@ -1152,7 +1137,7 @@ def tedica(n_components, dd, conv, fixed_seed, cost, final_cost):
     return mmix
 
 
-def gscontrol_raw(catd, OCcatd, head, n_echos, dtrank=4):
+def gscontrol_raw(catd, optcom, n_echos, ref_img, dtrank=4):
     """
     This function uses the spatial global signal estimation approach to
     modify catd (global variable) to removal global signal out of individual
@@ -1164,27 +1149,26 @@ def gscontrol_raw(catd, OCcatd, head, n_echos, dtrank=4):
     lgr.info('++ Applying amplitude-based T1 equilibration correction')
 
     # Legendre polynomial basis for denoising
-    Lmix = np.array([lpmv(0, vv, np.linspace(-1, 1, OCcatd.shape[-1])) for vv in range(dtrank)]).T
+    bounds = np.linspace(-1, 1, optcom.shape[-1])
+    Lmix = np.column_stack([lpmv(0, vv, bounds) for vv in range(dtrank)])
 
     # compute mean, std, mask local to this function
     # inefficient, but makes this function a bit more modular
-    Gmu = OCcatd.mean(axis=-1)  # temporal mean
+    Gmu = optcom.mean(axis=-1)  # temporal mean
     Gmask = Gmu != 0
 
-    # Find spatial global signal
-    # BUG: this is indexing differently!!!!! and the subtraction is causing differences
-    dat = OCcatd[Gmask] - Gmu[Gmask][:, np.newaxis]
-    # ^^^ THIS IS THE BAD PLACE
+    # find spatial global signal
+    dat = optcom[Gmask] - Gmu[Gmask][:, np.newaxis]
     sol = np.linalg.lstsq(Lmix, dat.T)[0]  # Legendre basis for detrending
     detr = dat - np.dot(sol.T, Lmix.T)[0]
     sphis = (detr).min(axis=1)
     sphis -= sphis.mean()
-    # niwrite(unmask(sphis, Gmask), aff, 'T1gs.nii', head)  # FIXME
+    filewrite(unmask(sphis, Gmask), 'T1gs', ref_img)
 
-    # Find time course ofc the spatial global signal
+    # find time course of the spatial global signal
     # make basis with the Legendre basis
     glsig = np.linalg.lstsq(np.atleast_2d(sphis).T, dat)[0]
-    glsig = (glsig - glsig.mean()) / glsig.std()
+    glsig = stats.zscore(glsig, axis=None)
     np.savetxt('glsig.1D', glsig)
     glbase = np.hstack([Lmix, glsig.T])
 
@@ -1193,42 +1177,43 @@ def gscontrol_raw(catd, OCcatd, head, n_echos, dtrank=4):
     tsoc_nogs = dat - np.dot(np.atleast_2d(sol[dtrank]).T,
                              np.atleast_2d(glbase.T[dtrank])) + Gmu[Gmask][:, np.newaxis]
 
-    # niwrite(OCcatd, aff, 'tsoc_orig.nii', head)  # FIXME
-    OCcatd = unmask(tsoc_nogs, Gmask)
-    # niwrite(OCcatd, aff, 'tsoc_nogs.nii', head)  # FIXME
+    filewrite(optcom, 'tsoc_orig', ref_img)
+    optcom = unmask(tsoc_nogs, Gmask)
+    filewrite(optcom, 'tsoc_nogs.nii', ref_img)
 
     # Project glbase out of each echo
+    dm_catd = catd.copy()  # don't overwrite catd
     for echo in range(n_echos):
-        dat = catd[:, echo, :][Gmask]
+        dat = dm_catd[:, echo, :][Gmask]
         sol = np.linalg.lstsq(np.atleast_2d(glbase), dat.T)[0]
         e_nogs = dat - np.dot(np.atleast_2d(sol[dtrank]).T,
                               np.atleast_2d(glbase.T[dtrank]))
-        catd[:, echo, :] = unmask(e_nogs, Gmask)
+        dm_catd[:, echo, :] = unmask(e_nogs, Gmask)
 
-    return catd, OCcatd
+    return dm_catd, optcom
 
 
-def gscontrol_mmix(mmix, acc, rej, midk, empty, head):
+def gscontrol_mmix(mmix, acc, rej, midk, empty, ref_img):
 
-    Gmu = OCcatd.mean(-1)
-    Gstd = OCcatd.std(-1)
-    Gmask = Gmu != 0
+    Gmu = OCcatd.mean(axis=-1)
+    Gstd = OCcatd.std(axis=-1)
+    Gmask = (Gmu != 0)
 
     """
     Compute temporal regression
     """
     dat = (OCcatd[Gmask] - Gmu[Gmask][:, np.newaxis]) / Gstd[mask][:, np.newaxis]
-    solG = np.linalg.lstsq(mmix, dat.T)
-    resid = dat - np.dot(solG[0].T, mmix.T)
+    solG = np.linalg.lstsq(mmix, dat.T)[0]
+    resid = dat - np.dot(solG.T, mmix.T)
 
     """
     Build BOLD time series without amplitudes, and save T1-like effect
     """
-    bold_ts = np.dot(solG[0].T[:, acc], mmix[:, acc].T)
-    sphis = bold_ts.min(-1)
+    bold_ts = np.dot(solG.T[:, acc], mmix[:, acc].T)
+    sphis = bold_ts.min(axis=-1)
     sphis -= sphis.mean()
     lgr.info(sphis.shape)
-    niwrite(unmask(sphis, mask), aff, 'sphis_hik.nii', head)
+    filewrite(unmask(sphis, mask), 'sphis_hik', ref_img)
 
     """
     Find the global signal based on the T1-like effect
@@ -1240,14 +1225,14 @@ def gscontrol_mmix(mmix, acc, rej, midk, empty, head):
     T1 correct time series by regression
     """
     bold_noT1gs = bold_ts - np.dot(np.linalg.lstsq(glsig.T, bold_ts.T)[0].T, glsig)
-    niwrite(unmask(bold_noT1gs*Gstd[mask][:, np.newaxis], mask),
-            aff, 'hik_ts_OC_T1c.nii', head)
+    filewrite(unmask(bold_noT1gs * Gstd[mask][:, np.newaxis], mask),
+              'hik_ts_OC_T1c.nii', ref_img)
 
     """
     Make medn version of T1 corrected time series
     """
-    niwrite(Gmu[:, :, :, np.newaxis] + unmask((bold_noT1gs+resid)*Gstd[mask][:, np.newaxis], mask),
-            aff, 'dn_ts_OC_T1c.nii', head)
+    filewrite(Gmu[..., np.newaxis] + unmask((bold_noT1gs+resid)*Gstd[mask][:, np.newaxis], mask),
+              'dn_ts_OC_T1c', ref_img)
 
     """
     Orthogonalize mixing matrix w.r.t. T1-GS
@@ -1255,20 +1240,20 @@ def gscontrol_mmix(mmix, acc, rej, midk, empty, head):
     mmixnogs = mmix.T - np.dot(np.linalg.lstsq(glsig.T, mmix)[0].T, glsig)
     mmixnogs_mu = mmixnogs.mean(-1)
     mmixnogs_std = mmixnogs.std(-1)
-    mmixnogs_norm = (mmixnogs-mmixnogs_mu[:, np.newaxis])/mmixnogs_std[:, np.newaxis]
+    mmixnogs_norm = (mmixnogs - mmixnogs_mu[:, np.newaxis]) / mmixnogs_std[:, np.newaxis]
     mmixnogs_norm = np.vstack([np.atleast_2d(np.ones(max(glsig.shape))), glsig, mmixnogs_norm])
 
     """
     Write T1-GS corrected components and mixing matrix
     """
     sol = np.linalg.lstsq(mmixnogs_norm.T, dat.T)
-    niwrite(unmask(sol[0].T[:, 2:], mask), aff, 'betas_hik_OC_T1c.nii', head)
+    filewrite(unmask(sol[0].T[:, 2:], mask), 'betas_hik_OC_T1c', ref_img)
     np.savetxt('meica_mix_T1c.1D', mmixnogs)
 
 
-def write_split_ts(data, comptable, mmix, acc, rej, midk, head, suffix=''):
+def write_split_ts(data, comptable, mmix, acc, rej, midk, ref_img, suffix=''):
     mdata = fmask(data, mask)
-    betas = fmask(get_coeffs(unmask((mdata.T-mdata.T.mean(0)).T, mask),
+    betas = fmask(get_coeffs(unmask((mdata.T - mdata.T.mean(0)).T, mask),
                              mask, mmix), mask)
     dmdata = mdata.T-mdata.T.mean(0)
     varexpl = (1-((dmdata.T-betas.dot(mmix.T))**2.).sum()/(dmdata**2.).sum())*100
@@ -1276,23 +1261,21 @@ def write_split_ts(data, comptable, mmix, acc, rej, midk, head, suffix=''):
     midkts = betas[:, midk].dot(mmix.T[midk, :])
     lowkts = betas[:, rej].dot(mmix.T[rej, :])
     if len(acc) != 0:
-        niwrite(unmask(betas[:, acc].dot(mmix.T[acc, :]), mask),
-                aff, 'hik_ts_{0}.nii'.format(suffix), head)
+        filewrite(unmask(betas[:, acc].dot(mmix.T[acc, :]), mask),
+                  'hik_ts_{0}'.format(suffix), ref_img)
     if len(midk) != 0:
-        niwrite(unmask(midkts, mask), aff, 'midk_ts_{0}.nii'.format(suffix),
-                head)
+        filewrite(unmask(midkts, mask), 'midk_ts_{0}'.format(suffix), ref_img)
     if len(rej) != 0:
-        niwrite(unmask(lowkts, mask), aff, 'lowk_ts_{0}.nii'.format(suffix),
-                head)
-    niwrite(unmask(fmask(data, mask)-lowkts-midkts, mask), aff,
-            'dn_ts_{0}.nii'.format(suffix), head)
+        filewrite(unmask(lowkts, mask), 'lowk_ts_{0}'.format(suffix), ref_img)
+    filewrite(unmask(data[mask] - lowkts - midkts, mask),
+              'dn_ts_{0}'.format(suffix), ref_img)
     return varexpl
 
 
-def writefeats(data, mmix, mask, head, suffix=''):
+def writefeats(data, mmix, mask, ref_img, suffix=''):
     # Write feature versions of components
     feats = computefeats2(data, mmix, mask)
-    niwrite(unmask(feats, mask), aff, 'feats_{0}.nii'.format(suffix), head)
+    filewrite(unmask(feats, mask), 'feats_{0}'.format(suffix), ref_img)
 
 
 def writect(comptable, nt, acc, rej, midk, empty, ctname='', varexpl='-1'):
@@ -1341,32 +1324,32 @@ def writect(comptable, nt, acc, rej, midk, empty, ctname='', varexpl='-1'):
                                                   sortab[i, 4]))
 
 
-def writeresults(OCcatd, comptable, mmix, nt, acc, rej, midk, empty, head):
+def writeresults(OCcatd, comptable, mmix, nt, acc, rej, midk, empty, ref_img):
     lgr.info('++ Writing optimally combined time series')
     ts = OCcatd
-    niwrite(ts, aff, 'ts_OC.nii', head)
+    filewrite(ts, 'ts_OC', ref_img)
     print("++ Writing Kappa-filtered optimally combined timeseries")
-    varexpl = write_split_ts(ts, comptable, mmix, acc, rej, midk, head,
+    varexpl = write_split_ts(ts, comptable, mmix, acc, rej, midk, ref_img,
                              suffix='OC')
     print("++ Writing signal versions of components")
     ts_B = get_coeffs(ts, mask, mmix)
-    niwrite(ts_B[:, :, :, :], aff, 'betas_OC.nii', head)
+    filewrite(ts_B[:, :, :, :], 'betas_OC', ref_img)
 
     if len(acc) != 0:
-        niwrite(ts_B[:, :, :, acc], aff, 'betas_hik_OC.nii', head)
+        filewrite(ts_B[:, :, :, acc], 'betas_hik_OC', ref_img)
         print("++ Writing optimally combined high-Kappa features")
         writefeats(split_ts(ts, comptable, mmix, acc, rej, midk)[0],
-                   mmix[:, acc], mask, head, suffix='OC2')
+                   mmix[:, acc], mask, ref_img, suffix='OC2')
     print("++ Writing component table")
     writect(comptable, nt, acc, rej, midk, empty, ctname='comp_table.txt',
             varexpl=varexpl)
 
 
-def writeresults_echoes(acc, rej, midk, head, comptable, mmix, n_echos):
+def writeresults_echoes(acc, rej, midk, ref_img, comptable, mmix, n_echos):
     for i_echo in range(n_echos):
         print("++ Writing Kappa-filtered TE#%i timeseries" % (i_echo+1))
         write_split_ts(catd[:, :, :, i_echo, :], comptable, mmix,
-                       acc, rej, midk, head, suffix='e%i' % (i_echo+1))
+                       acc, rej, midk, ref_img, suffix='e%i' % (i_echo+1))
 
 
 def main(data, tes, mixm=None, ctab=None, manacc=None, strict=False,
@@ -1377,8 +1360,8 @@ def main(data, tes, mixm=None, ctab=None, manacc=None, strict=False,
     """
     Parameters
     ----------
-    data : :obj:`str` or :obj:`list` of :obj:`str`
-        Either a single z-concatenated file (str or single-entry list) or a
+    data : :obj:`list` of :obj:`str`
+        Either a single z-concatenated file (single-entry list) or a
         list of echo-specific files, in ascending order.
     tes : :obj:`list`
         List of echo times associated with data in milliseconds.
@@ -1426,49 +1409,23 @@ def main(data, tes, mixm=None, ctab=None, manacc=None, strict=False,
     fixed_seed : :obj:`int`, optional
         Seeded value for ICA, for reproducibility.
     """
-    global catd, head, aff
-    tes = [float(te) for te in tes]
-    n_echos = len(tes)
 
-    # TODO: attempt to derive input data format as soon as possible
-    # we'll need to carry this through to writing out all the resultant output
-    # files for the rest of the script; options should include .nii and .gii
-    #
-    # output_type = get_input_type(options.data)
+    global catd, ref_img
 
-    if isinstance(data, str):
-        catim = nib.load(data)
-    elif len(data) == 1:
-        catim = nib.load(data[0])
-    else:
-        if len(data) != n_echos:
-            raise ValueError('Number of single-echo "data" files does not '
-                             'match number of echos '
-                             '({0} != {1})'.format(len(data), n_echos))
-        imgs = [nib.load(f) for f in data]
-        if not np.array_equal([img.affine for img in imgs]):
-            raise ValueError('All affines from files in "data" must be equal.')
-        zcat_data = np.dstack([img.get_data() for img in imgs])
-        catim = nib.Nifti1Image(zcat_data, imgs[0].affine,
-                                header=imgs[0].get_header())
-
-    # Prepare image metadata for output files
-    head = catim.get_header()
-    head.extensions = []
-    head.set_sform(head.get_sform(), code=1)
-    aff = catim.affine
+    # ensure tes are in appropriate format
+    tes = [float(te) for te in tes]
+    n_echos = len(tes)
 
     # coerce data to samples x echos x time array
-    catd = cat2echos(data, n_echos=n_echos)
+    catd, ref_img = load_data(data, n_echos=n_echos)
     n_samp, n_echos, n_vols = catd.shape
 
     if fout:
-        fout = aff
+        fout = ref_img
     else:
         fout = None
 
-    kdaw = float(kdaw)
-    rdaw = float(rdaw)
+    kdaw, rdaw = float(kdaw), float(rdaw)
 
     if label is not None:
         out_dir = 'TED.{0}'.format(label)
@@ -1498,52 +1455,57 @@ def main(data, tes, mixm=None, ctab=None, manacc=None, strict=False,
 
     lgr.info('++ Computing T2* map')
     global t2s, s0, t2sG
-    t2s, s0, t2ss, s0s, t2sG, s0G = t2sadmap(catd, tes, mask, masksum,
+    t2s, s0, t2ss, s0s, t2sG, s0G = t2sadmap(catd, tes,
+                                             mask, masksum,
                                              start_echo=1)
 
     # set a hard cap for the T2* map
-    # anything that is 10x higher than the 99.5 %ile will be reset to
+    # anything that is 10x higher than the 99.5 %ile will be reset to 99.5 %ile
     cap_t2s = stats.scoreatpercentile(t2s.flatten(), 99.5,
                                       interpolation_method='lower')
-    t2s[t2s > cap_t2s*10] = cap_t2s
-    niwrite(s0, aff, op.join(out_dir, 's0v.nii'), head)
-    niwrite(t2s, aff, op.join(out_dir, 't2sv.nii'), head)
-    niwrite(t2ss, aff, op.join(out_dir, 't2ss.nii'), head)
-    niwrite(s0s, aff, op.join(out_dir, 's0vs.nii'), head)
-    niwrite(s0G, aff, op.join(out_dir, 's0vG.nii'), head)
-    niwrite(t2sG, aff, op.join(out_dir, 't2svG.nii'), head)
-
-    # Optimally combine data
+    t2s[t2s > cap_t2s * 10] = cap_t2s
+    filewrite(s0, op.join(out_dir, 's0v'), ref_img)
+    filewrite(t2s, op.join(out_dir, 't2sv'), ref_img)
+    filewrite(t2ss, op.join(out_dir, 't2ss'), ref_img)
+    filewrite(s0s, op.join(out_dir, 's0vs'), ref_img)
+    filewrite(s0G, op.join(out_dir, 's0vG'), ref_img)
+    filewrite(t2sG, op.join(out_dir, 't2svG'), ref_img)
+
+    # optimally combine data
     global OCcatd
-    OCcatd = optcom(catd, t2sG, tes, mask, combmode)
+    OCcatd = make_optcom(catd, t2sG, tes, mask, combmode)
+
+    # regress out global signal unless explicitly not desired
     if not no_gscontrol:
-        catd, OCcatd = gscontrol_raw(catd, OCcatd, head, n_echos)
+        catd, OCcatd = gscontrol_raw(catd, OCcatd, n_echos, ref_img)
 
     if mixm is None:
         lgr.info("++ Doing ME-PCA and ME-ICA")
-        n_components, dd = tedpca(catd, combmode, mask, stabilize, head,
+        n_components, dd = tedpca(catd, combmode, mask, stabilize, ref_img,
                                   tes=tes, kdaw=kdaw, rdaw=rdaw, ste=ste)
         mmix_orig = tedica(n_components, dd, conv, fixed_seed, cost=initcost,
                            final_cost=finalcost)
         np.savetxt(op.join(out_dir, '__meica_mix.1D'), mmix_orig)
         seldict, comptable, betas, mmix = fitmodels_direct(catd, mmix_orig,
                                                            mask, t2s, t2sG,
-                                                           tes, combmode, head,
+                                                           tes, combmode,
+                                                           ref_img,
                                                            fout=fout,
                                                            reindex=True)
         np.savetxt(op.join(out_dir, 'meica_mix.1D'), mmix)
 
-        acc, rej, midk, empty = selcomps(seldict, mmix, head, manacc, n_echos,
+        acc, rej, midk, empty = selcomps(seldict, mmix, ref_img, manacc, n_echos,
                                          strict_mode=strict,
                                          filecsdata=filecsdata)
     else:
         mmix_orig = np.loadtxt(op.join(out_dir, 'meica_mix.1D'))
         seldict, comptable, betas, mmix = fitmodels_direct(catd, mmix_orig,
                                                            mask, t2s, t2sG,
-                                                           tes, combmode, head,
+                                                           tes, combmode,
+                                                           ref_img,
                                                            fout=fout)
         if ctab is None:
-            acc, rej, midk, empty = selcomps(seldict, mmix, head, manacc,
+            acc, rej, midk, empty = selcomps(seldict, mmix, ref_img, manacc,
                                              n_echos,
                                              filecsdata=filecsdata,
                                              strict_mode=strict)
@@ -1554,7 +1516,7 @@ def main(data, tes, mixm=None, ctab=None, manacc=None, strict=False,
         lgr.info('** WARNING! No BOLD components detected!!! \n'
                  '** Please check data and results!')
 
-    writeresults(OCcatd, comptable, mmix, n_vols, acc, rej, midk, empty, head)
-    gscontrol_mmix(mmix, acc, rej, midk, empty, head)
+    writeresults(OCcatd, comptable, mmix, n_vols, acc, rej, midk, empty, ref_img)
+    gscontrol_mmix(mmix, acc, rej, midk, empty, ref_img)
     if dne:
-        writeresults_echoes(acc, rej, midk, head, comptable, mmix, n_echos)
+        writeresults_echoes(acc, rej, midk, ref_img, comptable, mmix, n_echos)
diff --git a/tedana/tests/test_utils.py b/tedana/tests/test_utils.py
index 899f71018..14ab70305 100644
--- a/tedana/tests/test_utils.py
+++ b/tedana/tests/test_utils.py
@@ -48,10 +48,6 @@ def test_fitgaussian():
     pass
 
 
-def test_niwrite():
-    pass
-
-
 def test_dice():
     pass
 
diff --git a/tedana/utils/__init__.py b/tedana/utils/__init__.py
index 4059ccb24..fb85ab8e7 100644
--- a/tedana/utils/__init__.py
+++ b/tedana/utils/__init__.py
@@ -2,13 +2,15 @@
 # ex: set sts=4 ts=4 sw=4 et:
 
 from .utils import (
-    cat2echos, uncat2echos, make_min_mask,
-    makeadmask, fmask, unmask,
-    fitgaussian, niwrite, dice, andb,
+    load_image, load_data,
+    make_min_mask, makeadmask,
+    fmask, unmask, filewrite,
+    fitgaussian, dice, andb,
 )
 
 
 __all__ = [
-    'cat2echos', 'uncat2echos', 'make_min_mask',
-    'makeadmask', 'fmask', 'unmask',
-    'fitgaussian', 'niwrite', 'dice', 'andb']
+    'load_image', 'load_data',
+    'make_min_mask', 'makeadmask',
+    'fmask', 'unmask', 'filewrite',
+    'fitgaussian', 'dice', 'andb']
diff --git a/tedana/utils/utils.py b/tedana/utils/utils.py
index 08af43602..a090b95db 100644
--- a/tedana/utils/utils.py
+++ b/tedana/utils/utils.py
@@ -1,13 +1,52 @@
 """Utilities for meica package"""
+import os.path as op
 import numpy as np
 import nibabel as nib
 from nibabel.filename_parser import splitext_addext
+from nilearn.image import new_img_like
 from nilearn._utils import check_niimg
 import nilearn.masking as nimask
 from scipy.optimize import leastsq
 
 from ..due import due, BibTeX
 
+FORMATS = {'.nii': 'NIFTI',
+           '.gii': 'GIFTI'}
+
+
+def get_dtype(data):
+    """
+    Determines neuroimaging format of `data`
+
+    Parameters
+    ----------
+    data : list-of-str or str or img_like
+        Data to determine format of
+
+    Returns
+    -------
+    dtype : {'NIFTI', 'GIFTI', 'OTHER'} str
+        Format of input data
+    """
+
+    if isinstance(data, list):
+        dtypes = np.unique([splitext_addext(d)[1] for d in data])
+        if dtypes.size > 1:
+            raise ValueError('Provided data detected to have varying formats: '
+                             '{}'.format(dtypes))
+        dtype = dtypes[0]
+    elif isinstance(data, str):
+        dtype = splitext_addext(data)[1]
+    else:  # img_like?
+        if not hasattr(data, 'valid_exts'):
+            raise TypeError('Input data format cannot be detected.')
+        dtype = data.valid_exts[0]
+
+    if dtype in FORMATS.keys():
+        return FORMATS[dtype]
+
+    return 'OTHER'
+
 
 def load_image(data):
     """
@@ -15,7 +54,7 @@ def load_image(data):
 
     Parameters
     ----------
-    data : (X x Y x Z [x T]) array_like or niimg-like object
+    data : (X x Y x Z [x T]) array_like or img_like object
         Data array or data file to be loaded / reshaped
 
     Returns
@@ -25,25 +64,24 @@ def load_image(data):
     """
 
     if isinstance(data, str):
-        root, ext, addext = splitext_addext(data)
-        if ext == '.gii':
+        if get_dtype(data) == 'GIFTI':
             fdata = np.column_stack([f.data for f in nib.load(data).darrays])
             return fdata
-        else:
+        elif get_dtype(data) == 'NIFTI':
             data = check_niimg(data).get_data()
 
-    fdata = data.reshape((-1,) + data.shape[3:], order='F')
+    fdata = data.reshape((-1,) + data.shape[3:])
 
     return fdata.squeeze()
 
 
-def cat2echos(data, n_echos=None):
+def load_data(data, n_echos=None):
     """
     Coerces input `data` files to required 3D array output
 
     Parameters
     ----------
-    data : (X x Y x M x T) array_like or list-of-img-like
+    data : (X x Y x M x T) array_like or list-of-img_like
         Input multi-echo data array, where `X` and `Y` are spatial dimensions,
         `M` is the Z-spatial dimensions with all the input echos concatenated,
         and `T` is time. A list of image-like objects (e.g., .nii or .gii) are
@@ -56,31 +94,35 @@ def cat2echos(data, n_echos=None):
     -------
     fdata : (S x E x T) np.ndarray
         Output data where `S` is samples, `E` is echos, and `T` is time
+    ref_img : str or img_like
+        Reference image (or path to one) for saving output files
     """
 
-    # data files were provided
     if isinstance(data, list):
-        # individual echo files were provided
-        if len(data) > 2:
-            fdata = np.stack([load_image(f) for f in data], axis=1)
-            if fdata.ndim < 3:
-                fdata = fdata[..., np.newaxis]
-            return fdata
-        # a z-concatenated file was provided; load data and pipe it down
-        elif len(data) == 1:
+        if get_dtype(data) == 'GIFTI':  # TODO: deal with L/R split GIFTI files
+            pass
+        if len(data) == 1:  # a z-concatenated file was provided
             if n_echos is None:
                 raise ValueError('Number of echos `n_echos` must be specified '
                                  'if z-concatenated data file provided.')
-            data = check_niimg(data[0]).get_data()
-        # only two echo files were provided, which doesn't fly
-        else:
+            data = data[0]
+        elif len(data) == 2:  # inviable -- need more than 2 echos
             raise ValueError('Cannot run `tedana` with only two echos: '
                              '{}'.format(data))
+        else:  # individual echo files were provided
+            fdata = np.stack([load_image(f) for f in data], axis=1)
+            return np.atleast_3d(fdata), data[0]
 
-    (nx, ny), nz = data.shape[:2], data.shape[2] // n_echos
-    fdata = load_image(data.reshape(nx, ny, nz, n_echos, -1, order='F'))
+    # we have a z-cat file
+    img = check_niimg(data)
+    (nx, ny), nz = img.shape[:2], img.shape[2] // n_echos
+    fdata = load_image(img.get_data().reshape(nx, ny, nz, n_echos, -1))
 
-    return fdata
+    # create reference image
+    ref_img = img.__class__(np.zeros((nx, ny, nz)), affine=img.affine,
+                            header=img.header, extra=img.extra)
+
+    return fdata, ref_img
 
 
 def makeadmask(data, minimum=True, getsum=False):
@@ -140,7 +182,7 @@ def make_min_mask(data):
     """
     Generates a 3D mask of `data`
 
-    Only voxels that are consistently (i.e., across time AND echoes) non-zero
+    Only samples that are consistently (i.e., across time AND echoes) non-zero
     in `data` are True in output
 
     Parameters
@@ -159,34 +201,165 @@ def make_min_mask(data):
     return data.prod(axis=-1).prod(axis=-1).astype(bool)
 
 
-def get_input_type(input):
-    pass
+def filewrite(data, filename, ref_img, gzip=False, copy_header=True,
+              copy_meta=False):
+    """
+    Writes `data` to `filename` in format of `ref_img`
+
+    If `ref_img` dtype is GIFTI, then `data` is assumed to be stacked L/R
+    hemispheric and will be split and saved as two files
+
+    Parameters
+    ----------
+    data : (S [x T]) array_like
+        Data to be saved
+    filename : str
+        Filepath where data should be saved to
+    ref_img : str or img_like
+        Reference image
+    gzip : bool, optional
+        Whether to gzip output (if not specified in `filename`). Only applies
+        if output dtype is NIFTI. Default: False
+    copy_header : bool, optional
+        Whether to copy header from `ref_img` to new image. Default: True
+    copy_meta : bool, optional
+        Whether to copy meta from `ref_img` to new image. Only applies if
+        output dtype is GIFTI. Default: False
+
+    Returns
+    -------
+    name : str
+        Path of saved image (with added extensions, as appropriate)
+    """
+
+    # get datatype and reference image for comparison
+    dtype = get_dtype(ref_img)
+    if isinstance(ref_img, list):
+        ref_img = ref_img[0]
+
+    # ensure that desired output type (from name) is compatible with `dtype`;
+    # extensions missing from FORMATS are treated as 'OTHER', not a KeyError
+    root, ext, add = splitext_addext(filename)
+    out_fmt = FORMATS.get(ext, 'OTHER')
+    if ext != '' and out_fmt != dtype:
+        raise ValueError('Cannot write {} data to {} file. Please ensure file '
+                         'formats are compatible'.format(dtype, out_fmt))
+
+    if dtype == 'NIFTI':
+        out = new_nii_like(ref_img, data, copy_header=copy_header)
+        name = '{}.{}'.format(root, 'nii.gz' if gzip else 'nii')
+        out.to_filename(name)
+    elif dtype == 'GIFTI':
+        # remove possible hemispheric denotations from root
+        root = op.join(op.dirname(root), op.basename(root).split('.')[0])
+        # save hemispheres separately
+        for n, (hdata, hemi) in enumerate(zip(np.split(data, 2, axis=0),
+                                              ['L', 'R'])):
+            out = new_gii_like(ref_img[n], hdata, copy_header=copy_header,
+                               copy_meta=copy_meta)
+            name = '{}.{}.func.gii'.format(root, hemi)
+            out.to_filename(name)
+
+    return name
+
+
+def new_nii_like(ref_img, data, copy_header=True):
+    """
+    Coerces `data` into NiftiImage format like `ref_img`
+
+    Parameters
+    ----------
+    ref_img : str or img_like
+        Reference image
+    data : (S [x T]) array_like
+        Data to be saved
+    copy_header : bool, optional
+        Whether to copy header from `ref_img` to new image. Default: True
+
+    Returns
+    -------
+    nii : nib.nifti.NiftiXImage
+        NiftiImage
+    """
+
+    ref_img = check_niimg(ref_img)
+    nii = new_img_like(ref_img,
+                       data.reshape(ref_img.shape[:3] + data.shape[1:]),
+                       copy_header=copy_header)
+    nii.set_data_dtype(data.dtype)
+
+    return nii
 
 
-def niwrite(data, affine, name, head, outtype='.nii.gz'):
+def new_gii_like(ref_img, data, copy_header=True, copy_meta=False):
     """
-    Write out nifti file.
+    Coerces `data` into GiftiImage format like `ref_img`
 
     Parameters
     ----------
-    data : array_like
-    affine : (4 x 4) array_like
-        Affine for output file
-    name : str
-        Name to save output file to
-    head : object
-    outtype : str, optional
-        Output type of file. Default: '.nii.gz'
+    ref_img : str or img_like
+        Reference image
+    data : (S [x T]) array_like
+        Data to be saved
+    copy_header : bool, optional
+        Whether to copy header from `ref_img` to new image. Default: True
+    copy_meta : bool, optional
+        Whether to copy meta from `ref_img` to new image. Default: False
+
+    Returns
+    -------
+    gii : nib.gifti.GiftiImage
+        GiftiImage
+    """
+
+    if isinstance(ref_img, str):
+        ref_img = nib.load(ref_img)
+
+    if data.ndim == 1:
+        data = np.atleast_2d(data).T
+
+    darrays = [make_gii_darray(ref_img.darrays[n], d, copy_meta=copy_meta)
+               for n, d in enumerate(data.T)]
+    gii = nib.gifti.GiftiImage(header=ref_img.header if copy_header else None,
+                               extra=ref_img.extra,
+                               meta=ref_img.meta if copy_meta else None,
+                               labeltable=ref_img.labeltable,
+                               darrays=darrays)
+
+    return gii
+
+
+def make_gii_darray(ref_array, data, copy_meta=False):
     """
+    Converts `data` into GiftiDataArray format like `ref_array`
+
+    Parameters
+    ----------
+    ref_array : nib.gifti.GiftiDataArray
+        Reference array
+    data : (S,) array_like
+        Data to be saved
+    copy_meta : bool, optional
+        Whether to copy meta from `ref_array` to new array. Default: False
+
+    Returns
+    -------
+    darray : nib.gifti.GiftiDataArray
+        Output data array instance
+    """
+
+    if not isinstance(ref_array, nib.gifti.GiftiDataArray):
+        raise TypeError('Provided reference is not a GiftiDataArray.')
+    darray = nib.gifti.GiftiDataArray(data,
+                                      intent=ref_array.intent,
+                                      datatype=data.dtype,
+                                      encoding=ref_array.encoding,
+                                      endian=ref_array.endian,
+                                      coordsys=ref_array.coordsys,
+                                      ordering=ref_array.ind_ord,
+                                      meta=ref_array.meta if copy_meta else None)
 
-    # get rid of NaN
-    data[np.isnan(data)] = 0
-    # set header info
-    header = head.copy()
-    header.set_data_shape(list(data.shape))
-    outni = nib.Nifti1Image(data, affine, header=header)
-    outni.set_data_dtype('float64')
-    outni.to_filename(name)
+    return darray
 
 
 def uncat2echos(data):
@@ -209,7 +382,7 @@ def uncat2echos(data):
                          'provided data has only {0}'.format(data.ndim))
 
     (nx, ny), nz = data.shape[:2], np.prod(data.shape[2:4])
-    return data.reshape(nx, ny, nz, -1, order='F')
+    return data.reshape(nx, ny, nz, -1)
 
 
 def fmask(data, mask=None):
@@ -218,9 +391,9 @@ def fmask(data, mask=None):
 
     Parameters
     ----------
-    data : (X x Y x Z [x E [x T]) array_like or niimg-like object
+    data : (X x Y x Z [x E [x T]) array_like or img_like object
         Data array or data file to be masked
-    mask : (X x Y x Z) array_like or niimg-like object
+    mask : (X x Y x Z) array_like or img_like object
         Boolean array or mask file
 
     Returns

From 9785e424831ffc68ea4fdc25af2b90a22fd2a5de Mon Sep 17 00:00:00 2001
From: Elizabeth DuPre <emd222@cornell.edu>
Date: Mon, 7 May 2018 14:47:56 -0400
Subject: [PATCH 06/18] Update csstepdata format as JSON

---
 tedana/interfaces/tedana.py | 38 +++++++++++++++++++++----------------
 1 file changed, 22 insertions(+), 16 deletions(-)

diff --git a/tedana/interfaces/tedana.py b/tedana/interfaces/tedana.py
index b39d32efe..7f5c1c3df 100644
--- a/tedana/interfaces/tedana.py
+++ b/tedana/interfaces/tedana.py
@@ -1,5 +1,6 @@
 import os
 import os.path as op
+import json
 import shutil
 import pickle
 import textwrap
@@ -801,15 +802,15 @@ def selcomps(seldict, mmix, head, manacc, n_echos, debug=False, olevel=2, oversi
                                     np.union1d(group0, rej))
             min_acc = np.union1d(group0, toacc_hi)
             to_clf = np.setdiff1d(nc, np.union1d(min_acc, rej))
-        diagstepkeys = ['rej', 'KRcut', 'Kcut', 'Rcut', 'dbscanfailed',
-                        'midkfailed', 'KRguess', 'group0', 'min_acc',
-                        'toacc_hi']
-        diagstepout = []
-        for ddk in diagstepkeys:
-            diagstepout.append("%s: %s" % (ddk, eval('str(%s)' % ddk)))
+        diagstep_keys = [rej, KRcut, Kcut, Rcut, dbscanfailed,
+                         midkfailed, KRguess, min_acc, toacc_hi]
+        diagstep_vals = ['Rejected components', 'Kappa-Rho cut point',
+                         'Kappa cut point', 'Rho cut point', 'DBSCAN failed to converge',
+                         'Mid-Kappa failed (limited BOLD signal)', 'Kappa-Rho guess',
+                         'min_acc', 'toacc_hi']
+
         with open('csstepdata.txt', 'w') as ofh:
-            ofh.write('\n'.join(diagstepout))
-        ofh.close()
+            json.dumps(dict(zip(diagstep_keys, diagstep_vals)), ofh)
         return list(sorted(min_acc)), list(sorted(rej)), [], list(sorted(to_clf))
 
     # Find additional components to reject based on Dice - doing this here
@@ -979,15 +980,20 @@ def selcomps(seldict, mmix, head, manacc, n_echos, debug=False, olevel=2, oversi
         orphan = np.setdiff1d(nc, list(ncl) + list(to_ign) + list(midk) + list(rej))
 
     if savecsdiag:
-        diagstepkeys = ['rej', 'KRcut', 'Kcut', 'Rcut', 'dbscanfailed',
-                        'KRguess', 'group0', 'dice_rej', 'rej_supp', 'to_clf',
-                        'midk', 'svm_acc_fail', 'toacc_hi', 'toacc_lo',
-                        'field_art', 'phys_art', 'misc_art', 'ncl', 'ign']
-        diagstepout = []
-        for ddk in diagstepkeys:
-            diagstepout.append("%s: %s" % (ddk, eval('str(%s)' % ddk)))
+
+        diagstep_keys = ['Rejected components', 'Kappa-Rho cut point', 'Kappa cut',
+                         'Rho cut', 'DBSCAN failed to converge', 'Kappa-Rho guess',
+                         'Dice rejected', 'rej_supp', 'to_clf',
+                         'Mid-kappa components', 'svm_acc_fail', 'toacc_hi', 'toacc_lo',
+                         'Field artifacts', 'Physiological artifacts',
+                         'Miscellaneous artifacts', 'ncl', 'Ignored components']
+        diagstep_vals = [rej, KRcut, Kcut, Rcut, dbscanfailed,
+                         KRguess, dice_rej, rej_supp, to_clf,
+                         midk, svm_acc_fail, toacc_hi, toacc_lo,
+                         field_art, phys_art, misc_art, ncl, ign]
+
         with open('csstepdata.txt', 'w') as ofh:
-            ofh.write('\n'.join(diagstepout))
+            json.dumps(dict(zip(diagstep_keys, diagstep_vals)), ofh)
         allfz = np.array([Tz, Vz, Ktz, KRr, cnz, Rz, mmix_kurt, fdist_z])
         np.savetxt('csdata.txt', allfz)
 

From a4d9f5b214ebd799aaf1a430292ce43202616819 Mon Sep 17 00:00:00 2001
From: Elizabeth DuPre <emd222@cornell.edu>
Date: Mon, 7 May 2018 14:55:56 -0400
Subject: [PATCH 07/18] Remove currently unused imports for linting

---
 tedana/tests/test_utils.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/tedana/tests/test_utils.py b/tedana/tests/test_utils.py
index 14ab70305..8a027a858 100644
--- a/tedana/tests/test_utils.py
+++ b/tedana/tests/test_utils.py
@@ -2,11 +2,6 @@
 Tests for tedana.utils
 """
 
-import os.path
-from tedana import utils
-import nibabel as nb
-import numpy as np
-
 
 def test_load_image():
     pass

From e9994cddea8cc067f2089ea86af9ca4e4d5ea8fd Mon Sep 17 00:00:00 2001
From: Elizabeth DuPre <emd222@cornell.edu>
Date: Mon, 7 May 2018 15:21:31 -0400
Subject: [PATCH 08/18] Address review comments

---
 tedana/interfaces/tedana.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tedana/interfaces/tedana.py b/tedana/interfaces/tedana.py
index 7f5c1c3df..d81915b3a 100644
--- a/tedana/interfaces/tedana.py
+++ b/tedana/interfaces/tedana.py
@@ -810,7 +810,7 @@ def selcomps(seldict, mmix, head, manacc, n_echos, debug=False, olevel=2, oversi
                          'min_acc', 'toacc_hi']
 
         with open('csstepdata.txt', 'w') as ofh:
-            json.dumps(dict(zip(diagstep_keys, diagstep_vals)), ofh)
+            json.dump(dict(zip(diagstep_keys, diagstep_vals)), ofh, indent=4, sort_keys=True)
         return list(sorted(min_acc)), list(sorted(rej)), [], list(sorted(to_clf))
 
     # Find additional components to reject based on Dice - doing this here
@@ -993,7 +993,7 @@ def selcomps(seldict, mmix, head, manacc, n_echos, debug=False, olevel=2, oversi
                          field_art, phys_art, misc_art, ncl, ign]
 
         with open('csstepdata.txt', 'w') as ofh:
-            json.dumps(dict(zip(diagstep_keys, diagstep_vals)), ofh)
+            json.dump(dict(zip(diagstep_keys, diagstep_vals)), ofh, indent=4, sort_keys=True)
         allfz = np.array([Tz, Vz, Ktz, KRr, cnz, Rz, mmix_kurt, fdist_z])
         np.savetxt('csdata.txt', allfz)
 

From 047242d443fb5ffce6a1bf3c1bb931b66a71c6d7 Mon Sep 17 00:00:00 2001
From: Elizabeth DuPre <emd222@cornell.edu>
Date: Mon, 7 May 2018 15:35:40 -0400
Subject: [PATCH 09/18] Patch errors

---
 tedana/interfaces/tedana.py | 6 +++---
 tedana/utils/__init__.py    | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tedana/interfaces/tedana.py b/tedana/interfaces/tedana.py
index d81915b3a..3c060e731 100644
--- a/tedana/interfaces/tedana.py
+++ b/tedana/interfaces/tedana.py
@@ -802,12 +802,12 @@ def selcomps(seldict, mmix, head, manacc, n_echos, debug=False, olevel=2, oversi
                                     np.union1d(group0, rej))
             min_acc = np.union1d(group0, toacc_hi)
             to_clf = np.setdiff1d(nc, np.union1d(min_acc, rej))
-        diagstep_keys = [rej, KRcut, Kcut, Rcut, dbscanfailed,
-                         midkfailed, KRguess, min_acc, toacc_hi]
-        diagstep_vals = ['Rejected components', 'Kappa-Rho cut point',
+        diagstep_keys = ['Rejected components', 'Kappa-Rho cut point',
                          'Kappa cut point', 'Rho cut point', 'DBSCAN failed to converge',
                          'Mid-Kappa failed (limited BOLD signal)', 'Kappa-Rho guess',
                          'min_acc', 'toacc_hi']
+        diagstep_vals = [rej, KRcut, Kcut, Rcut, dbscanfailed,
+                         midkfailed, KRguess, min_acc, toacc_hi]
 
         with open('csstepdata.txt', 'w') as ofh:
             json.dump(dict(zip(diagstep_keys, diagstep_vals)), ofh, indent=4, sort_keys=True)
diff --git a/tedana/utils/__init__.py b/tedana/utils/__init__.py
index fb85ab8e7..771178fd5 100644
--- a/tedana/utils/__init__.py
+++ b/tedana/utils/__init__.py
@@ -2,7 +2,7 @@
 # ex: set sts=4 ts=4 sw=4 et:
 
 from .utils import (
-    load_image, load_data,
+    load_image, load_data, get_dtype,
     make_min_mask, makeadmask,
     fmask, unmask, filewrite,
     fitgaussian, dice, andb,
@@ -10,7 +10,7 @@
 
 
 __all__ = [
-    'load_image', 'load_data',
+    'load_image', 'load_data', 'get_dtype'
     'make_min_mask', 'makeadmask',
     'fmask', 'unmask', 'filewrite',
     'fitgaussian', 'dice', 'andb']

From bb7eb175ce02027585f55b5ae8296d369f16b778 Mon Sep 17 00:00:00 2001
From: Ross Markello <rossmarkello@gmail.com>
Date: Tue, 8 May 2018 00:03:15 -0400
Subject: [PATCH 10/18] [DOC] Doc-strings galore! Very basic but a start

Added incredibly basic doc-strings to most of the functions in tedana.py
---
 tedana/interfaces/tedana.py | 592 +++++++++++++++++++++++++++++-------
 1 file changed, 480 insertions(+), 112 deletions(-)

diff --git a/tedana/interfaces/tedana.py b/tedana/interfaces/tedana.py
index 3c060e731..37f2ac3e8 100644
--- a/tedana/interfaces/tedana.py
+++ b/tedana/interfaces/tedana.py
@@ -37,29 +37,28 @@
 
 def do_svm(X_train, y_train, X_test, svmtype=0):
     """
-    sklearn's Support Vector Classification (SVC).
-    For svmtype=1, implemented in liblinear rather than libsvm.
+    Implements Support Vector Classification on provided data
 
     Parameters
     ----------
-    X_train : {array-like, sparse matrix}, shape (n_samples, n_features)
+    X_train : (N1 x F) array_like
         Training vectors, where n_samples is the number of samples in the
         training dataset and n_features is the number of features.
-    y_train : array-like, shape (n_samples,)
+    y_train : (N1,) array_like
         Target values (class labels in classification, real numbers in
         regression)
-    X_test : {array-like, sparse matrix}, shape (n_samples, n_features)
+    X_test : (N2 x F) array_like
         Test vectors, where n_samples is the number of samples in the test
         dataset and n_features is the number of features.
     svmtype : int
-        Desired support vector machine type.
+        Desired support vector machine type
 
     Returns
     -------
-    y_pred : array, shape (n_samples,)
-        Predicted class labels for samples in X_test.
+    y_pred : (N2,) np.ndarray
+        Predicted class labels for samples in `X_test`
     clf : {:obj:`sklearn.svm.classes.SVC`, :obj:`sklearn.svm.classes.LinearSVC`}
-        Trained sklearn model instance.
+        Trained sklearn model instance
     """
 
     if svmtype == 0:
@@ -69,7 +68,7 @@ def do_svm(X_train, y_train, X_test, svmtype=0):
     elif svmtype == 2:
         clf = svm.SVC(kernel='linear', probability=True)
     else:
-        raise ValueError('Input svmtype not in range (3)')
+        raise ValueError('Input svmtype not in [1, 2, 3]')
 
     clf.fit(X_train, y_train)
     y_pred = clf.predict(X_test)
@@ -84,19 +83,28 @@ def spatclust(data, mask, csize, thr, ref_img, infile=None, dindex=0,
 
     Parameters
     ----------
-    data : array_like
-    mask :
+    data : (S x T) array-like
+        Input data array
+    mask : (S,) array-like
+        Boolean mask array
     csize : int
-    thr :
-    header :
-    aff :
-    infile :
-    dindex :
-    tindex :
+        Size of cluster (in voxels) to retain
+    thr : float
+        Value to threshold image at before clustering
+    ref_img : str or img_like
+        Reference image to dictate how outputs are saved to disk
+    infile : str, optional
+        Path to file that should be used for clustering instead of `data`.
+        Default: None
+    dindex : int, optional
+        Index of data (2nd dimension) to use for clustering. Default: 0
+    tindex : int, optional
+        Index of data (2nd dimension) to use for thresholding. Default: 0
 
     Returns
     -------
-    clustered :
+    clustered : (S x T) np.ndarray
+        Boolean array indicating data samples to be retained after clustering
     """
 
     if infile is None:
@@ -121,7 +129,7 @@ def spatclust(data, mask, csize, thr, ref_img, infile=None, dindex=0,
 
 def get_coeffs(data, mask, X, add_const=False):
     """
-    get_coeffs(data, X)
+    Performs least-squares fit of `X` against `data`
 
     Parameters
     ----------
@@ -130,33 +138,33 @@ def get_coeffs(data, mask, X, add_const=False):
     mask : (S,) array-like
         Boolean mask array
     X : (T x C) array-like
-        Array where `T` is time and `C` is components
+        Array where `T` is time and `C` is predictor variables
     add_const : bool, optional
         Add intercept column to `X` before fitting. Default: False
 
     Returns
     -------
-    out : (S x C) np.ndarray
-        Array of betas for all samples `S`
+    betas : (S x C) np.ndarray
+        Array of `S` sample betas for `C` predictors
     """
 
+    # mask data and flip (time x samples)
     mdata = data[mask].T
 
-    # Coerce X to >=2d
+    # coerce X to >=2d if single variable supplied
     X = np.atleast_2d(X)
-
     if X.shape[0] == 1:
         X = X.T
-    if add_const:  # add intercept
+    if add_const:  # add intercept, if specified
         Xones = np.ones((np.min(mdata.shape), 1))
         X = np.column_stack([X, Xones])
 
-    tmpbetas = np.linalg.lstsq(X, mdata)[0].T
+    betas = np.linalg.lstsq(X, mdata)[0].T
     if add_const:  # drop beta for intercept
-        tmpbetas = tmpbetas[:, :-1]
-    out = unmask(tmpbetas, mask)
+        betas = betas[:, :-1]
+    betas = unmask(betas, mask)
 
-    return out
+    return betas
 
 
 def getelbow_cons(ks, val=False):
@@ -165,13 +173,13 @@ def getelbow_cons(ks, val=False):
 
     Parameters
     ----------
-    ks : array-like
+    ks : array_like
     val : bool, optional
         Return the value of the elbow instead of the index. Default: False
 
     Returns
     -------
-    array-like
+    int or float
         Either the elbow index (if val is True) or the values at the elbow
         index (if val is False)
     """
@@ -201,13 +209,13 @@ def getelbow_mod(ks, val=False):
 
     Parameters
     ----------
-    ks : array-like
+    ks : array_like
     val : bool, optional
         Return the value of the elbow instead of the index. Default: False
 
     Returns
     -------
-    array-like
+    int or float
         Either the elbow index (if val is True) or the values at the elbow
         index (if val is False)
     """
@@ -234,14 +242,13 @@ def getelbow_aggr(ks, val=False):
 
     Parameters
     ----------
-    ks : array-like
-
+    ks : array_like
     val : bool, optional
         Default is False
 
     Returns
     -------
-    array-like
+    int or float
         Either the elbow index (if val is True) or the values at the elbow
         index (if val is False)
     """
@@ -287,57 +294,130 @@ def getfbounds(n_echos):
 def eimask(dd, ees=None):
     """
     Returns mask for data between [0.001, 5] * 98th percentile of dd
+
+    Parameters
+    ----------
+    dd : (S x E x T) array_like
+        Input data, where `S` is samples, `E` is echos, and `T` is time
+    ees : (N,) list
+        Indices of echos to assess from `dd` in calculating output
+
+    Returns
+    -------
+    imask : (S x N) np.ndarray
+        Boolean array denoting samples whose mean is within the thresholds
     """
 
     if ees is None:
         ees = range(dd.shape[1])
     imask = np.zeros([dd.shape[0], len(ees)], dtype=bool)
     for ee in ees:
-        lgr.info(ee)
+        lgr.info('++ Creating eimask for echo {}'.format(ee))
         perc98 = stats.scoreatpercentile(dd[:, ee, :].flatten(), 98,
                                          interpolation_method='lower')
         lthr, hthr = 0.001 * perc98, 5 * perc98
-        lgr.info(lthr, hthr)
+        lgr.info('++ Eimask threshold boundaries: {}'.format([lthr, hthr]))
         m = dd[:, ee, :].mean(axis=1)
         imask[np.logical_and(m > lthr, m < hthr), ee] = True
 
     return imask
 
 
-def split_ts(data, comptable, mmix, acc, rej, midk):
-    cbetas = get_coeffs(data-data.mean(-1)[:, :, :, np.newaxis], mask, mmix)
-    betas = fmask(cbetas, mask)
+def split_ts(data, mmix, mask, acc):
+    """
+    Splits `data` time series into accepted component time series and remainder
+
+    Parameters
+    ----------
+    data : (S x T) array_like
+        Input data, where `S` is samples and `T` is time
+    mmix : (T x C) array_like
+        Mixing matrix for converting input data to component space, where `C`
+        is components and `T` is the same as in `data`
+    mask : (S,) array_like
+        Boolean mask array
+    acc : list
+        List of accepted components used to subset `mmix`
+
+    Returns
+    -------
+    hikts : (S x T) np.ndarray
+        Time series reconstructed using only components in `acc`
+    rest : (S x T) np.ndarray
+        Original data with `hikts` removed
+    """
+
+    cbetas = get_coeffs(data - data.mean(axis=-1, keepdims=True), mask, mmix)
+    betas = cbetas[mask]
     if len(acc) != 0:
         hikts = unmask(betas[:, acc].dot(mmix.T[acc, :]), mask)
     else:
         hikts = None
-    return hikts, data-hikts
+
+    return hikts, data - hikts
 
 
 def computefeats2(data, mmix, mask, normalize=True):
-    # Write feature versions of components
-    data = data[mask]
-    # demean data
-    data_vn = stats.zscore(data, axis=-1)
-    # get betas for demeaned data against `mmix`
+    """
+    Converts `data` to component space using `mmix`
+
+    Parameters
+    ----------
+    data : (S x T) array_like
+        Input data
+    mmix : (T x C) array_like
+        Mixing matrix for converting input data to component space, where `C`
+        is components and `T` is the same as in `data`
+    mask : (S,) array-like
+        Boolean mask array
+    normalize : bool, optional
+        Whether to z-score output. Default: True
+
+    Returns
+    -------
+    data_Z : (S x C) np.ndarray
+        Data in component space
+    """
+
+    # demean masked data
+    data_vn = stats.zscore(data[mask], axis=-1)
+
+    # get betas of `data`~`mmix` and limit to range [-0.999, 0.999]
     data_R = get_coeffs(unmask(data_vn, mask), mask, mmix)[mask]
-    # cap betas to range [-0.999, 0.999]
     data_R[data_R < -0.999] = -0.999
     data_R[data_R > 0.999] = 0.999
-    # R-to-Z transform?
+
+    # R-to-Z transform
     data_Z = np.arctanh(data_R)
     if len(data_Z.shape) == 1:
         data_Z = np.atleast_2d(data_Z).T
+
+    # normalize data
     if normalize:
-        # standardize
         data_Zm = stats.zscore(data_Z, axis=0)
-        # add back (mean / stdev)
-        data_Z = data_Zm + (data_Z.mean(axis=0, keepdims=True) / data_Z.std(axis=0, keepdims=True))
+        data_Z = data_Zm + (data_Z.mean(axis=0, keepdims=True) /
+                            data_Z.std(axis=0, keepdims=True))
     return data_Z
 
 
 def ctabsel(ctabfile):
-    ctlines = open(ctabfile).readlines()
+    """
+    Loads a pre-existing component table file
+
+    Parameters
+    ----------
+    ctabfile : str
+        Filepath to existing component table
+
+    Returns
+    -------
+    ctab : (4,) tuple-of-arrays
+        Tuple containing arrays of (1) accepted, (2) rejected, (3) mid, and (4)
+        ignored components
+    """
+
+    with open(ctabfile, 'r') as src:
+        ctlines = src.readlines()
     class_tags = ['#ACC', '#REJ', '#MID', '#IGN']
     class_dict = {}
     for ii, ll in enumerate(ctlines):
@@ -353,21 +433,41 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, ref_img,
     Parameters
     ----------
     catd : (S x E x T) array_like
+        Input data, where `S` is samples, `E` is echos, and `T` is time
     mmix : (T x C) array_like
+        Mixing matrix for converting input data to component space, where `C`
+        is components and `T` is the same as in `catd`
     mask : (S,) array_like
+        Boolean mask array
     t2s : (S,) array_like
     t2sG : (S,) array_like
-    tes : (E,) list
-    combmode : str
+    tes : list
+        List of echo times associated with `catd`, in milliseconds
+    combmode : {'t2s', 'ste'} str
+        How optimal combination of echos should be made, where 't2s' indicates
+        using the method of Posse 1999 and 'ste' indicates using the method of
+        Poser 2006
     ref_img : str or img_like
+        Reference image to dictate how outputs are saved to disk
     fout : bool
-        Whether to output per-component TE-dependencen maps Default: None
+        Whether to output per-component TE-dependence maps. Default: None
     reindex : bool, optional
         Default: False
     mmixN : array_like, optional
         Default: None
     full_sel : bool, optional
+        Whether to perform selection of components based on Rho/Kappa scores.
         Default: True
+
+    Returns
+    -------
+    seldict : dict
+    comptab : (N x 5) np.ndarray
+        Array with columns denoting (1) index of component, (2) Kappa score of
+        component, (3) Rho score of component, (4) variance explained by
+        component, and (5) normalized variance explained by component
+    betas : np.ndarray
+    mmix_new : np.ndarray
     """
 
     # compute optimal combination of raw data
@@ -552,11 +652,47 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, ref_img,
     return seldict, comptab, betas, mmix_new
 
 
-def selcomps(seldict, mmix, head, manacc, n_echos, debug=False, olevel=2, oversion=99,
-             filecsdata=False, savecsdiag=True,
-             strict_mode=False):
+def selcomps(seldict, mmix, ref_img, manacc, n_echos, debug=False, olevel=2, oversion=99,
+             filecsdata=False, savecsdiag=True, strict_mode=False):
     """
-    Select components.
+    Labels components in `mmix`
+
+    Parameters
+    ----------
+    seldict : dict
+        As output from `fitmodels_direct`
+    mmix : (C x T) array_like
+        Mixing matrix for converting input data to component space, where `C`
+        is components and `T` is the number of volumes in the original data
+    ref_img
+        Reference image to dictate how outputs are saved to disk
+    manacc
+        Comma-separated list of indices of manually accepted components
+    n_echos : int
+        Number of echos in original data
+    debug : bool, optional
+        Default: False
+    olevel : int, optional
+        Default: 2
+    oversion : int, optional
+        Default: 99
+    filecsdata: bool, optional
+        Default: False
+    savecsdiag: bool, optional
+        Default: True
+    strict_mode: bool, optional
+        Default: False
+
+    Returns
+    -------
+    acc : list
+        Indices of accepted (BOLD) components in `mmix`
+    rej : list
+        Indices of rejected (non-BOLD) components in `mmix`
+    midk : list
+        Indices of mid-K (questionable) components in `mmix`
+    ign : list
+        Indices of ignored components in `mmix`
     """
     if filecsdata:
         import bz2
@@ -1002,6 +1138,47 @@ def selcomps(seldict, mmix, head, manacc, n_echos, debug=False, olevel=2, oversi
 
 def tedpca(catd, combmode, mask, stabilize, ref_img, tes, kdaw, rdaw, ste=0,
            mlepca=True):
+    """
+    Performs PCA on `catd` and uses TE-dependence to dimensionally reduce data
+
+    Parameters
+    ----------
+    catd : (S x E x T) array_like
+        Input functional data
+    combmode : {'t2s', 'ste'} str
+        How optimal combination of echos should be made, where 't2s' indicates
+        using the method of Posse 1999 and 'ste' indicates using the method of
+        Poser 2006
+    mask : (S,) array_like
+        Boolean mask array
+    stabilize : bool
+        Whether to attempt to stabilize convergence of ICA by returning
+        dimensionally-reduced data from PCA and component selection.
+    ref_img : str or img_like
+        Reference image to dictate how outputs are saved to disk
+    tes : list
+        List of echo times associated with `catd`, in milliseconds
+    kdaw : float
+        Dimensionality augmentation weight for Kappa calculations
+    rdaw : float
+        Dimensionality augmentation weight for Rho calculations
+    ste : int or list-of-int, optional
+        Which echos to use in PCA. Values -1 and 0 are special, where a value
+        of -1 will indicate using all the echos and 0 will indicate using the
+        optimal combination of the echos. A list can be provided to indicate
+        a subset of echos. Default: 0
+    mlepca : bool, optional
+        Whether to use the method originally explained in Minka, NIPS 2000 for
+        guessing PCA dimensionality instead of a traditional SVD. Default: True
+
+    Returns
+    -------
+    n_components : int
+        Number of components retained from PCA decomposition
+    dd : (S x E x T) np.ndarray
+        Dimensionally-reduced functional data
+    """
+
     n_samp, n_echos, n_vols = catd.shape
     ste = np.array([int(ee) for ee in str(ste).split(',')])
 
@@ -1022,7 +1199,7 @@ def tedpca(catd, combmode, mask, stabilize, ref_img, tes, kdaw, rdaw, ste=0,
     dz = (dz - dz.mean()) / dz.std()  # var normalize everything
 
     if not op.exists('pcastate.pkl'):
-        # Do PC dimension selection and get eigenvalue cutoff
+        # do PC dimension selection and get eigenvalue cutoff
         if mlepca:
             from sklearn.decomposition import PCA
             ppca = PCA(n_components='mle', svd_solver='full')
@@ -1067,7 +1244,7 @@ def tedpca(catd, combmode, mask, stabilize, ref_img, tes, kdaw, rdaw, ste=0,
             with open('pcastate.pkl', 'wb') as handle:
                 pickle.dump(pcastate, handle)
         except TypeError:
-            lgr.info('Could not save PCA solution!')
+            lgr.warning('Could not save PCA solution.')
 
     else:  # if loading existing state
         lgr.info('Loading PCA')
@@ -1125,9 +1302,33 @@ def tedpca(catd, combmode, mask, stabilize, ref_img, tes, kdaw, rdaw, ste=0,
 
 def tedica(n_components, dd, conv, fixed_seed, cost, final_cost):
     """
-    Input is dimensionally reduced spatially concatenated multi-echo
-    time series dataset from `tedpca`. Output is comptable, mmix, smaps
-    from ICA, and betas from fitting catd to mmix.
+    Performs ICA on `dd` and returns mixing matrix
+
+    Parameters
+    ----------
+    n_components : int
+        Number of components retained from PCA decomposition
+    dd : (S x E x T) np.ndarray
+        Dimensionally-reduced functional data, where `S` is samples, `E` is
+        echos, and `T` is time
+    conv : float
+        Convergence limit for ICA
+    fixed_seed : int
+        Seed for ensuring reproducibility of ICA results
+    cost : {'tanh', 'pow3', 'gaus', 'skew'} str
+        Initial cost function for ICA
+    final_cost : {'tanh', 'pow3', 'gaus', 'skew'} str
+        Final cost function for ICA
+
+    Returns
+    -------
+    mmix : (C x T) np.ndarray
+        Mixing matrix for converting input data to component space, where `C`
+        is components and `T` is the same as in `dd`
+
+    Notes
+    -----
+    Uses `mdp` implementation of FastICA for decomposition
     """
 
     import mdp
@@ -1139,17 +1340,41 @@ def tedica(n_components, dd, conv, fixed_seed, cost, final_cost):
     icanode.train(dd)
     smaps = icanode.execute(dd)  # noqa
     mmix = icanode.get_recmatrix().T
-    mmix = (mmix-mmix.mean(0))/mmix.std(0)
+    mmix = stats.zscore(mmix, axis=0)
     return mmix
 
 
 def gscontrol_raw(catd, optcom, n_echos, ref_img, dtrank=4):
     """
+    Removes global signal from individual echo `catd` and `optcom` time series
+
     This function uses the spatial global signal estimation approach to
-    modify catd (global variable) to removal global signal out of individual
-    echo time series datasets. The spatial global signal is estimated
-    from the optimally combined data after detrending with a Legendre
-    polynomial basis of `order = 0` and `degree = dtrank`.
+    remove global signal out of individual echo time series datasets. The
+    spatial global signal is estimated from the optimally combined data after
+    detrending with a Legendre polynomial basis of `order = 0` and
+    `degree = dtrank`.
+
+    Parameters
+    ----------
+    catd : (S x E x T) array_like
+        Input functional data
+    optcom : (S x T) array_like
+        Optimally-combined functional data (i.e., the output of
+        `tedana.interfaces.t2smap.make_optcom`)
+    n_echos : int
+        Number of echos in data. Should be the same as `E` dimension of `catd`
+    ref_img : str or img_like
+        Reference image to dictate how outputs are saved to disk
+    dtrank : int, optional
+        Specifies degree of Legendre polynomial basis function for estimating
+        spatial global signal. Default: 4
+
+    Returns
+    -------
+    dm_catd : (S x E x T) array_like
+        Input `catd` with global signal removed from time series
+    dm_optcom : (S x T) array_like
+        Input `optcom` with global signal removed from time series
     """
 
     lgr.info('++ Applying amplitude-based T1 equilibration correction')
@@ -1184,8 +1409,8 @@ def gscontrol_raw(catd, optcom, n_echos, ref_img, dtrank=4):
                              np.atleast_2d(glbase.T[dtrank])) + Gmu[Gmask][:, np.newaxis]
 
     filewrite(optcom, 'tsoc_orig', ref_img)
-    optcom = unmask(tsoc_nogs, Gmask)
-    filewrite(optcom, 'tsoc_nogs.nii', ref_img)
+    dm_optcom = unmask(tsoc_nogs, Gmask)
+    filewrite(dm_optcom, 'tsoc_nogs.nii', ref_img)
 
     # Project glbase out of each echo
     dm_catd = catd.copy()  # don't overwrite catd
@@ -1196,10 +1421,27 @@ def gscontrol_raw(catd, optcom, n_echos, ref_img, dtrank=4):
                               np.atleast_2d(glbase.T[dtrank]))
         dm_catd[:, echo, :] = unmask(e_nogs, Gmask)
 
-    return dm_catd, optcom
+    return dm_catd, dm_optcom
 
 
-def gscontrol_mmix(mmix, acc, rej, midk, empty, ref_img):
+def gscontrol_mmix(OCcatd, mmix, acc, rej, midk, ref_img):
+    """
+    Parameters
+    ----------
+    OCcatd : (S x T) array_like
+        Optimally-combined time series data
+    mmix : (C x T) array_like
+        Mixing matrix for converting input data to component space, where `C`
+        is components and `T` is the same as in `OCcatd`
+    acc : list
+        Indices of accepted (BOLD) components in `mmix`
+    rej : list
+        Indices of rejected (non-BOLD) components in `mmix`
+    midk : list
+        Indices of mid-K (questionable) components in `mmix`
+    ref_img : str or img_like
+        Reference image to dictate how outputs are saved to disk
+    """
 
     Gmu = OCcatd.mean(axis=-1)
     Gstd = OCcatd.std(axis=-1)
@@ -1257,38 +1499,118 @@ def gscontrol_mmix(mmix, acc, rej, midk, empty, ref_img):
     np.savetxt('meica_mix_T1c.1D', mmixnogs)
 
 
-def write_split_ts(data, comptable, mmix, acc, rej, midk, ref_img, suffix=''):
-    mdata = fmask(data, mask)
-    betas = fmask(get_coeffs(unmask((mdata.T - mdata.T.mean(0)).T, mask),
-                             mask, mmix), mask)
-    dmdata = mdata.T-mdata.T.mean(0)
-    varexpl = (1-((dmdata.T-betas.dot(mmix.T))**2.).sum()/(dmdata**2.).sum())*100
+def write_split_ts(data, mmix, acc, rej, midk, ref_img, suffix=''):
+    """
+    Splits `data` into denoised / noise / ignored time series and saves to disk
+
+    Parameters
+    ----------
+    data : (S x T) array_like
+        Input time series
+    mmix : (T x C) array_like
+        Mixing matrix for converting input data to component space, where `C`
+        is components and `T` is the same as in `data`
+    acc : list
+        Indices of accepted (BOLD) components in `mmix`
+    rej : list
+        Indices of rejected (non-BOLD) components in `mmix`
+    midk : list
+        Indices of mid-K (questionable) components in `mmix`
+    ref_img : str or img_like
+        Reference image to dictate how outputs are saved to disk
+    suffix : str, optional
+        Appended to name of saved files (before extension). Default: ''
+
+    Returns
+    -------
+    varexpl : float
+        Percent variance of data explained by extracted + retained components
+    """
+
+    # mask and de-mean data
+    mdata = data[mask]
+    dmdata = mdata.T - mdata.T.mean(axis=0)
+
+    # get variance explained by retained components
+    betas = get_coeffs(unmask(dmdata.T, mask), mask, mmix)[mask]
+    varexpl = (1 - ((dmdata.T - betas.dot(mmix.T))**2.).sum() / (dmdata**2.).sum()) * 100
     lgr.info('Variance explained: ', varexpl, '%')
+
+    # create component and de-noised time series and save to files
+    hikts = betas[:, acc].dot(mmix.T[acc, :])
     midkts = betas[:, midk].dot(mmix.T[midk, :])
     lowkts = betas[:, rej].dot(mmix.T[rej, :])
+    dnts = data[mask] - lowkts - midkts
     if len(acc) != 0:
-        filewrite(unmask(betas[:, acc].dot(mmix.T[acc, :]), mask),
-                  'hik_ts_{0}'.format(suffix), ref_img)
+        filewrite(unmask(hikts, mask), 'hik_ts_{0}'.format(suffix), ref_img)
     if len(midk) != 0:
         filewrite(unmask(midkts, mask), 'midk_ts_{0}'.format(suffix), ref_img)
     if len(rej) != 0:
         filewrite(unmask(lowkts, mask), 'lowk_ts_{0}'.format(suffix), ref_img)
-    filewrite(unmask(data[mask] - lowkts - midkts, mask),
-              'dn_ts_{0}'.format(suffix), ref_img)
+    filewrite(unmask(dnts, mask), 'dn_ts_{0}'.format(suffix), ref_img)
+
     return varexpl
 
 
 def writefeats(data, mmix, mask, ref_img, suffix=''):
-    # Write feature versions of components
-    feats = computefeats2(data, mmix, mask)
-    filewrite(unmask(feats, mask), 'feats_{0}'.format(suffix), ref_img)
+    """
+    Converts `data` to component space with `mmix` and saves to disk
 
+    Parameters
+    ----------
+    data : (S x T) array_like
+        Input time series
+    mmix : (T x C) array_like
+        Mixing matrix for converting input data to component space, where `C`
+        is components and `T` is the same as in `data`
+    mask : (S,) array_like
+        Boolean mask array
+    ref_img : str or img_like
+        Reference image to dictate how outputs are saved to disk
+    suffix : str, optional
+        Appended to name of saved files (before extension). Default: ''
+
+    Returns
+    -------
+    fname : str
+        Filepath to saved file
+    """
+
+    # write feature versions of components
+    feats = unmask(computefeats2(data, mmix, mask), mask)
+    fname = filewrite(feats, 'feats_{0}'.format(suffix), ref_img)
+
+    return fname
+
+
+def writect(comptable, n_vols, acc, rej, midk, empty, ctname='comp_table.txt', varexpl='-1'):
+    """
+    Saves component table to disk
+
+    Parameters
+    ----------
+    comptable : (N x 5) array_like
+        Array with columns denoting (1) index of component, (2) Kappa score of
+        component, (3) Rho score of component, (4) variance explained by
+        component, and (5) normalized variance explained by component
+    n_vols : int
+        Number of volumes in original time series
+    acc : list
+        Indices of accepted (BOLD) components in `mmix`
+    rej : list
+        Indices of rejected (non-BOLD) components in `mmix`
+    midk : list
+        Indices of mid-K (questionable) components in `mmix`
+    empty : list
+        Indices of ignored components in `mmix`
+    ctname : str, optional
+        Filename to save comptable to disk. Default 'comp_table.txt'
+    varexpl : str
+        Variance explained by original data
+    """
 
-def writect(comptable, nt, acc, rej, midk, empty, ctname='', varexpl='-1'):
     n_components = comptable.shape[0]
     sortab = comptable[comptable[:, 1].argsort()[::-1], :]
-    if ctname is '':
-        ctname = 'comp_table.txt'
     open('accepted.txt', 'w').write(','.join([str(int(cc)) for cc in acc]))
     open('rejected.txt', 'w').write(','.join([str(int(cc)) for cc in rej]))
     open('midk_rejected.txt',
@@ -1299,7 +1621,7 @@ def writect(comptable, nt, acc, rej, midk, empty, ctname='', varexpl='-1'):
                           n_components=n_components,
                           dfe=len(acc),
                           rjn=len(midk) + len(rej),
-                          dfn=nt - len(midk) - len(rej),
+                          dfn=n_vols - len(midk) - len(rej),
                           acc=','.join([str(int(cc)) for cc in acc]),
                           rej=','.join([str(int(cc)) for cc in rej]),
                           mid=','.join([str(int(cc)) for cc in midk]),
@@ -1330,32 +1652,78 @@ def writect(comptable, nt, acc, rej, midk, empty, ctname='', varexpl='-1'):
                                                   sortab[i, 4]))
 
 
-def writeresults(OCcatd, comptable, mmix, nt, acc, rej, midk, empty, ref_img):
+def writeresults(ts, mask, comptable, mmix, n_vols, acc, rej, midk, empty, ref_img):
+    """
+    Denoises `ts` and saves all resulting files to disk
+
+    Parameters
+    ----------
+    ts : (S x T) array_like
+        Time series to denoise and save to disk
+    mask : (S,) array_like
+        Boolean mask array
+    comptable : (N x 5) array_like
+        Array with columns denoting (1) index of component, (2) Kappa score of
+        component, (3) Rho score of component, (4) variance explained by
+        component, and (5) normalized variance explained by component
+    mmix : (T x C) array_like
+        Mixing matrix for converting input data to component space, where `C`
+        is components and `T` is the same as in `ts`
+    acc : list
+        Indices of accepted (BOLD) components in `mmix`
+    rej : list
+        Indices of rejected (non-BOLD) components in `mmix`
+    midk : list
+        Indices of mid-K (questionable) components in `mmix`
+    empty : list
+        Indices of ignored components in `mmix`
+    ref_img : str or img_like
+        Reference image to dictate how outputs are saved to disk
+    """
+
     lgr.info('++ Writing optimally combined time series')
-    ts = OCcatd
     filewrite(ts, 'ts_OC', ref_img)
-    print("++ Writing Kappa-filtered optimally combined timeseries")
-    varexpl = write_split_ts(ts, comptable, mmix, acc, rej, midk, ref_img,
-                             suffix='OC')
-    print("++ Writing signal versions of components")
+    lgr.info("++ Writing Kappa-filtered optimally combined timeseries")
+    varexpl = write_split_ts(ts, mmix, acc, rej, midk, ref_img, suffix='OC')
+    lgr.info("++ Writing signal versions of components")
     ts_B = get_coeffs(ts, mask, mmix)
-    filewrite(ts_B[:, :, :, :], 'betas_OC', ref_img)
+    filewrite(ts_B, 'betas_OC', ref_img)
 
     if len(acc) != 0:
-        filewrite(ts_B[:, :, :, acc], 'betas_hik_OC', ref_img)
-        print("++ Writing optimally combined high-Kappa features")
-        writefeats(split_ts(ts, comptable, mmix, acc, rej, midk)[0],
+        filewrite(ts_B[:, acc], 'betas_hik_OC', ref_img)
+        lgr.info("++ Writing optimally combined high-Kappa features")
+        writefeats(split_ts(ts, mmix, mask, acc)[0],
                    mmix[:, acc], mask, ref_img, suffix='OC2')
-    print("++ Writing component table")
-    writect(comptable, nt, acc, rej, midk, empty, ctname='comp_table.txt',
+    lgr.info("++ Writing component table")
+    writect(comptable, n_vols, acc, rej, midk, empty, ctname='comp_table.txt',
             varexpl=varexpl)
 
 
-def writeresults_echoes(acc, rej, midk, ref_img, comptable, mmix, n_echos):
-    for i_echo in range(n_echos):
-        print("++ Writing Kappa-filtered TE#%i timeseries" % (i_echo+1))
-        write_split_ts(catd[:, :, :, i_echo, :], comptable, mmix,
-                       acc, rej, midk, ref_img, suffix='e%i' % (i_echo+1))
+def writeresults_echoes(catd, mmix, acc, rej, midk, ref_img):
+    """
+    Saves individually denoised echos to disk
+
+    Parameters
+    ----------
+    catd : (S x E x T) array_like
+        Input data time series
+    mmix : (T x C) array_like
+        Mixing matrix for converting input data to component space, where `C`
+        is components and `T` is the same as in `catd`
+    acc : list
+        Indices of accepted (BOLD) components in `mmix`
+    rej : list
+        Indices of rejected (non-BOLD) components in `mmix`
+    midk : list
+        Indices of mid-K (questionable) components in `mmix`
+    ref_img : str or img_like
+        Reference image to dictate how outputs are saved to disk
+    """
+
+    for i_echo in range(catd.shape[1]):
+        lgr.info("++ Writing Kappa-filtered TE#%i timeseries" % (i_echo+1))
+        write_split_ts(catd[:, i_echo, :], mmix, acc, rej, midk, ref_img,
+                       suffix='e%i' % (i_echo+1))
 
 
 def main(data, tes, mixm=None, ctab=None, manacc=None, strict=False,
@@ -1522,7 +1890,7 @@ def main(data, tes, mixm=None, ctab=None, manacc=None, strict=False,
         lgr.info('** WARNING! No BOLD components detected!!! \n'
                  '** Please check data and results!')
 
-    writeresults(OCcatd, comptable, mmix, n_vols, acc, rej, midk, empty, ref_img)
-    gscontrol_mmix(mmix, acc, rej, midk, empty, ref_img)
+    writeresults(OCcatd, mask, comptable, mmix, n_vols, acc, rej, midk, empty, ref_img)
+    gscontrol_mmix(OCcatd, mmix, acc, rej, midk, ref_img)
     if dne:
-        writeresults_echoes(acc, rej, midk, ref_img, comptable, mmix, n_echos)
+        writeresults_echoes(catd, mmix, acc, rej, midk, ref_img)

From 0e9e938272b87db5bb2c8e753ad9b4d0e1dd66fa Mon Sep 17 00:00:00 2001
From: Ross Markello <rossmarkello@gmail.com>
Date: Tue, 8 May 2018 22:17:54 -0400
Subject: [PATCH 11/18] [RF] Addressed reviews and minor updates

---
 Dockerfile                  |  9 ++--
 tedana/interfaces/t2smap.py | 10 ++--
 tedana/interfaces/tedana.py | 86 +++++++++++++++++----------------
 tedana/tests/test_tedana.py | 20 ++++----
 tedana/tests/test_utils.py  | 10 +---
 tedana/utils/__init__.py    |  8 ++--
 tedana/utils/utils.py       | 95 ++++++-------------------------------
 7 files changed, 84 insertions(+), 154 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 7409b80a0..57f3e15c3 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -98,7 +98,7 @@ RUN conda create -y -q --name default --channel vida-nyu python=3.6.1 \
     && sync && conda clean -tipsy && sync \
     && /bin/bash -c "source activate default \
     	&& pip install -q --no-cache-dir \
-    	nipype ipython scikit-learn scipy ipdb mdp" \
+    	nipype ipython scikit-learn scipy ipdb mdp nilearn 'nibabel>=2.1.0'" \
     && sync
 ENV PATH=/opt/conda/envs/default/bin:$PATH
 
@@ -110,15 +110,12 @@ RUN conda create -y -q --name py27 python=2.7 \
     && sync && conda clean -tipsy && sync \
     && /bin/bash -c "source activate default \
         && pip install -q --no-cache-dir \
-        nipype ipython scikit-learn scipy ipdb mdp" \
+        nipype ipython scikit-learn scipy ipdb mdp nilearn 'nibabel>=2.1.0'" \
     && sync
 
 USER root
 
 # User-defined instruction
-RUN mkdir /home/neuro/code
-
-# User-defined instruction
-RUN mkdir /home/neuro/data
+RUN mkdir /home/neuro/code /home/neuro/data
 
 WORKDIR /home/neuro
diff --git a/tedana/interfaces/t2smap.py b/tedana/interfaces/t2smap.py
index 61020e168..8e2d5d3af 100644
--- a/tedana/interfaces/t2smap.py
+++ b/tedana/interfaces/t2smap.py
@@ -1,5 +1,5 @@
 import numpy as np
-from tedana.utils import (filewrite, load_data, makeadmask, unmask, fmask)
+from tedana.utils import (filewrite, load_data, make_adaptive_mask, unmask)
 
 import logging
 logging.basicConfig(format='[%(levelname)s]: %(message)s', level=logging.INFO)
@@ -12,7 +12,7 @@ def fit(data, mask, tes, masksum, start_echo):
     T2* and S0 timeseries.
     """
     nx, ny, nz, n_echos, n_trs = data.shape
-    echodata = fmask(data, mask)
+    echodata = data[mask]
     tes = np.array(tes)
 
     t2sa_ts = np.zeros([nx, ny, nz, n_trs])
@@ -173,10 +173,10 @@ def make_optcom(data, t2s, tes, mask, combmode):
     tes = np.array(tes)[np.newaxis]  # (1 x E) array_like
 
     if t2s.ndim == 1:
-        lgr.info('Optimally combining with voxel-wise T2 estimates')
+        lgr.info('++ Optimally combining data with voxel-wise T2 estimates')
         ft2s = t2s[mask, np.newaxis]
     else:
-        lgr.info('Optimally combining with voxel- and volume-wise T2 estimates')
+        lgr.info('++ Optimally combining data with voxel- and volume-wise T2 estimates')
         ft2s = t2s[mask, :, np.newaxis]
 
     if combmode == 'ste':
@@ -223,7 +223,7 @@ def main(options):
     ref_img = data[0] if isinstance(data, list) else data
 
     lgr.info("++ Computing Mask")
-    mask, masksum = makeadmask(catd, minimum=False, getsum=True)
+    mask, masksum = make_adaptive_mask(catd, minimum=False, getsum=True)
     filewrite(masksum, 'masksum%s' % suf, ref_img, copy_header=False)
 
     lgr.info("++ Computing Adaptive T2* map")
diff --git a/tedana/interfaces/tedana.py b/tedana/interfaces/tedana.py
index 37f2ac3e8..98b8c292f 100644
--- a/tedana/interfaces/tedana.py
+++ b/tedana/interfaces/tedana.py
@@ -11,8 +11,8 @@
 from sklearn.cluster import DBSCAN
 from tedana.interfaces import (make_optcom, t2sadmap)
 from tedana.utils import (load_image, load_data, get_dtype,
-                          make_min_mask, makeadmask,
-                          fmask, unmask, filewrite,
+                          make_min_mask, make_adaptive_mask,
+                          unmask, filewrite,
                           fitgaussian, dice, andb)
 
 import logging
@@ -151,16 +151,16 @@ def get_coeffs(data, mask, X, add_const=False):
     # mask data and flip (time x samples)
     mdata = data[mask].T
 
-    # coerce X to >=2d if single variable supplies
+    # coerce X to >=2d
     X = np.atleast_2d(X)
-    if X.shape[0] == 1:
+
+    if len(X) == 1:
         X = X.T
     if add_const:  # add intercept, if specified
-        Xones = np.ones((np.min(mdata.shape), 1))
-        X = np.column_stack([X, Xones])
+        X = np.column_stack([X, np.ones((len(X), 1))])
 
     betas = np.linalg.lstsq(X, mdata)[0].T
-    if add_const:  # drop beta for intercept
+    if add_const:  # drop beta for intercept, if specified
         betas = betas[:, :-1]
     betas = unmask(betas, mask)
 
@@ -316,7 +316,8 @@ def eimask(dd, ees=None):
         perc98 = stats.scoreatpercentile(dd[:, ee, :].flatten(), 98,
                                          interpolation_method='lower')
         lthr, hthr = 0.001 * perc98, 5 * perc98
-        lgr.info('++ Eimask threshold boundaries: {}'.format([lthr, hthr]))
+        lgr.info('++ Eimask threshold boundaries: '
+                 '{:.03f} {:.03f}'.format(lthr, hthr))
         m = dd[:, ee, :].mean(axis=1)
         imask[np.logical_and(m > lthr, m < hthr), ee] = True
 
@@ -389,7 +390,7 @@ def computefeats2(data, mmix, mask, normalize=True):
 
     # R-to-Z transform
     data_Z = np.arctanh(data_R)
-    if len(data_Z.shape) == 1:
+    if data_Z.ndim == 1:
         data_Z = np.atleast_2d(data_Z).T
 
     # normalize data
@@ -471,8 +472,7 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, ref_img,
     """
 
     # compute optimal combination of raw data
-    tsoc = np.array(make_optcom(catd, t2sG, tes, mask, combmode),
-                    dtype=float)[mask]
+    tsoc = make_optcom(catd, t2sG, tes, mask, combmode).astype(float)[mask]
     # demean optimal combination
     tsoc_dm = tsoc - tsoc.mean(axis=-1, keepdims=True)
 
@@ -593,6 +593,7 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, ref_img,
     # full selection including clustering criteria
     seldict = None
     if full_sel:
+        lgr.info('++ Performing spatial clustering of components')
         for i in range(n_components):
             # save out files
             out = np.zeros((n_samp, 4))
@@ -606,7 +607,7 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, ref_img,
             out[:, 2] = np.squeeze(unmask(F_S0_maps[:, i], t2s != 0))
             out[:, 3] = np.squeeze(unmask(Z_maps[:, i], mask))
 
-            filewrite(out, ccname, ref_img, gzip=gzip)
+            ccname = filewrite(out, ccname, ref_img, gzip=gzip)
 
             if get_dtype(ref_img) == 'GIFTI':
                 continue  # TODO: pass through GIFTI file data as below
@@ -694,10 +695,11 @@ def selcomps(seldict, mmix, ref_img, manacc, n_echos, debug=False, olevel=2, ove
     ign : list
         Indices of ignored components in `mmix`
     """
+
     if filecsdata:
         import bz2
         if seldict is not None:
-            lgr.info('Saving component selection data')
+            lgr.info('++ Saving component selection data')
             csstate_f = bz2.BZ2File('compseldata.pklbz', 'wb')
             pickle.dump(seldict, csstate_f)
             csstate_f.close()
@@ -707,7 +709,7 @@ def selcomps(seldict, mmix, ref_img, manacc, n_echos, debug=False, olevel=2, ove
                 seldict = pickle.load(csstate_f)
                 csstate_f.close()
             except FileNotFoundError:
-                lgr.info('No component data found!')
+                lgr.warning('++ No component data found!')
                 return None
 
     # Dump dictionary into variable names
@@ -1043,13 +1045,14 @@ def selcomps(seldict, mmix, ref_img, manacc, n_echos, debug=False, olevel=2, ove
     for t2sl_i in range(len(t2s_lim)):
         t2sl = t2s_lim[t2sl_i]
         veinW = sig_B[:, veinc]*np.tile(rej_veinRZ, [sig_B.shape[0], 1])
-        veincand = fmask(unmask(andb([s0[t2s != 0] < np.median(s0[t2s != 0]),
-                                t2s[t2s != 0] < t2sl]) >= 1, t2s != 0), mask)
+        veincand = unmask(andb([s0[t2s != 0] < np.median(s0[t2s != 0]),
+                                t2s[t2s != 0] < t2sl]) >= 1,
+                          t2s != 0)[mask]
         veinW[~veincand] = 0
-        invein = veinW.sum(1)[fmask(unmask(veinmaskf, mask) * unmask(veinW.sum(1) > 1, mask),
-                                    mask)]
+        invein = veinW.sum(axis=1)[(unmask(veinmaskf, mask) *
+                                    unmask(veinW.sum(axis=1) > 1, mask))[mask]]
         minW = 10 * (np.log10(invein).mean()) - 1 * 10**(np.log10(invein).std())
-        veinmaskB = veinW.sum(1) > minW
+        veinmaskB = veinW.sum(axis=1) > minW
         tsoc_Bp = tsoc_B.copy()
         tsoc_Bp[tsoc_Bp < 0] = 0
         vvex = np.array([(tsoc_Bp[veinmaskB, ii]**2.).sum() /
@@ -1136,8 +1139,8 @@ def selcomps(seldict, mmix, ref_img, manacc, n_echos, debug=False, olevel=2, ove
     return list(sorted(ncl)), list(sorted(rej)), list(sorted(midk)), list(sorted(ign))
 
 
-def tedpca(catd, combmode, mask, stabilize, ref_img, tes, kdaw, rdaw, ste=0,
-           mlepca=True):
+def tedpca(catd, OCcatd, combmode, mask, stabilize, ref_img, tes, kdaw, rdaw,
+           ste=0, mlepca=True):
     """
     Performs PCA on `catd` and uses TE-dependence to dimensionally reduce data
 
@@ -1145,6 +1148,8 @@ def tedpca(catd, combmode, mask, stabilize, ref_img, tes, kdaw, rdaw, ste=0,
     ----------
     catd : (S x E x T) array_like
         Input functional data
+    OCcatd : (S x T) array_like
+        Optimally-combined time series data
     combmode : {'t2s', 'ste'} str
         How optimal combination of echos should be made, where 't2s' indicates
         using the method of Posse 1999 and 'ste' indicates using the method of
@@ -1183,13 +1188,13 @@ def tedpca(catd, combmode, mask, stabilize, ref_img, tes, kdaw, rdaw, ste=0,
     ste = np.array([int(ee) for ee in str(ste).split(',')])
 
     if len(ste) == 1 and ste[0] == -1:
-        lgr.info('-Computing PCA of optimally combined multi-echo data')
+        lgr.info('++ Computing PCA of optimally combined multi-echo data')
         d = OCcatd[make_min_mask(OCcatd[:, np.newaxis, :])][:, np.newaxis, :]
     elif len(ste) == 1 and ste[0] == 0:
-        lgr.info('-Computing PCA of spatially concatenated multi-echo data')
+        lgr.info('++ Computing PCA of spatially concatenated multi-echo data')
         d = catd[mask].astype('float64')
     else:
-        lgr.info('-Computing PCA of TE #%s' % ','.join([str(ee) for ee in ste]))
+        lgr.info('++ Computing PCA of TE #%s' % ','.join([str(ee) for ee in ste]))
         d = np.stack([catd[mask, ee] for ee in ste - 1], axis=1).astype('float64')
 
     eim = np.squeeze(eimask(d))
@@ -1237,7 +1242,7 @@ def tedpca(catd, combmode, mask, stabilize, ref_img, tes, kdaw, rdaw, ste=0,
         ctb = np.vstack([ctb.T[:3], sp]).T
 
         # Save state
-        lgr.info('Saving PCA')
+        lgr.info('++ Saving PCA')
         pcastate = {'u': u, 's': s, 'v': v, 'ctb': ctb,
                     'eigelb': eigelb, 'spmin': spmin, 'spcum': spcum}
         try:
@@ -1247,7 +1252,7 @@ def tedpca(catd, combmode, mask, stabilize, ref_img, tes, kdaw, rdaw, ste=0,
             lgr.warning('Could not save PCA solution.')
 
     else:  # if loading existing state
-        lgr.info('Loading PCA')
+        lgr.info('++ Loading PCA')
         with open('pcastate.pkl', 'rb') as handle:
             pcastate = pickle.load(handle)
         u, s, v = pcastate['u'], pcastate['s'], pcastate['v']
@@ -1291,8 +1296,8 @@ def tedpca(catd, combmode, mask, stabilize, ref_img, tes, kdaw, rdaw, ste=0,
     dd = u.dot(np.diag(s*np.array(pcsel, dtype=np.int))).dot(v)
 
     n_components = s[pcsel].shape[0]
-    lgr.info('--Selected {0} components. Minimum Kappa={1:.02f} '
-             'Rho={2:.02f}'.format(n_components, kappa_thr, rho_thr))
+    lgr.info('++ Selected {0} components. Kappa threshold: {1:.02f}, '
+             'Rho threshold: {2:.02f}'.format(n_components, kappa_thr, rho_thr))
 
     dd = stats.zscore(dd.T, axis=0).T  # variance normalize timeseries
     dd = stats.zscore(dd, axis=None)  # variance normalize everything
@@ -1359,8 +1364,7 @@ def gscontrol_raw(catd, optcom, n_echos, ref_img, dtrank=4):
     catd : (S x E x T) array_like
         Input functional data
     optcom : (S x T) array_like
-        Optimally-combined functional data (i.e., the output of
-        `tedana.interfaces.t2smap.make_optcom`)
+        Optimally-combined functional data (i.e., the output of `make_optcom`)
     n_echos : int
         Number of echos in data. Should be the same as `E` dimension of `catd`
     ref_img : str or img_like
@@ -1410,7 +1414,7 @@ def gscontrol_raw(catd, optcom, n_echos, ref_img, dtrank=4):
 
     filewrite(optcom, 'tsoc_orig', ref_img)
     dm_optcom = unmask(tsoc_nogs, Gmask)
-    filewrite(dm_optcom, 'tsoc_nogs.nii', ref_img)
+    filewrite(dm_optcom, 'tsoc_nogs', ref_img)
 
     # Project glbase out of each echo
     dm_catd = catd.copy()  # don't overwrite catd
@@ -1683,18 +1687,18 @@ def writeresults(ts, mask, comptable, mmix, n_vols, acc, rej, midk, empty, ref_i
 
     lgr.info('++ Writing optimally combined time series')
     filewrite(ts, 'ts_OC', ref_img)
-    lgr.info("++ Writing Kappa-filtered optimally combined timeseries")
+    lgr.info('++ Writing Kappa-filtered optimally combined timeseries')
     varexpl = write_split_ts(ts, mmix, acc, rej, midk, ref_img, suffix='OC')
-    lgr.info("++ Writing signal versions of components")
+    lgr.info('++ Writing signal versions of components')
     ts_B = get_coeffs(ts, mask, mmix)
     filewrite(ts_B, 'betas_OC', ref_img)
 
     if len(acc) != 0:
         filewrite(ts_B[:, acc], 'betas_hik_OC', ref_img)
-        lgr.info("++ Writing optimally combined high-Kappa features")
+        lgr.info('++ Writing optimally combined high-Kappa features')
         writefeats(split_ts(ts, mmix, mask, acc)[0],
                    mmix[:, acc], mask, ref_img, suffix='OC2')
-    lgr.info("++ Writing component table")
+    lgr.info('++ Writing component table')
     writect(comptable, n_vols, acc, rej, midk, empty, ctname='comp_table.txt',
             varexpl=varexpl)
 
@@ -1791,6 +1795,7 @@ def main(data, tes, mixm=None, ctab=None, manacc=None, strict=False,
     n_echos = len(tes)
 
     # coerce data to samples x echos x time array
+    lgr.info('++ Loading input data: {}'.format(data))
     catd, ref_img = load_data(data, n_echos=n_echos)
     n_samp, n_echos, n_vols = catd.shape
 
@@ -1825,12 +1830,11 @@ def main(data, tes, mixm=None, ctab=None, manacc=None, strict=False,
 
     lgr.info('++ Computing Mask')
     global mask
-    mask, masksum = makeadmask(catd, minimum=False, getsum=True)
+    mask, masksum = make_adaptive_mask(catd, minimum=False, getsum=True)
 
     lgr.info('++ Computing T2* map')
     global t2s, s0, t2sG
-    t2s, s0, t2ss, s0s, t2sG, s0G = t2sadmap(catd, tes,
-                                             mask, masksum,
+    t2s, s0, t2ss, s0s, t2sG, s0G = t2sadmap(catd, tes, mask, masksum,
                                              start_echo=1)
 
     # set a hard cap for the T2* map
@@ -1838,12 +1842,12 @@ def main(data, tes, mixm=None, ctab=None, manacc=None, strict=False,
     cap_t2s = stats.scoreatpercentile(t2s.flatten(), 99.5,
                                       interpolation_method='lower')
     t2s[t2s > cap_t2s * 10] = cap_t2s
-    filewrite(s0, op.join(out_dir, 's0v'), ref_img)
     filewrite(t2s, op.join(out_dir, 't2sv'), ref_img)
+    filewrite(s0, op.join(out_dir, 's0v'), ref_img)
     filewrite(t2ss, op.join(out_dir, 't2ss'), ref_img)
     filewrite(s0s, op.join(out_dir, 's0vs'), ref_img)
-    filewrite(s0G, op.join(out_dir, 's0vG'), ref_img)
     filewrite(t2sG, op.join(out_dir, 't2svG'), ref_img)
+    filewrite(s0G, op.join(out_dir, 's0vG'), ref_img)
 
     # optimally combine data
     global OCcatd
@@ -1855,7 +1859,7 @@ def main(data, tes, mixm=None, ctab=None, manacc=None, strict=False,
 
     if mixm is None:
         lgr.info("++ Doing ME-PCA and ME-ICA")
-        n_components, dd = tedpca(catd, combmode, mask, stabilize, ref_img,
+        n_components, dd = tedpca(catd, OCcatd, combmode, mask, stabilize, ref_img,
                                   tes=tes, kdaw=kdaw, rdaw=rdaw, ste=ste)
         mmix_orig = tedica(n_components, dd, conv, fixed_seed, cost=initcost,
                            final_cost=finalcost)
diff --git a/tedana/tests/test_tedana.py b/tedana/tests/test_tedana.py
index 9a18d25ce..3abce279e 100644
--- a/tedana/tests/test_tedana.py
+++ b/tedana/tests/test_tedana.py
@@ -36,7 +36,18 @@ def test_outputs():
     """
     Compare the niftis specified in the below list again
     """
+
     nifti_test_list = [
+     't2sv.nii',
+     's0v.nii',
+     't2ss.nii',
+     's0vs.nii',
+     't2svG.nii',
+     's0vG.nii',
+     'T1gs.nii',
+     'tsoc_orig.nii',
+     'tsoc_nogs.nii',
+     # files are in order of creation above this point
      '.cc_temp.nii.gz',
      '.fcl_in.nii.gz',
      '.fcl_out.nii.gz',
@@ -52,17 +63,8 @@ def test_outputs():
      'hik_ts_OC_T1c.nii',
      'lowk_ts_OC.nii',
      'midk_ts_OC.nii',
-     's0v.nii',
-     's0vG.nii',
-     's0vs.nii',
      'sphis_hik.nii',
-     'T1gs.nii',
-     't2ss.nii',
-     't2sv.nii',
-     't2svG.nii',
      'ts_OC.nii',
-     'tsoc_nogs.nii',
-     'tsoc_orig.nii',
      'veins_l0.nii',
      'veins_l1.nii']
     test_dir = Path('/home/neuro/data/TED/')
diff --git a/tedana/tests/test_utils.py b/tedana/tests/test_utils.py
index 8a027a858..102f36a89 100644
--- a/tedana/tests/test_utils.py
+++ b/tedana/tests/test_utils.py
@@ -11,7 +11,7 @@ def test_cat2echos():
     pass
 
 
-def test_makeadmask():
+def test_make_adaptive_mask():
     pass
 
 
@@ -19,14 +19,6 @@ def test_make_min_mask():
     pass
 
 
-def test_uncat2echos():
-    pass
-
-
-def test_fmask():
-    pass
-
-
 def test_unmask():
     pass
 
diff --git a/tedana/utils/__init__.py b/tedana/utils/__init__.py
index 771178fd5..00b45b9bc 100644
--- a/tedana/utils/__init__.py
+++ b/tedana/utils/__init__.py
@@ -3,14 +3,14 @@
 
 from .utils import (
     load_image, load_data, get_dtype,
-    make_min_mask, makeadmask,
-    fmask, unmask, filewrite,
+    make_min_mask, make_adaptive_mask,
+    unmask, filewrite,
     fitgaussian, dice, andb,
 )
 
 
 __all__ = [
     'load_image', 'load_data', 'get_dtype'
-    'make_min_mask', 'makeadmask',
-    'fmask', 'unmask', 'filewrite',
+    'make_min_mask', 'make_adaptive_mask',
+    'unmask', 'filewrite',
     'fitgaussian', 'dice', 'andb']
diff --git a/tedana/utils/utils.py b/tedana/utils/utils.py
index a090b95db..7d7b01796 100644
--- a/tedana/utils/utils.py
+++ b/tedana/utils/utils.py
@@ -7,6 +7,7 @@
 from nilearn._utils import check_niimg
 import nilearn.masking as nimask
 from scipy.optimize import leastsq
+from sklearn.utils import check_array
 
 from ..due import due, BibTeX
 
@@ -125,7 +126,7 @@ def load_data(data, n_echos=None):
     return fdata, ref_img
 
 
-def makeadmask(data, minimum=True, getsum=False):
+def make_adaptive_mask(data, minimum=True, getsum=False):
     """
     Makes map of `data` specifying longest echo a voxel can be sampled with
 
@@ -362,89 +363,25 @@ def make_gii_darray(ref_array, data, copy_meta=False):
     return darray
 
 
-def uncat2echos(data):
-    """
-    Combines Z- and echo-axis in `data`
-
-    Parameters
-    ----------
-    data : (X x Y x Z x E x T) array_like
-        Multi-echo data array
-
-    Returns
-    -------
-    fdata : (X x Y x M x T) np.ndarray
-        Z-concatenated multi-echo data array, where M is Z * number of echos
-    """
-
-    if data.ndim < 4:
-        raise ValueError('Input data must have at least four dimensions; '
-                         'provided data has only {0}'.format(data.ndim))
-
-    (nx, ny), nz = data.shape[:2], np.prod(data.shape[2:4])
-    return data.reshape(nx, ny, nz, -1)
-
-
-def fmask(data, mask=None):
-    """
-    Masks `data` with non-zero entries of `mask`
-
-    Parameters
-    ----------
-    data : (X x Y x Z [x E [x T]) array_like or img_like object
-        Data array or data file to be masked
-    mask : (X x Y x Z) array_like or img_like object
-        Boolean array or mask file
-
-    Returns
-    -------
-    fdata : (S x E x T) np.ndarray
-        Masked `data`, where `S` is samples, `E` is echoes, and `T` is time
-    """
-
-    if mask is not None and not type(data) == type(mask):
-        raise TypeError('Provided `data` and `mask` must be of same type.')
-
-    if isinstance(data, str):
-        root, ext, addext = splitext_addext(data)
-        if ext == '.gii':
-            # mask need not apply for gii files
-            fdata = np.column_stack([f.data for f in nib.load(data).darrays])
-        else:
-            # use nilearn for other files
-            data = check_niimg(data)
-            if mask is not None:
-                # TODO: check that this uses same order to flatten
-                fdata = nimask.apply_mask(data, mask).T
-            else:
-                fdata = data.get_data().reshape((-1,) + data.shape[3:])
-    elif isinstance(data, np.ndarray):
-        # flatten data over first three dimensions and apply mask
-        fdata = data.reshape((-1,) + data.shape[3:])
-        if mask is not None:
-            fdata = fdata[mask.flatten() > 0]
-
-    return fdata.squeeze()
-
-
 def unmask(data, mask):
     """
     Unmasks `data` using non-zero entries of `mask`
 
     Parameters
     ----------
-    data : (M x E x T) array_like
-        Masked array, where `M` is the number of samples
+    data : (M [x E [x T]]) array_like
+        Masked array, where `M` is the number of `True` values in `mask`
     mask : (S,) array_like
-        Boolean array of `S` samples that was used to mask `data`
+        Boolean array of `S` samples that was used to mask `data`. It should
+        have exactly `M` True values.
 
     Returns
     -------
-    out : (S x E x T) np.ndarray
+    out : (S [x E [x T]]) np.ndarray
         Unmasked `data` array
     """
 
-    out = np.zeros((mask.shape + data.shape[1:]))
+    out = np.zeros(mask.shape + data.shape[1:])
     out[mask] = data
     return out
 
@@ -618,15 +555,13 @@ def andb(arrs):
         Integer array of summed `arrs`
     """
 
-    same_shape = []
-    for arr in arrs:
-        for arr2 in arrs:
-            same_shape.append(arr.shape == arr2.shape)
-
+    # coerce to integer and ensure same shape
+    arrs = [check_array(arr, dtype=int) for arr in arrs]
+    same_shape = [arr1.shape == arr2.shape for arr1 in arrs for arr2 in arrs]
     if not np.all(same_shape):
-        raise ValueError('All input arrays must have same shape')
+        raise ValueError('All input arrays must have same shape.')
+
+    # sum across arrays
+    result = np.sum(arrs, axis=0)
 
-    result = np.zeros(arrs[0].shape)
-    for arr in arrs:
-        result += np.array(arr, dtype=np.int)
     return result

From a784e8652e1d7f40a0faf38fa28217d9f7c3f4ae Mon Sep 17 00:00:00 2001
From: Ross Markello <rossmarkello@gmail.com>
Date: Tue, 8 May 2018 22:54:16 -0400
Subject: [PATCH 12/18] [FIX] Needed order=F for when zcat data provided

---
 tedana/interfaces/tedana.py | 35 ++++++++++++++++++-----------------
 tedana/utils/utils.py       |  3 +--
 2 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/tedana/interfaces/tedana.py b/tedana/interfaces/tedana.py
index 98b8c292f..224401759 100644
--- a/tedana/interfaces/tedana.py
+++ b/tedana/interfaces/tedana.py
@@ -637,9 +637,11 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, ref_img,
             # Do simple clustering on ranked signal-change map
             countsigFR2 = F_R2_clmaps[:, i].sum()
             countsigFS0 = F_S0_clmaps[:, i].sum()
-            Br_clmaps_R2[:, i] = spatclust(stats.rankdata(tsoc_Babs[:, i]), mask,
-                                           csize, max(tsoc_Babs.shape)-countsigFR2, ref_img)
-            Br_clmaps_S0[:, i] = spatclust(stats.rankdata(tsoc_Babs[:, i]), mask,
+            spclust_input = stats.rankdata(tsoc_Babs[:, i])
+            Br_clmaps_R2[:, i] = spatclust(spclust_input, mask,
+                                           csize, max(tsoc_Babs.shape)-countsigFR2,
+                                           ref_img)
+            Br_clmaps_S0[:, i] = spatclust(spclust_input, mask,
                                            csize, max(tsoc_Babs.shape)-countsigFS0,
                                            ref_img)
 
@@ -709,12 +711,13 @@ def selcomps(seldict, mmix, ref_img, manacc, n_echos, debug=False, olevel=2, ove
                 seldict = pickle.load(csstate_f)
                 csstate_f.close()
             except FileNotFoundError:
-                lgr.warning('++ No component data found!')
+                lgr.warning('++ Failed to load component selection data')
                 return None
 
     # Dump dictionary into variable names
+    # TODO: this is a terrible way to do things and we should change it
     for key in seldict.keys():
-        exec("%s=seldict['%s']" % (key, key))
+        exec("{0}=seldict['{0}']".format(key))
 
     # List of components
     midk = []
@@ -897,8 +900,7 @@ def selcomps(seldict, mmix, ref_img, manacc, n_echos, debug=False, olevel=2, ove
                            np.intersect1d(nc[db.labels_ == 0],
                            nc[Rhos > getelbow_mod(Rhos_sorted,
                                                   val=True)]).shape[0]])
-            if debug:
-                lgr.info('found solution', ii, db.labels_)
+            lgr.debug('++ Found solution %s %s', ii, db.labels_)
         db = None
 
     epsmap = np.array(epsmap)
@@ -908,7 +910,7 @@ def selcomps(seldict, mmix, ref_img, manacc, n_echos, debug=False, olevel=2, ove
         # Select index that maximizes Dice with guessmask but first
         # minimizes number of higher Rho components
         ii = int(epsmap[np.argmax(epsmap[epsmap[:, 2] == np.min(epsmap[:, 2]), 1], 0), 0])
-        lgr.info('Component selection tuning: ', epsmap[:, 1].max())
+        lgr.info('++ Component selection tuning: %s', epsmap[:, 1].max())
         db = DBSCAN(eps=.005+ii*.005, min_samples=3).fit(fz.T)
         ncl = nc[db.labels_ == 0]
         ncl = np.setdiff1d(ncl, rej)
@@ -918,7 +920,7 @@ def selcomps(seldict, mmix, ref_img, manacc, n_echos, debug=False, olevel=2, ove
         to_clf = np.setdiff1d(nc, np.union1d(ncl, rej))
     if len(group0) == 0 or len(group0) < len(KRguess) * .5:
         dbscanfailed = True
-        lgr.info('DBSCAN based guess failed. Using elbow guess method.')
+        lgr.info('++ DBSCAN based guess failed. Using elbow guess method.')
         ncl = np.setdiff1d(np.setdiff1d(nc[KRelbow == 2], rej),
                            np.union1d(nc[tt_table[:, 0] < tt_lim],
                            np.union1d(np.union1d(nc[spz > 1],
@@ -929,8 +931,8 @@ def selcomps(seldict, mmix, ref_img, manacc, n_echos, debug=False, olevel=2, ove
         group_n1 = []
         to_clf = np.setdiff1d(nc, np.union1d(group0, rej))
     if len(group0) < 2 or (len(group0) < 4 and float(len(rej))/len(group0) > 3):
-        lgr.info('WARNING: Extremely limited reliable BOLD signal space. '
-                 'Not filtering further into midk etc.')
+        lgr.warning('++ Extremely limited reliable BOLD signal space. '
+                    'Not filtering further into midk etc.')
         midkfailed = True
         min_acc = np.array([])
         if len(group0) != 0:
@@ -1194,7 +1196,7 @@ def tedpca(catd, OCcatd, combmode, mask, stabilize, ref_img, tes, kdaw, rdaw,
         lgr.info('++ Computing PCA of spatially concatenated multi-echo data')
         d = catd[mask].astype('float64')
     else:
-        lgr.info('++ Computing PCA of TE #%s' % ','.join([str(ee) for ee in ste]))
+        lgr.info('++ Computing PCA of echo #%s' % ','.join([str(ee) for ee in ste]))
         d = np.stack([catd[mask, ee] for ee in ste - 1], axis=1).astype('float64')
 
     eim = np.squeeze(eimask(d))
@@ -1249,7 +1251,7 @@ def tedpca(catd, OCcatd, combmode, mask, stabilize, ref_img, tes, kdaw, rdaw,
             with open('pcastate.pkl', 'wb') as handle:
                 pickle.dump(pcastate, handle)
         except TypeError:
-            lgr.warning('Could not save PCA solution.')
+            lgr.warning('++ Could not save PCA solution.')
 
     else:  # if loading existing state
         lgr.info('++ Loading PCA')
@@ -1538,7 +1540,7 @@ def write_split_ts(data, mmix, acc, rej, midk, ref_img, suffix=''):
     # get variance explained by retained components
     betas = get_coeffs(unmask(dmdata.T, mask), mask, mmix)[mask]
     varexpl = (1 - ((dmdata.T - betas.dot(mmix.T))**2.).sum() / (dmdata**2.).sum()) * 100
-    lgr.info('Variance explained: ', varexpl, '%')
+    lgr.info('++ Variance explained: {:.02f}%'.format(varexpl))
 
     # create component and de-noised time series and save to files
     hikts = betas[:, acc].dot(mmix.T[acc, :])
@@ -1725,7 +1727,7 @@ def writeresults_echoes(catd, mmix, acc, rej, midk, ref_img):
     """
 
     for i_echo in range(catd.shape[1]):
-        lgr.info("++ Writing Kappa-filtered TE#%i timeseries" % (i_echo+1))
+        lgr.info('++ Writing Kappa-filtered echo #{:01d} timeseries'.format(i_echo+1))
         write_split_ts(catd[:, i_echo, :], mmix, acc, rej, midk, ref_img,
                        suffix='e%i' % (i_echo+1))
 
@@ -1891,8 +1893,7 @@ def main(data, tes, mixm=None, ctab=None, manacc=None, strict=False,
             acc, rej, midk, empty = ctabsel(ctab)
 
     if len(acc) == 0:
-        lgr.info('** WARNING! No BOLD components detected!!! \n'
-                 '** Please check data and results!')
+        lgr.warning('++ No BOLD components detected!!! Please check data and results!')
 
     writeresults(OCcatd, mask, comptable, mmix, n_vols, acc, rej, midk, empty, ref_img)
     gscontrol_mmix(OCcatd, mmix, acc, rej, midk, ref_img)
diff --git a/tedana/utils/utils.py b/tedana/utils/utils.py
index 7d7b01796..c5d38f907 100644
--- a/tedana/utils/utils.py
+++ b/tedana/utils/utils.py
@@ -5,7 +5,6 @@
 from nibabel.filename_parser import splitext_addext
 from nilearn.image import new_img_like
 from nilearn._utils import check_niimg
-import nilearn.masking as nimask
 from scipy.optimize import leastsq
 from sklearn.utils import check_array
 
@@ -117,7 +116,7 @@ def load_data(data, n_echos=None):
     # we have a z-cat file
     img = check_niimg(data)
     (nx, ny), nz = img.shape[:2], img.shape[2] // n_echos
-    fdata = load_image(img.get_data().reshape(nx, ny, nz, n_echos, -1))
+    fdata = load_image(img.get_data().reshape(nx, ny, nz, n_echos, -1, order='F'))
 
     # create reference image
     ref_img = img.__class__(np.zeros((nx, ny, nz)), affine=img.affine,

From 4d768129da32ebdcb6de677e6faae72ec2599785 Mon Sep 17 00:00:00 2001
From: Ross Markello <rossmarkello@gmail.com>
Date: Tue, 8 May 2018 23:34:19 -0400
Subject: [PATCH 13/18] [FIX] Allow N-dimensional arrays in `utils.andb`

---
 tedana/interfaces/tedana.py | 4 +---
 tedana/tests/test_tedana.py | 2 +-
 tedana/utils/utils.py       | 2 +-
 3 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/tedana/interfaces/tedana.py b/tedana/interfaces/tedana.py
index 224401759..6327b8fd3 100644
--- a/tedana/interfaces/tedana.py
+++ b/tedana/interfaces/tedana.py
@@ -655,7 +655,7 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, ref_img,
     return seldict, comptab, betas, mmix_new
 
 
-def selcomps(seldict, mmix, ref_img, manacc, n_echos, debug=False, olevel=2, oversion=99,
+def selcomps(seldict, mmix, ref_img, manacc, n_echos, olevel=2, oversion=99,
              filecsdata=False, savecsdiag=True, strict_mode=False):
     """
     Labels components in `mmix`
@@ -673,8 +673,6 @@ def selcomps(seldict, mmix, ref_img, manacc, n_echos, debug=False, olevel=2, ove
         Comma-separated list of indices of manually accepted components
     n_echos : int
         Number of echos in original data
-    debug : bool, optional
-        Default: False
     olevel : int, optional
         Default: 2
     oversion : int, optional
diff --git a/tedana/tests/test_tedana.py b/tedana/tests/test_tedana.py
index 3abce279e..d7643cd18 100644
--- a/tedana/tests/test_tedana.py
+++ b/tedana/tests/test_tedana.py
@@ -47,12 +47,12 @@ def test_outputs():
      'T1gs.nii',
      'tsoc_orig.nii',
      'tsoc_nogs.nii',
-     # files are in order of creation above this point
      '.cc_temp.nii.gz',
      '.fcl_in.nii.gz',
      '.fcl_out.nii.gz',
      '__clin.nii.gz',
      '__clout.nii.gz',
+     # files are in order of creation above this point
      'betas_hik_OC.nii',
      'betas_hik_OC_T1c.nii',
      'betas_OC.nii',
diff --git a/tedana/utils/utils.py b/tedana/utils/utils.py
index c5d38f907..165c8b98f 100644
--- a/tedana/utils/utils.py
+++ b/tedana/utils/utils.py
@@ -555,7 +555,7 @@ def andb(arrs):
     """
 
     # coerce to integer and ensure same shape
-    arrs = [check_array(arr, dtype=int) for arr in arrs]
+    arrs = [check_array(arr, dtype=int, ensure_2d=False, allow_nd=True) for arr in arrs]
     same_shape = [arr1.shape == arr2.shape for arr1 in arrs for arr2 in arrs]
     if not np.all(same_shape):
         raise ValueError('All input arrays must have same shape.')

From 7f70268782081264462ca5693fac831c5ae50e2e Mon Sep 17 00:00:00 2001
From: Ross Markello <rossmarkello@gmail.com>
Date: Wed, 9 May 2018 19:01:40 -0400
Subject: [PATCH 14/18] [FIX] Unmask not retaining dtype

Also includes other updates to `selcomps()` to hopefully get it passing.
---
 tedana/interfaces/tedana.py | 25 +++++++++++++++++--------
 tedana/tests/test_tedana.py | 21 ++++++++++-----------
 tedana/utils/__init__.py    |  4 ++--
 tedana/utils/utils.py       |  4 +++-
 4 files changed, 32 insertions(+), 22 deletions(-)

diff --git a/tedana/interfaces/tedana.py b/tedana/interfaces/tedana.py
index 6327b8fd3..64ab54806 100644
--- a/tedana/interfaces/tedana.py
+++ b/tedana/interfaces/tedana.py
@@ -12,7 +12,7 @@
 from tedana.interfaces import (make_optcom, t2sadmap)
 from tedana.utils import (load_image, load_data, get_dtype,
                           make_min_mask, make_adaptive_mask,
-                          unmask, filewrite,
+                          unmask, filewrite, new_nii_like,
                           fitgaussian, dice, andb)
 
 import logging
@@ -776,7 +776,7 @@ def selcomps(seldict, mmix, ref_img, manacc, n_echos, olevel=2, oversion=99,
     # Time series derivative kurtosis
     mmix_dt = (mmix[:-1] - mmix[1:])
     mmix_kurt = stats.kurtosis(mmix_dt)
-    mmix_std = np.std(mmix_dt, 0)
+    mmix_std = np.std(mmix_dt, axis=0)
 
     """
     Step 1: Reject anything that's obviously an artifact
@@ -790,21 +790,30 @@ def selcomps(seldict, mmix, ref_img, manacc, n_echos, olevel=2, oversion=99,
     Step 2: Compute 3-D spatial FFT of Beta maps to detect high-spatial
     frequency artifacts
     """
-    fproj_arr = np.zeros([np.prod(mask.shape[0:2]), len(nc)])
-    fproj_arr_val = np.zeros([np.prod(mask.shape[0:2]), len(nc)])
+    # spatial information is important so for NIFTI we convert back to 3D space
+    if get_dtype(ref_img) == 'NIFTI':
+        dim1 = np.prod(ref_img.shape[:2])
+    else:
+        dim1 = mask.shape[0]
+    fproj_arr = np.zeros([dim1, len(nc)])
+    fproj_arr_val = np.zeros([dim1, len(nc)])
     spr = []
     fdist = []
     for ii in nc:
-        fproj = np.fft.fftshift(np.abs(np.fft.rfftn(unmask(seldict['PSC'],
-                                                           mask)[:, :, :, ii])))
-        fproj_z = fproj.max(2)
+        # convert data back to 3D array
+        if get_dtype(ref_img) == 'NIFTI':
+            tproj = new_nii_like(unmask(seldict['PSC'], mask)[:, ii], ref_img).get_data()
+        else:
+            tproj = unmask(seldict['PSC'], mask)[:, ii]
+        fproj = np.fft.fftshift(np.abs(np.fft.rfftn(tproj)))
+        fproj_z = fproj.max(axis=2)
         fproj[fproj == fproj.max()] = 0
         fproj_arr[:, ii] = stats.rankdata(fproj_z.flatten())
         fproj_arr_val[:, ii] = fproj_z.flatten()
         spr.append(np.array(fproj_z > fproj_z.max() / 4, dtype=np.int).sum())
         fprojr = np.array([fproj, fproj[:, :, ::-1]]).max(0)
         fdist.append(np.max([fitgaussian(fproj.max(jj))[3:].max() for
-                     jj in range(len(fprojr.shape))]))
+                     jj in range(fprojr.ndim)]))
     fdist = np.array(fdist)
     spr = np.array(spr)
 
diff --git a/tedana/tests/test_tedana.py b/tedana/tests/test_tedana.py
index d7643cd18..2c9e8894f 100644
--- a/tedana/tests/test_tedana.py
+++ b/tedana/tests/test_tedana.py
@@ -52,21 +52,20 @@ def test_outputs():
      '.fcl_out.nii.gz',
      '__clin.nii.gz',
      '__clout.nii.gz',
-     # files are in order of creation above this point
+     'veins_l0.nii',
+     'veins_l1.nii',
+     'ts_OC.nii',
+     'hik_ts_OC.nii',
+     'midk_ts_OC.nii',
+     'lowk_ts_OC.nii',
+     'dn_ts_OC.nii',
+     'betas_OC.nii',
      'betas_hik_OC.nii',
+     'feats_OC2.nii',
      'betas_hik_OC_T1c.nii',
-     'betas_OC.nii',
-     'dn_ts_OC.nii',
      'dn_ts_OC_T1c.nii',
-     'feats_OC2.nii',
-     'hik_ts_OC.nii',
      'hik_ts_OC_T1c.nii',
-     'lowk_ts_OC.nii',
-     'midk_ts_OC.nii',
-     'sphis_hik.nii',
-     'ts_OC.nii',
-     'veins_l0.nii',
-     'veins_l1.nii']
+     'sphis_hik.nii']
     test_dir = Path('/home/neuro/data/TED/')
     res_dir = Path('/home/neuro/code/TED/')
     for fn in nifti_test_list:
diff --git a/tedana/utils/__init__.py b/tedana/utils/__init__.py
index 00b45b9bc..6257f8582 100644
--- a/tedana/utils/__init__.py
+++ b/tedana/utils/__init__.py
@@ -4,7 +4,7 @@
 from .utils import (
     load_image, load_data, get_dtype,
     make_min_mask, make_adaptive_mask,
-    unmask, filewrite,
+    unmask, filewrite, new_nii_like,
     fitgaussian, dice, andb,
 )
 
@@ -12,5 +12,5 @@
 __all__ = [
     'load_image', 'load_data', 'get_dtype'
     'make_min_mask', 'make_adaptive_mask',
-    'unmask', 'filewrite',
+    'unmask', 'filewrite', 'new_nii_like',
     'fitgaussian', 'dice', 'andb']
diff --git a/tedana/utils/utils.py b/tedana/utils/utils.py
index 165c8b98f..4a46456c3 100644
--- a/tedana/utils/utils.py
+++ b/tedana/utils/utils.py
@@ -121,6 +121,8 @@ def load_data(data, n_echos=None):
     # create reference image
     ref_img = img.__class__(np.zeros((nx, ny, nz)), affine=img.affine,
                             header=img.header, extra=img.extra)
+    ref_img.header.extensions = []
+    ref_img.header.set_sform(ref_img.header.get_sform(), code=1)
 
     return fdata, ref_img
 
@@ -380,7 +382,7 @@ def unmask(data, mask):
         Unmasked `data` array
     """
 
-    out = np.zeros(mask.shape + data.shape[1:])
+    out = np.zeros(mask.shape + data.shape[1:], dtype=data.dtype)
     out[mask] = data
     return out
 

From b8b59571307bfd6674c99926297714182cb7aa60 Mon Sep 17 00:00:00 2001
From: Ross Markello <rossmarkello@gmail.com>
Date: Wed, 9 May 2018 19:43:00 -0400
Subject: [PATCH 15/18] [FIX] Bugs in selcomps for gifti/nifti

---
 tedana/interfaces/tedana.py | 42 +++++++++++++++++++------------------
 1 file changed, 22 insertions(+), 20 deletions(-)

diff --git a/tedana/interfaces/tedana.py b/tedana/interfaces/tedana.py
index 64ab54806..0e90aa69b 100644
--- a/tedana/interfaces/tedana.py
+++ b/tedana/interfaces/tedana.py
@@ -655,7 +655,7 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, ref_img,
     return seldict, comptab, betas, mmix_new
 
 
-def selcomps(seldict, mmix, ref_img, manacc, n_echos, olevel=2, oversion=99,
+def selcomps(seldict, mmix, mask, ref_img, manacc, n_echos, olevel=2, oversion=99,
              filecsdata=False, savecsdiag=True, strict_mode=False):
     """
     Labels components in `mmix`
@@ -802,7 +802,7 @@ def selcomps(seldict, mmix, ref_img, manacc, n_echos, olevel=2, oversion=99,
     for ii in nc:
         # convert data back to 3D array
         if get_dtype(ref_img) == 'NIFTI':
-            tproj = new_nii_like(unmask(seldict['PSC'], mask)[:, ii], ref_img).get_data()
+            tproj = new_nii_like(ref_img, unmask(seldict['PSC'], mask)[:, ii]).get_data()
         else:
             tproj = unmask(seldict['PSC'], mask)[:, ii]
         fproj = np.fft.fftshift(np.abs(np.fft.rfftn(tproj)))
@@ -917,7 +917,7 @@ def selcomps(seldict, mmix, ref_img, manacc, n_echos, olevel=2, oversion=99,
         # Select index that maximizes Dice with guessmask but first
         # minimizes number of higher Rho components
         ii = int(epsmap[np.argmax(epsmap[epsmap[:, 2] == np.min(epsmap[:, 2]), 1], 0), 0])
-        lgr.info('++ Component selection tuning: ', epsmap[:, 1].max())
+        lgr.info('++ Component selection tuning: {:.05f}'.format(epsmap[:, 1].max()))
         db = DBSCAN(eps=.005+ii*.005, min_samples=3).fit(fz.T)
         ncl = nc[db.labels_ == 0]
         ncl = np.setdiff1d(ncl, rej)
@@ -953,8 +953,8 @@ def selcomps(seldict, mmix, ref_img, manacc, n_echos, olevel=2, oversion=99,
                          'Kappa cut point', 'Rho cut point', 'DBSCAN failed to converge',
                          'Mid-Kappa failed (limited BOLD signal)', 'Kappa-Rho guess',
                          'min_acc', 'toacc_hi']
-        diagstep_vals = [rej, KRcut, Kcut, Rcut, dbscanfailed,
-                         midkfailed, KRguess, min_acc, toacc_hi]
+        diagstep_vals = [rej.tolist(), KRcut, Kcut, Rcut, dbscanfailed,
+                         midkfailed, KRguess.tolist(), min_acc.tolist(), toacc_hi.tolist()]
 
         with open('csstepdata.txt', 'w') as ofh:
             json.dump(dict(zip(diagstep_keys, diagstep_vals)), ofh, indent=4, sort_keys=True)
@@ -1069,7 +1069,7 @@ def selcomps(seldict, mmix, ref_img, manacc, n_echos, olevel=2, oversion=99,
         group0_res = np.intersect1d(KRguess, group0)
         phys_var_zs.append((vvex - vvex[group0_res].mean()) / vvex[group0_res].std())
         veinBout = unmask(veinmaskB, mask)
-        filewrite(veinBout, 'veins_l%i' % t2sl_i, ref_img)
+        filewrite(veinBout.astype(int), 'veins_l%i' % t2sl_i, ref_img)
 
     # Mask to sample veins
     phys_var_z = np.array(phys_var_zs).max(0)
@@ -1135,10 +1135,12 @@ def selcomps(seldict, mmix, ref_img, manacc, n_echos, olevel=2, oversion=99,
                          'Mid-kappa components', 'svm_acc_fail', 'toacc_hi', 'toacc_lo',
                          'Field artifacts', 'Physiological artifacts',
                          'Miscellaneous artifacts', 'ncl', 'Ignored components']
-        diagstep_vals = [rej, KRcut, Kcut, Rcut, dbscanfailed,
-                         KRguess, dice_rej, rej_supp, to_clf,
-                         midk, svm_acc_fail, toacc_hi, toacc_lo,
-                         field_art, phys_art, misc_art, ncl, ign]
+        diagstep_vals = [rej.tolist(), KRcut, Kcut, Rcut, dbscanfailed,
+                         KRguess.tolist(), dice_rej, rej_supp.tolist(),
+                         to_clf.tolist(), midk.tolist(), svm_acc_fail,
+                         toacc_hi.tolist(), toacc_lo.tolist(),
+                         field_art.tolist(), phys_art.tolist(),
+                         misc_art.tolist(), ncl.tolist(), ign.tolist()]
 
         with open('csstepdata.txt', 'w') as ofh:
             json.dump(dict(zip(diagstep_keys, diagstep_vals)), ofh, indent=4, sort_keys=True)
@@ -1437,7 +1439,7 @@ def gscontrol_raw(catd, optcom, n_echos, ref_img, dtrank=4):
     return dm_catd, dm_optcom
 
 
-def gscontrol_mmix(OCcatd, mmix, acc, rej, midk, ref_img):
+def gscontrol_mmix(OCcatd, mmix, mask, acc, rej, midk, ref_img):
     """
     Parameters
     ----------
@@ -1512,7 +1514,7 @@ def gscontrol_mmix(OCcatd, mmix, acc, rej, midk, ref_img):
     np.savetxt('meica_mix_T1c.1D', mmixnogs)
 
 
-def write_split_ts(data, mmix, acc, rej, midk, ref_img, suffix=''):
+def write_split_ts(data, mmix, mask, acc, rej, midk, ref_img, suffix=''):
     """
     Splits `data` into denoised / noise / ignored time series and saves to disk
 
@@ -1547,7 +1549,7 @@ def write_split_ts(data, mmix, acc, rej, midk, ref_img, suffix=''):
     # get variance explained by retained components
     betas = get_coeffs(unmask(dmdata.T, mask), mask, mmix)[mask]
     varexpl = (1 - ((dmdata.T - betas.dot(mmix.T))**2.).sum() / (dmdata**2.).sum()) * 100
-    lgr.info('++ Variance explained: ', varexpl, '%')
+    lgr.info('++ Variance explained: {:.02f}%'.format(varexpl))
 
     # create component and de-noised time series and save to files
     hikts = betas[:, acc].dot(mmix.T[acc, :])
@@ -1697,7 +1699,7 @@ def writeresults(ts, mask, comptable, mmix, n_vols, acc, rej, midk, empty, ref_i
     lgr.info('++ Writing optimally combined time series')
     filewrite(ts, 'ts_OC', ref_img)
     lgr.info('++ Writing Kappa-filtered optimally combined timeseries')
-    varexpl = write_split_ts(ts, mmix, acc, rej, midk, ref_img, suffix='OC')
+    varexpl = write_split_ts(ts, mmix, mask, acc, rej, midk, ref_img, suffix='OC')
     lgr.info('++ Writing signal versions of components')
     ts_B = get_coeffs(ts, mask, mmix)
     filewrite(ts_B, 'betas_OC', ref_img)
@@ -1712,7 +1714,7 @@ def writeresults(ts, mask, comptable, mmix, n_vols, acc, rej, midk, empty, ref_i
             varexpl=varexpl)
 
 
-def writeresults_echoes(catd, mmix, acc, rej, midk, ref_img):
+def writeresults_echoes(catd, mmix, mask, acc, rej, midk, ref_img):
     """
     Saves individually denoised echos to disk
 
@@ -1735,7 +1737,7 @@ def writeresults_echoes(catd, mmix, acc, rej, midk, ref_img):
 
     for i_echo in range(catd.shape[1]):
         lgr.info('++ Writing Kappa-filtered echo #{:01d} timeseries'.format(i_echo+1))
-        write_split_ts(catd[:, i_echo, :], mmix, acc, rej, midk, ref_img,
+        write_split_ts(catd[:, i_echo, :], mmix, mask, acc, rej, midk, ref_img,
                        suffix='e%i' % (i_echo+1))
 
 
@@ -1881,7 +1883,7 @@ def main(data, tes, mixm=None, ctab=None, manacc=None, strict=False,
                                                            reindex=True)
         np.savetxt(op.join(out_dir, 'meica_mix.1D'), mmix)
 
-        acc, rej, midk, empty = selcomps(seldict, mmix, ref_img, manacc, n_echos,
+        acc, rej, midk, empty = selcomps(seldict, mmix, mask, ref_img, manacc, n_echos,
                                          strict_mode=strict,
                                          filecsdata=filecsdata)
     else:
@@ -1892,7 +1894,7 @@ def main(data, tes, mixm=None, ctab=None, manacc=None, strict=False,
                                                            ref_img,
                                                            fout=fout)
         if ctab is None:
-            acc, rej, midk, empty = selcomps(seldict, mmix, ref_img, manacc,
+            acc, rej, midk, empty = selcomps(seldict, mmix, mask, ref_img, manacc,
                                              n_echos,
                                              filecsdata=filecsdata,
                                              strict_mode=strict)
@@ -1903,6 +1905,6 @@ def main(data, tes, mixm=None, ctab=None, manacc=None, strict=False,
         lgr.warning('++ No BOLD components detected!!! Please check data and results!')
 
     writeresults(OCcatd, mask, comptable, mmix, n_vols, acc, rej, midk, empty, ref_img)
-    gscontrol_mmix(OCcatd, mmix, acc, rej, midk, ref_img)
+    gscontrol_mmix(OCcatd, mmix, mask, acc, rej, midk, ref_img)
     if dne:
-        writeresults_echoes(catd, mmix, acc, rej, midk, ref_img)
+        writeresults_echoes(catd, mmix, mask, acc, rej, midk, ref_img)

From ddafa7c02ae5c0ab526f70d987eed9ffe818763d Mon Sep 17 00:00:00 2001
From: Ross Markello <rossmarkello@gmail.com>
Date: Wed, 9 May 2018 22:26:25 -0400
Subject: [PATCH 16/18] [FIX] Minor updates to doc-strings + names

---
 tedana/interfaces/tedana.py | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/tedana/interfaces/tedana.py b/tedana/interfaces/tedana.py
index 0e90aa69b..65a032f40 100644
--- a/tedana/interfaces/tedana.py
+++ b/tedana/interfaces/tedana.py
@@ -110,7 +110,7 @@ def spatclust(data, mask, csize, thr, ref_img, infile=None, dindex=0,
     if infile is None:
         data = data.copy()
         data[data < thr] = 0
-        infile = filewrite(unmask(data, mask), '__clin', ref_img, gzip=True)
+        infile = filewrite(unmask(data, mask), '__clin.nii.gz', ref_img)
 
     # FIXME: ideally no calls to os.system!!! (or AFNI, for that matter)
     addopts = ''
@@ -503,7 +503,7 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, ref_img,
     n_samp, n_echos, n_components = betas.shape
     n_voxels = mask.sum()
     n_data_voxels = (t2s != 0).sum()
-    mu = catd.mean(axis=-1)
+    mu = catd.mean(axis=-1)  # BUG: THIS IS THE BAD PLACE
     tes = np.reshape(tes, (n_echos, 1))
     fmin, fmid, fmax = getfbounds(n_echos)
 
@@ -600,7 +600,7 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, ref_img,
             if fout is not None:
                 ccname, gzip = 'cc{:03d}'.format(i), False
             else:
-                ccname, gzip = '.cc_temp', True
+                ccname, gzip = '.cc_temp.nii.gz', True
 
             out[:, 0] = np.squeeze(unmask(PSC[:, i], mask))
             out[:, 1] = np.squeeze(unmask(F_R2_maps[:, i], t2s != 0))
@@ -667,9 +667,11 @@ def selcomps(seldict, mmix, mask, ref_img, manacc, n_echos, olevel=2, oversion=9
     mmix : (C x T) array_like
         Mixing matrix for converting input data to component space, where `C`
         is components and `T` is the number of volumes in the original data
-    ref_img
+    mask : (S,) array_like
+        Boolean mask array
+    ref_img : str or img_like
         Reference image to dictate how outputs are saved to disk
-    manacc
+    manacc : list
         Comma-separated list of indices of manually accepted components
     n_echos : int
         Number of echos in original data
@@ -956,7 +958,7 @@ def selcomps(seldict, mmix, mask, ref_img, manacc, n_echos, olevel=2, oversion=9
         diagstep_vals = [rej.tolist(), KRcut, Kcut, Rcut, dbscanfailed,
                          midkfailed, KRguess.tolist(), min_acc.tolist(), toacc_hi.tolist()]
 
-        with open('csstepdata.txt', 'w') as ofh:
+        with open('csstepdata.json', 'w') as ofh:
             json.dump(dict(zip(diagstep_keys, diagstep_vals)), ofh, indent=4, sort_keys=True)
         return list(sorted(min_acc)), list(sorted(rej)), [], list(sorted(to_clf))
 
@@ -1142,7 +1144,7 @@ def selcomps(seldict, mmix, mask, ref_img, manacc, n_echos, olevel=2, oversion=9
                          field_art.tolist(), phys_art.tolist(),
                          misc_art.tolist(), ncl.tolist(), ign.tolist()]
 
-        with open('csstepdata.txt', 'w') as ofh:
+        with open('csstepdata.json', 'w') as ofh:
             json.dump(dict(zip(diagstep_keys, diagstep_vals)), ofh, indent=4, sort_keys=True)
         allfz = np.array([Tz, Vz, Ktz, KRr, cnz, Rz, mmix_kurt, fdist_z])
         np.savetxt('csdata.txt', allfz)
@@ -1448,6 +1450,8 @@ def gscontrol_mmix(OCcatd, mmix, mask, acc, rej, midk, ref_img):
     mmix : (C x T) array_like
         Mixing matrix for converting input data to component space, where `C`
         is components and `T` is the same as in `OCcatd`
+    mask : (S,) array_like
+        Boolean mask array
     acc : list
         Indices of accepted (BOLD) components in `mmix`
     rej : list
@@ -1525,6 +1529,8 @@ def write_split_ts(data, mmix, mask, acc, rej, midk, ref_img, suffix=''):
     mmix : (C x T) array_like
         Mixing matrix for converting input data to component space, where `C`
         is components and `T` is the same as in `data`
+    mask : (S,) array_like
+        Boolean mask array
     acc : list
         Indices of accepted (BOLD) components in `mmix`
     rej : list
@@ -1725,6 +1731,8 @@ def writeresults_echoes(catd, mmix, mask, acc, rej, midk, ref_img):
     mmix : (C x T) array_like
         Mixing matrix for converting input data to component space, where `C`
         is components and `T` is the same as in `data`
+    mask : (S,) array_like
+        Boolean mask array
     acc : list
         Indices of accepted (BOLD) components in `mmix`
     rej : list

From f9810ade5814c0ddb82423d59ab566feb1e6c56e Mon Sep 17 00:00:00 2001
From: Ross Markello <rossmarkello@gmail.com>
Date: Wed, 9 May 2018 22:49:04 -0400
Subject: [PATCH 17/18] [FIX] float32 / float64 bug in `fitmodels_direct()`

---
 tedana/interfaces/tedana.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/tedana/interfaces/tedana.py b/tedana/interfaces/tedana.py
index 65a032f40..25be1fa07 100644
--- a/tedana/interfaces/tedana.py
+++ b/tedana/interfaces/tedana.py
@@ -110,7 +110,7 @@ def spatclust(data, mask, csize, thr, ref_img, infile=None, dindex=0,
     if infile is None:
         data = data.copy()
         data[data < thr] = 0
-        infile = filewrite(unmask(data, mask), '__clin.nii.gz', ref_img)
+        infile = filewrite(unmask(data, mask), '__clin.nii.gz', ref_img, gzip=True)
 
     # FIXME: ideally no calls to os.system!!! (or AFNI, for that matter)
     addopts = ''
@@ -503,7 +503,7 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, ref_img,
     n_samp, n_echos, n_components = betas.shape
     n_voxels = mask.sum()
     n_data_voxels = (t2s != 0).sum()
-    mu = catd.mean(axis=-1)  # BUG: THIS IS THE BAD PLACE
+    mu = catd.mean(axis=-1, dtype=float)  # BUG: THIS IS THE BAD PLACE
     tes = np.reshape(tes, (n_echos, 1))
     fmin, fmid, fmax = getfbounds(n_echos)
 
@@ -619,11 +619,12 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, ref_img,
 
             # Do simple clustering on F
             # TODO: can be replaced with nilearn.image.threshold_img
-            os.system('3dcalc -overwrite -a {}[1..2] -expr \'a*step(a-{})\' -prefix '
-                      '.fcl_in.nii.gz -overwrite'.format(ccname, fmin))
+            # TODO: fmin is being cast to an integer here -- is that purposeful?!
+            os.system('3dcalc -overwrite -a {}[1..2] -expr \'a*step(a-{:0d})\' -prefix '
+                      '.fcl_in.nii.gz -overwrite'.format(ccname, int(fmin)))
             # TODO: can be replaced with nilearn.regions.connected_regions
-            os.system('3dmerge -overwrite -dxyz=1 -1clust 1 {} -doall '
-                      '-prefix .fcl_out.nii.gz .fcl_in.nii.gz'.format(csize))
+            os.system('3dmerge -overwrite -dxyz=1 -1clust 1 {:0d} -doall '
+                      '-prefix .fcl_out.nii.gz .fcl_in.nii.gz'.format(int(csize)))
             sel = load_image('.fcl_out.nii.gz')[t2s != 0]
             sel = np.array(sel != 0, dtype=np.int)
             F_R2_clmaps[:, i] = sel[:, 0]
@@ -1071,7 +1072,7 @@ def selcomps(seldict, mmix, mask, ref_img, manacc, n_echos, olevel=2, oversion=9
         group0_res = np.intersect1d(KRguess, group0)
         phys_var_zs.append((vvex - vvex[group0_res].mean()) / vvex[group0_res].std())
         veinBout = unmask(veinmaskB, mask)
-        filewrite(veinBout.astype(int), 'veins_l%i' % t2sl_i, ref_img)
+        filewrite(veinBout.astype(float), 'veins_l%i' % t2sl_i, ref_img)
 
     # Mask to sample veins
     phys_var_z = np.array(phys_var_zs).max(0)

From ef6c34ddfb0ce5d181de075d0082e988ebd5efe6 Mon Sep 17 00:00:00 2001
From: Ross Markello <rossmarkello@gmail.com>
Date: Thu, 10 May 2018 10:38:52 -0400
Subject: [PATCH 18/18] [FIX] Address review comments for #22

Minor changes to address review comments for PR #22
---
 tedana/interfaces/t2smap.py | 4 +---
 tedana/interfaces/tedana.py | 9 ++++-----
 2 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/tedana/interfaces/t2smap.py b/tedana/interfaces/t2smap.py
index 8e2d5d3af..275bb1695 100644
--- a/tedana/interfaces/t2smap.py
+++ b/tedana/interfaces/t2smap.py
@@ -114,7 +114,7 @@ def t2sadmap(data, tes, mask, masksum, start_echo):
         x = np.column_stack([np.ones(echo), [-te for te in tes[:echo]]])
         X = np.repeat(x, n_vols, axis=0)
 
-        beta, res, rank, sing = np.linalg.lstsq(X, B)
+        beta = np.linalg.lstsq(X, B)[0]
         t2s = 1. / beta[1, :].T
         s0 = np.exp(beta[0, :]).T
 
@@ -145,8 +145,6 @@ def make_optcom(data, t2s, tes, mask, combmode):
     """
     Optimally combine BOLD data across TEs.
 
-    out = make_optcom(data,t2s)
-
     Parameters
     ----------
     data : (S x E x T) :obj:`numpy.ndarray`
diff --git a/tedana/interfaces/tedana.py b/tedana/interfaces/tedana.py
index 25be1fa07..203e82a80 100644
--- a/tedana/interfaces/tedana.py
+++ b/tedana/interfaces/tedana.py
@@ -50,8 +50,8 @@ def do_svm(X_train, y_train, X_test, svmtype=0):
     X_test : (N2 x F) array_like
         Test vectors, where n_samples is the number of samples in the test
         dataset and n_features is the number of features.
-    svmtype : int
-        Desired support vector machine type
+    svmtype : int, optional
+        Desired support vector machine type. Must be in [0, 1, 2]. Default: 0
 
     Returns
     -------
@@ -68,7 +68,7 @@ def do_svm(X_train, y_train, X_test, svmtype=0):
     elif svmtype == 2:
         clf = svm.SVC(kernel='linear', probability=True)
     else:
-        raise ValueError('Input svmtype not in [1, 2, 3]')
+        raise ValueError('Input svmtype not in [0, 1, 2]: {}'.format(svmtype))
 
     clf.fit(X_train, y_train)
     y_pred = clf.predict(X_test)
@@ -112,7 +112,6 @@ def spatclust(data, mask, csize, thr, ref_img, infile=None, dindex=0,
         data[data < thr] = 0
         infile = filewrite(unmask(data, mask), '__clin.nii.gz', ref_img, gzip=True)
 
-    # FIXME: ideally no calls to os.system!!! (or AFNI, for that matter)
     addopts = ''
     if data is not None and data.squeeze().ndim > 1 and dindex + tindex == 0:
         addopts = '-doall'
@@ -503,7 +502,7 @@ def fitmodels_direct(catd, mmix, mask, t2s, t2sG, tes, combmode, ref_img,
     n_samp, n_echos, n_components = betas.shape
     n_voxels = mask.sum()
     n_data_voxels = (t2s != 0).sum()
-    mu = catd.mean(axis=-1, dtype=float)  # BUG: THIS IS THE BAD PLACE
+    mu = catd.mean(axis=-1, dtype=float)
     tes = np.reshape(tes, (n_echos, 1))
     fmin, fmid, fmax = getfbounds(n_echos)