-
Notifications
You must be signed in to change notification settings - Fork 0
/
cmatrices.py
205 lines (156 loc) · 7.8 KB
/
cmatrices.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
from scipy.sparse import coo_matrix
from scipy.stats import spearmanr
from copy import deepcopy
import joblib
import nibabel as nib
import numpy as np
import pandas as pd
def mask_img(img, val):
    """Return a binary copy of ``img`` selecting the voxels equal to ``val``.

    Borrowed from de la Vega.

    Args:
        img: An image object exposing ``get_data()``; it is deep-copied, so
            the caller's image is left untouched.
        val: The (integer) label to select. The data are rounded before the
            comparison.

    Returns:
        The copied image, whose data array is 1 where the rounded data equal
        ``val`` and 0 everywhere else.
    """
    img = deepcopy(img)
    # NOTE(review): ``get_data()`` is deprecated in recent nibabel releases;
    # it is kept because the mask is written into the array it returns.
    data = img.get_data()
    # Decide which voxels are selected *before* overwriting anything. Zeroing
    # the non-matching voxels first and then testing ``data == val`` turns the
    # whole volume into ones when ``val`` is 0.
    selected = np.round(data) == val
    data[:] = selected
    return img
class NeedsNewName(object):
    """The core object for working with sparse connectivity matrices.

    Voxels are identified by 1-based ids (FDT matrices label voxels
    'voxel 1, 2, etc.'), so row 0 and column 0 of the connectivity matrix
    never hold data and are dropped wherever a dense result is produced.

    Args:
        data: The connectivity matrix, whole-brain-by-whole-brain. Either the
            matrix itself (scipy sparse matrix or numpy array) or something
            ``joblib.load`` can read one from (e.g. a file name).
        reference (nibabel nifti1): This nifti file holds the identities of
            the voxels in the connectivity matrix. It is the conversion
            between FDT coordinates and nibabel coordinates.
        mask (nibabel nifti1): A mask for the columns of the connectivity
            matrix. Most often will be a gray-matter mask. It is stored but
            not used by any method yet.

    Attributes:
        data: The connectivity matrix in COO format.
        reference: Array of voxel ids with the shape of the reference image.
        reference_img: The reference image itself (source of header/affine).
        mask: The mask passed to the constructor.
    """

    def __init__(self, data, reference, mask=None):
        if not (hasattr(data, 'tocoo') or isinstance(data, np.ndarray)):
            # NOTE: joblib.load unpickles its input; only load trusted files.
            data = joblib.load(data)
        self.data = coo_matrix(data)
        # Keep the image as well as its data: init_nifti needs the header
        # and affine, which the bare array does not carry.
        self.reference_img = reference
        # NOTE(review): ``get_data()`` is deprecated in recent nibabel
        # releases; it is kept for consistency with ``mask_img``.
        self.reference = reference.get_data()
        self.mask = mask

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    def _roi_voxel_ids(self, img):
        """Return the reference ids of the voxels where ``img`` equals 1.

        Ids are returned in C order of the volume. Id 0 (voxels without an
        entry in the reference) is dropped.
        """
        ids = self.reference[img.get_data() == 1].astype(int)
        return ids[ids > 0]

    def _paint_voxels(self, voxel_ids, values):
        """Return a float volume shaped like the reference with ``values[k]``
        written at the voxel whose id is ``voxel_ids[k]`` and 0 elsewhere."""
        voxel_ids = np.asarray(voxel_ids, dtype=int)
        ref_ids = self.reference.astype(int)
        size = 1
        if ref_ids.size:
            size = max(size, int(ref_ids.max()) + 1)
        if voxel_ids.size:
            size = max(size, int(voxel_ids.max()) + 1)
        lookup = np.zeros(size)
        lookup[voxel_ids] = values
        return lookup[ref_ids]

    @staticmethod
    def _correlation(matrix, method):
        """Return the row-by-row correlation matrix of ``matrix``.

        Args:
            matrix: 2-D array with one variable per row.
            method: ``'pearson'`` or ``'spearman'``.

        Raises:
            ValueError: if ``method`` is neither of the two.
        """
        if method == 'pearson':
            cc = np.corrcoef(matrix)
        elif method == 'spearman':
            # axis=1 makes spearmanr treat rows as variables, like corrcoef.
            cc = spearmanr(matrix, axis=1)[0]
            if np.ndim(cc) == 0 and np.shape(matrix)[0] == 2:
                # With exactly two variables scipy returns a bare scalar.
                cc = np.array([[1.0, cc], [cc, 1.0]])
        else:
            raise ValueError('method should be either pearson or spearman. '
                             'The method was: {}'.format(method))
        # Rows without variance yield NaN correlations; report them as 0.
        return np.atleast_2d(np.nan_to_num(cc))

    # ------------------------------------------------------------------
    # public API
    # ------------------------------------------------------------------
    def init_nifti(self, img_data):
        """Wrap ``img_data`` in a Nifti1Image that uses the reference image's
        affine and a copy of its header (for saving images)."""
        # Work on a copy so the reference image's own header is not modified.
        header = self.reference_img.header.copy()
        header.set_data_dtype(img_data.dtype)
        header['cal_max'] = img_data.max()
        header['cal_min'] = img_data.min()
        return nib.nifti1.Nifti1Image(img_data,
                                      affine=self.reference_img.affine,
                                      header=header)

    def get_roi_matrix(self, img):
        """Generate the connectivity matrix between every voxel in an roi
        and every voxel of the whole-brain matrix.

        Args:
            img: Image whose voxels equal to 1 define the roi.

        Returns:
            scipy coo_matrix of shape ``(n_roi_voxels + 1, n_columns)`` where
            ``n_columns`` is the column count of ``self.data``. Row ``k + 1``
            holds the connections of the k-th roi voxel; row 0 is empty so
            that rows, like columns, are 1-based.
        """
        roi_ids = self._roi_voxel_ids(img)
        rows = self.data.row.astype(int)
        cols = self.data.col.astype(int)
        vals = self.data.data
        n_rows, n_cols = self.data.shape

        # Map voxel id -> 1-based roi row number (0 means "not in the roi").
        # A lookup table renames every row in one pass and cannot collide the
        # way sequential in-place renaming does.
        table_size = n_rows
        if roi_ids.size:
            table_size = max(table_size, int(roi_ids.max()) + 1)
        row_number = np.zeros(table_size, dtype=int)
        row_number[roi_ids] = np.arange(1, len(roi_ids) + 1)

        new_rows = row_number[rows]
        keep = new_rows > 0
        # An explicit shape keeps trailing voxels without connections and
        # gives every roi the same column space.
        return coo_matrix((vals[keep], (new_rows[keep], cols[keep])),
                          shape=(len(roi_ids) + 1, n_cols))

    def cluster(self, img, model, n_clusters=3, method='pearson'):
        """Run a clustering algorithm on a given roi.

        Args:
            img: Image whose voxels equal to 1 define the roi.
            model: Clustering class; it is called as
                ``model(n_clusters=n_clusters)`` and must provide
                ``fit_predict``.
            n_clusters: Number of clusters requested from ``model``.
            method: ``'pearson'`` or ``'spearman'``; the correlation used to
                compare the connectivity profiles of the roi voxels.

        Returns:
            Nifti1Image in which every roi voxel carries its cluster label
            (starting at 1) and all other voxels are 0.

        Raises:
            ValueError: if ``method`` is not supported.
        """
        roi_ids = self._roi_voxel_ids(img)
        mat = self.get_roi_matrix(img).toarray()
        # voxel ids are 1-based, so row 0 and column 0 are always empty
        mat = mat[1:, 1:]
        cc = self._correlation(mat, method)
        labels = model(n_clusters=n_clusters).fit_predict(cc) + 1
        return self.init_nifti(self._paint_voxels(roi_ids, labels))

    def sum_streamline_count(self, img):
        """Generate an array of the streamline count between a cluster and
        each voxel in the rest of the brain.

        Returns:
            1-D array whose entry ``j`` is the total streamline count between
            the roi (voxels of ``img`` equal to 1) and voxel id ``j + 1``.
        """
        mat = self.get_roi_matrix(img)
        # total per target voxel: sum each column over all roi rows
        totals = np.asarray(mat.sum(axis=0)).ravel()
        # the zero voxel doesn't exist so we'll remove it
        return np.delete(totals, 0)

    def get_cluster_similarity(self, img, method='pearson'):
        """Compare the connectivity distributions of different clusters.

        Args:
            img: Label image; clusters are numbered 1..max.
            method: ``'pearson'`` or ``'spearman'``.

        Returns:
            Cluster-by-cluster correlation matrix (NaN replaced by 0).

        Raises:
            ValueError: if ``method`` is not supported.
        """
        n_clusters = int(img.get_data().max())
        connectivity_vectors = [
            self.sum_streamline_count(mask_img(img, label))
            for label in range(1, n_clusters + 1)
        ]
        return self._correlation(np.vstack(connectivity_vectors), method)

    def get_paths(self, img):  # still working on it
        """Create fsl fdt_paths-style images for each cluster in an image.

        Args:
            img: Label image; clusters are numbered 1..max.

        Returns:
            List with one Nifti1Image per cluster, in label order, in which
            every voxel holds its streamline count to that cluster.
        """
        n_clusters = int(img.get_data().max())
        images = []
        for label in range(1, n_clusters + 1):
            connections = self.sum_streamline_count(mask_img(img, label))
            voxel_ids = np.arange(1, len(connections) + 1)
            images.append(
                self.init_nifti(self._paint_voxels(voxel_ids, connections)))
        return images

    def connections_to_targets(self, img, targets, labels=None):
        """Given an roi, return a dataframe of connections between the roi
        and each of the targets.

        Args:
            img: Image whose voxels equal to 1 define the roi.
            targets: File name of a label image readable by ``nib.load``;
                targets are numbered 1..max.
            labels: Optional names for the targets, in label order.

        Returns:
            pandas DataFrame with a ``connections`` column (summed streamline
            count per target) and, if given, a ``labels`` column.
        """
        counts = self.sum_streamline_count(img)
        stat_data = self._paint_voxels(np.arange(1, len(counts) + 1), counts)
        target_data = nib.load(targets).get_data().round()
        n_targets = int(target_data.max())
        connections = [stat_data[target_data == label].sum()
                       for label in range(1, n_targets + 1)]
        df = pd.DataFrame()
        df['connections'] = pd.Series(connections)
        if labels is not None:
            df['labels'] = pd.Series(labels)
        return df

    # work in progress
    #def find_the_biggest(self,*args):
    #    """hard segmentation based on fdt find_the_biggest """
    #    labels = np.arange(1,len(args)+1)
    #    brain_data = [nib.load(arg).get_data() for arg in args]
    #    segmented_brain = np.zeros([91,109,91])
    #    for_completement = np.arange(0,len(args))
    #    for i in range(len(brain_data)):
    #        complement = np.delete(for_completement,i)
    #        indices = np.where(brain_data[i]>brain_data[j] for j in complement) # not real python syntax
    #        segmented_brain[indices] = labels[i]
    #    return self.init_nifti(segmented_brain)

    # work in progress
    #def network(self,regions,labels=None):
    #    """returns a dataframe of streamline counts between all regions"""
    #    region_data = nib.load(regions).get_data()
    #
    #    # if no labels are provided, regions will have numeric labels
    #    if labels is None:
    #        labels = np.arange(region_data.max())
    #
    #    df = pd.DataFrame(columns=labels,index=labels)
    #
    #    for i in range(1,region_data.max()+1):
    #        region = mask_img(regions)
    #        region_connections = self.connections_to_targets(region,regions)
    #
    #
    #    return df

    def save(self, filename):
        """Serialize this object to ``filename`` with joblib."""
        joblib.dump(self, filename)

    def load(self, filename):
        """Load and return an object previously written with ``save``.

        NOTE: joblib.load unpickles its input; only load trusted files.
        """
        return joblib.load(filename)