Skip to content

Commit

Permalink
Local changes from Markus from last summer to prepare now the IUC09 conda release with the refactored NeXus-based code for Alaukik
Browse files Browse the repository at this point in the history
  • Loading branch information
mkuehbach committed Apr 10, 2024
1 parent 73d1086 commit 7e9832f
Show file tree
Hide file tree
Showing 10 changed files with 1,090 additions and 59 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,7 @@ traj.*
*.sub*
*.dat
*log*
*.h5
*.nxs
*.vtu
*.png
77 changes: 39 additions & 38 deletions compositionspace/segmentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from compositionspace.models import get_model
from sklearn.decomposition import PCA
from sklearn.mixture import GaussianMixture
import json
import json
import h5py
import numpy as np
import pandas as pd
Expand All @@ -15,7 +15,7 @@
import pyvista as pv

class CompositionClustering():

def __init__(self, inputfile):
if isinstance(inputfile, dict):
self.params = inputfile
Expand All @@ -40,13 +40,13 @@ def get_PCA_cumsum(self, vox_ratio_file, vox_file):
print(len(ratios))
print((ratios_columns))

ratios = pd.DataFrame(data=ratios, columns=ratios_columns)
ratios = pd.DataFrame(data=ratios, columns=ratios_columns)

X_train=ratios.drop(['Total_no','vox'], axis=1)
PCAObj = PCA(n_components = len(spec_lst))
PCAObj = PCA(n_components = len(spec_lst))
PCATrans = PCAObj.fit_transform(X_train)
PCACumsumArr = np.cumsum(PCAObj.explained_variance_ratio_)

plt.figure(figsize=(5,5))
plt.plot( range(1,len(PCACumsumArr)+1,1),PCACumsumArr,"-o")
plt.ylabel("Explained Variance")
Expand All @@ -55,13 +55,13 @@ def get_PCA_cumsum(self, vox_ratio_file, vox_file):
output_path = os.path.join(self.params["output_path"], "PCA_cumsum.png")
plt.savefig(output_path)
plt.show()

return PCACumsumArr, ratios



def get_bics_minimization(self, vox_ratio_file, vox_file):

with h5py.File(vox_file,"r") as hdf:
group = hdf.get("Group_sm_vox_xyz_Da_spec")
group0 = hdf.get("0")
Expand All @@ -72,16 +72,16 @@ def get_bics_minimization(self, vox_ratio_file, vox_file):
ratios_columns = list(list(hdfr.attrs.values())[0])
group_name = list(list(hdfr.attrs.values())[1])

ratios = pd.DataFrame(data=ratios, columns=ratios_columns)
ratios = pd.DataFrame(data=ratios, columns=ratios_columns)

gm_scores=[]
aics=[]
bics=[]

X_train=ratios.drop(['Total_no','vox'], axis=1)

n_clusters=list(range(1,self.params["bics_clusters"]))

pbar = tqdm(n_clusters, desc="Clustering")
for n_cluster in pbar:
gm = GaussianMixture(n_components=n_cluster,verbose=0)
Expand All @@ -90,16 +90,16 @@ def get_bics_minimization(self, vox_ratio_file, vox_file):
#gm_scores.append(homogeneity_score(y,y_pred))
aics.append(gm.aic(X_train))
bics.append(gm.bic(X_train))

output_path = os.path.join(self.params["output_path"], "bics_aics.png")
plt.plot(n_clusters, aics, "-o",label="AIC")
plt.plot(n_clusters, bics, "-o",label="BIC")
plt.legend()
plt.savefig(output_path)
plt.show()
return self.params["bics_clusters"], aics, bics
return self.params["bics_clusters"], aics, bics


def calculate_centroid(self, data):
"""
Calculate centroid
Expand Down Expand Up @@ -140,22 +140,22 @@ def get_voxel_centroid(self, vox_file, files_arr):
dic_centroids["z"] = []
dic_centroids["file_name"] = []
df_centroids = pd.DataFrame(columns=['x', 'y', 'z','filename'])

for filename in files_arr:
group = np.min(item_lst[[filename in range(j[0],j[1]) for j in item_lst]])
xyz_Da_spec_atoms = np.array(hdf.get("{}/{}".format(group, filename)))
x, y, z = self.calculate_centroid(xyz_Da_spec_atoms)
dic_centroids["x"].append(x)
dic_centroids["y"].append(y)
dic_centroids["z"].append(z)
dic_centroids["file_name"].append(filename)
dic_centroids["file_name"].append(filename)
return dic_centroids


def get_composition_cluster_files(self, vox_ratio_file, vox_file, n_components):

ml_params = self.params["ml_models"]

with h5py.File(vox_file,"r") as hdf:
group = hdf.get("Group_sm_vox_xyz_Da_spec")
group0 = hdf.get("0")
Expand All @@ -166,21 +166,22 @@ def get_composition_cluster_files(self, vox_ratio_file, vox_file, n_components):
ratios_columns = list(list(hdfr.attrs.values())[0])
group_name = list(list(hdfr.attrs.values())[1])

ratios = pd.DataFrame(data=ratios, columns=ratios_columns)

ratios = pd.DataFrame(data=ratios, columns=ratios_columns)
print(f"ratios {ratios}")

X_train=ratios.drop(['Total_no','vox'], axis=1)

gm = get_model(ml_params=ml_params)
gm.fit(X_train)
y_pred=gm.predict(X_train)

cluster_lst = []
for phase in range(n_components):
cluster_lst.append(np.argwhere(y_pred == phase).flatten())
cluster_lst.append(np.argwhere(y_pred == phase).flatten())
df_lst = []
for cluster in cluster_lst:
df_lst.append(ratios.iloc[cluster])

#sorting
cluster_lst_sort = []
len_arr = np.array([len(x) for x in cluster_lst])
Expand All @@ -190,9 +191,9 @@ def get_composition_cluster_files(self, vox_ratio_file, vox_file, n_components):

#print([len(x) for x in cluster_lst_sort])
cluster_lst = cluster_lst_sort

return cluster_lst, ratios

def get_composition_clusters(self, vox_ratio_file, vox_file, outfile="vox_centroid_file.h5"):
voxel_centroid_output_file = []
n_components = self.params["n_phases"]
Expand All @@ -210,7 +211,7 @@ def get_composition_clusters(self, vox_ratio_file, vox_file, outfile="vox_centro
plot_files_group = []
for cluster_files in plot_files:
plot_files_group.append([int(file_num) for file_num in cluster_files ])

with h5py.File(vox_file,"r") as hdf_sm_r:
hdf_sm_r = h5py.File(vox_file,"r")
group = hdf_sm_r.get("0")
Expand All @@ -224,7 +225,7 @@ def get_composition_clusters(self, vox_ratio_file, vox_file, outfile="vox_centro
total_voxels_int = int(total_voxels_int)
hdf_sm_r.close()
plot_files_cl_All_group = [file_num for file_num in range(total_voxels_int)]

plot_files_group.append(plot_files_cl_All_group)
output_path = os.path.join(self.params["output_path"], outfile)
with h5py.File(output_path,"w") as hdfw:
Expand All @@ -240,21 +241,21 @@ def get_composition_clusters(self, vox_ratio_file, vox_file, outfile="vox_centro

self.voxel_centroid_output_file = output_path


def generate_plots(self):

vtk_files = []
with h5py.File(self.voxel_centroid_output_file, "r") as hdfr:
with h5py.File(self.voxel_centroid_output_file, "r") as hdfr:
groups =list(hdfr.keys())
for group in range(len(groups)-1):
phase_arr = np.array(hdfr.get(f"{group}/{group}"))
phase_columns = list(list(hdfr.get(f"{group}").attrs.values())[0])
phase_cent_df =pd.DataFrame(data=phase_arr, columns=phase_columns)

image = phase_cent_df.values

file_path = self.voxel_centroid_output_file + f"_{group}"

vtk_files.append(file_path + ".vtu")

x = np.ascontiguousarray(image[:,0])
Expand All @@ -265,7 +266,7 @@ def generate_plots(self):
pointsToVTK(file_path, x, y, z, data = {"label" : label} )
self.vtk_files = vtk_files


def plot3d(self, **kwargs):
self.generate_plots()
for file in self.vtk_files:
Expand Down
67 changes: 67 additions & 0 deletions h5web.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 4,
"id": "74eeaec9-29f6-45bc-a83b-b3a02b5dde42",
"metadata": {},
"outputs": [],
"source": [
"from jupyterlab_h5web import H5Web\n",
"from ifes_apt_tc_data_modeling import apt"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "070c49fd-4394-4592-8f3d-e672af77e978",
"metadata": {},
"outputs": [
{
"data": {
"application/x-hdf5": "/home/mkuehbach/Sprint15/NfdiMatWerkConference/CompositionSpace/tests/output/file_R31_06365-v02_pos_large_chunks_arr.h5",
"text/plain": [
"<jupyterlab_h5web.widget.H5Web object>"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fnm = \"tests/output/file_R31_06365-v02_pos_large_chunks_arr.h5\"\n",
"H5Web(fnm)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ea7fe220-1875-4ae8-9edc-a915d1a87107",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
1 change: 1 addition & 0 deletions tests/20220808_test_Git_1.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@
"source": [
"data = DataPreparation(\"experiment_params.yaml\")\n",
"data.get_big_slices()\n",
"data.get_big_slices_molecules()\n",
"data.get_voxels()\n",
"data.calculate_voxel_composition()"
]
Expand Down
Loading

0 comments on commit 7e9832f

Please sign in to comment.