Skip to content

Commit

Permalink
Local changes from Markus from last summer to prepare now the IUC09 conda release with the refactored NeXus-based code for Alaukik
Browse files Browse the repository at this point in the history
  • Loading branch information
mkuehbach committed Apr 10, 2024
1 parent 73d1086 commit 7e9832f
Show file tree
Hide file tree
Showing 10 changed files with 1,090 additions and 59 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,7 @@ traj.*
*.sub*
*.dat
*log*
*.h5
*.nxs
*.vtu
*.png
77 changes: 39 additions & 38 deletions compositionspace/segmentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from compositionspace.models import get_model
from sklearn.decomposition import PCA
from sklearn.mixture import GaussianMixture
import json
import json
import h5py
import numpy as np
import pandas as pd
Expand All @@ -15,7 +15,7 @@
import pyvista as pv

class CompositionClustering():

def __init__(self, inputfile):
if isinstance(inputfile, dict):
self.params = inputfile
Expand All @@ -40,13 +40,13 @@ def get_PCA_cumsum(self, vox_ratio_file, vox_file):
print(len(ratios))
print((ratios_columns))

ratios = pd.DataFrame(data=ratios, columns=ratios_columns)
ratios = pd.DataFrame(data=ratios, columns=ratios_columns)

X_train=ratios.drop(['Total_no','vox'], axis=1)
PCAObj = PCA(n_components = len(spec_lst))
PCAObj = PCA(n_components = len(spec_lst))
PCATrans = PCAObj.fit_transform(X_train)
PCACumsumArr = np.cumsum(PCAObj.explained_variance_ratio_)

plt.figure(figsize=(5,5))
plt.plot( range(1,len(PCACumsumArr)+1,1),PCACumsumArr,"-o")
plt.ylabel("Explained Variance")
Expand All @@ -55,13 +55,13 @@ def get_PCA_cumsum(self, vox_ratio_file, vox_file):
output_path = os.path.join(self.params["output_path"], "PCA_cumsum.png")
plt.savefig(output_path)
plt.show()

return PCACumsumArr, ratios



def get_bics_minimization(self, vox_ratio_file, vox_file):

with h5py.File(vox_file,"r") as hdf:
group = hdf.get("Group_sm_vox_xyz_Da_spec")
group0 = hdf.get("0")
Expand All @@ -72,16 +72,16 @@ def get_bics_minimization(self, vox_ratio_file, vox_file):
ratios_columns = list(list(hdfr.attrs.values())[0])
group_name = list(list(hdfr.attrs.values())[1])

ratios = pd.DataFrame(data=ratios, columns=ratios_columns)
ratios = pd.DataFrame(data=ratios, columns=ratios_columns)

gm_scores=[]
aics=[]
bics=[]

X_train=ratios.drop(['Total_no','vox'], axis=1)

n_clusters=list(range(1,self.params["bics_clusters"]))

pbar = tqdm(n_clusters, desc="Clustering")
for n_cluster in pbar:
gm = GaussianMixture(n_components=n_cluster,verbose=0)
Expand All @@ -90,16 +90,16 @@ def get_bics_minimization(self, vox_ratio_file, vox_file):
#gm_scores.append(homogeneity_score(y,y_pred))
aics.append(gm.aic(X_train))
bics.append(gm.bic(X_train))

output_path = os.path.join(self.params["output_path"], "bics_aics.png")
plt.plot(n_clusters, aics, "-o",label="AIC")
plt.plot(n_clusters, bics, "-o",label="BIC")
plt.legend()
plt.savefig(output_path)
plt.show()
return self.params["bics_clusters"], aics, bics
return self.params["bics_clusters"], aics, bics


def calculate_centroid(self, data):
"""
Calculate centroid
Expand Down Expand Up @@ -140,22 +140,22 @@ def get_voxel_centroid(self, vox_file, files_arr):
dic_centroids["z"] = []
dic_centroids["file_name"] = []
df_centroids = pd.DataFrame(columns=['x', 'y', 'z','filename'])

for filename in files_arr:
group = np.min(item_lst[[filename in range(j[0],j[1]) for j in item_lst]])
xyz_Da_spec_atoms = np.array(hdf.get("{}/{}".format(group, filename)))
x, y, z = self.calculate_centroid(xyz_Da_spec_atoms)
dic_centroids["x"].append(x)
dic_centroids["y"].append(y)
dic_centroids["z"].append(z)
dic_centroids["file_name"].append(filename)
dic_centroids["file_name"].append(filename)
return dic_centroids


def get_composition_cluster_files(self, vox_ratio_file, vox_file, n_components):

ml_params = self.params["ml_models"]

with h5py.File(vox_file,"r") as hdf:
group = hdf.get("Group_sm_vox_xyz_Da_spec")
group0 = hdf.get("0")
Expand All @@ -166,21 +166,22 @@ def get_composition_cluster_files(self, vox_ratio_file, vox_file, n_components):
ratios_columns = list(list(hdfr.attrs.values())[0])
group_name = list(list(hdfr.attrs.values())[1])

ratios = pd.DataFrame(data=ratios, columns=ratios_columns)

ratios = pd.DataFrame(data=ratios, columns=ratios_columns)
print(f"ratios {ratios}")

X_train=ratios.drop(['Total_no','vox'], axis=1)

gm = get_model(ml_params=ml_params)
gm.fit(X_train)
y_pred=gm.predict(X_train)

cluster_lst = []
for phase in range(n_components):
cluster_lst.append(np.argwhere(y_pred == phase).flatten())
cluster_lst.append(np.argwhere(y_pred == phase).flatten())
df_lst = []
for cluster in cluster_lst:
df_lst.append(ratios.iloc[cluster])

#sorting
cluster_lst_sort = []
len_arr = np.array([len(x) for x in cluster_lst])
Expand All @@ -190,9 +191,9 @@ def get_composition_cluster_files(self, vox_ratio_file, vox_file, n_components):

#print([len(x) for x in cluster_lst_sort])
cluster_lst = cluster_lst_sort

return cluster_lst, ratios

def get_composition_clusters(self, vox_ratio_file, vox_file, outfile="vox_centroid_file.h5"):
voxel_centroid_output_file = []
n_components = self.params["n_phases"]
Expand All @@ -210,7 +211,7 @@ def get_composition_clusters(self, vox_ratio_file, vox_file, outfile="vox_centro
plot_files_group = []
for cluster_files in plot_files:
plot_files_group.append([int(file_num) for file_num in cluster_files ])

with h5py.File(vox_file,"r") as hdf_sm_r:
hdf_sm_r = h5py.File(vox_file,"r")
group = hdf_sm_r.get("0")
Expand All @@ -224,7 +225,7 @@ def get_composition_clusters(self, vox_ratio_file, vox_file, outfile="vox_centro
total_voxels_int = int(total_voxels_int)
hdf_sm_r.close()
plot_files_cl_All_group = [file_num for file_num in range(total_voxels_int)]

plot_files_group.append(plot_files_cl_All_group)
output_path = os.path.join(self.params["output_path"], outfile)
with h5py.File(output_path,"w") as hdfw:
Expand All @@ -240,21 +241,21 @@ def get_composition_clusters(self, vox_ratio_file, vox_file, outfile="vox_centro

self.voxel_centroid_output_file = output_path


def generate_plots(self):

vtk_files = []
with h5py.File(self.voxel_centroid_output_file, "r") as hdfr:
with h5py.File(self.voxel_centroid_output_file, "r") as hdfr:
groups =list(hdfr.keys())
for group in range(len(groups)-1):
phase_arr = np.array(hdfr.get(f"{group}/{group}"))
phase_columns = list(list(hdfr.get(f"{group}").attrs.values())[0])
phase_cent_df =pd.DataFrame(data=phase_arr, columns=phase_columns)

image = phase_cent_df.values

file_path = self.voxel_centroid_output_file + f"_{group}"

vtk_files.append(file_path + ".vtu")

x = np.ascontiguousarray(image[:,0])
Expand All @@ -265,7 +266,7 @@ def generate_plots(self):
pointsToVTK(file_path, x, y, z, data = {"label" : label} )
self.vtk_files = vtk_files


def plot3d(self, **kwargs):
self.generate_plots()
for file in self.vtk_files:
Expand Down
67 changes: 67 additions & 0 deletions h5web.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 4,
"id": "74eeaec9-29f6-45bc-a83b-b3a02b5dde42",
"metadata": {},
"outputs": [],
"source": [
"from jupyterlab_h5web import H5Web\n",
"from ifes_apt_tc_data_modeling import apt"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "070c49fd-4394-4592-8f3d-e672af77e978",
"metadata": {},
"outputs": [
{
"data": {
"application/x-hdf5": "/home/mkuehbach/Sprint15/NfdiMatWerkConference/CompositionSpace/tests/output/file_R31_06365-v02_pos_large_chunks_arr.h5",
"text/plain": [
"<jupyterlab_h5web.widget.H5Web object>"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fnm = \"tests/output/file_R31_06365-v02_pos_large_chunks_arr.h5\"\n",
"H5Web(fnm)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ea7fe220-1875-4ae8-9edc-a915d1a87107",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
1 change: 1 addition & 0 deletions tests/20220808_test_Git_1.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@
"source": [
"data = DataPreparation(\"experiment_params.yaml\")\n",
"data.get_big_slices()\n",
"data.get_big_slices_molecules()\n",
"data.get_voxels()\n",
"data.calculate_voxel_composition()"
]
Expand Down
Loading

0 comments on commit 7e9832f

Please sign in to comment.