-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathboxcox_transform.py
32 lines (28 loc) · 1.01 KB
/
boxcox_transform.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
#!/usr/bin/env python3
'''
perform boxcox transformation on all specified columns in dataframe
create new dataframe columns containing the transformed data
'''
# import packages
import pandas as pd
from scipy import stats
from scipy.stats import boxcox
import matplotlib.pyplot as plt
import seaborn as sns
# Read the input table. The path is a placeholder; point it at the real CSV.
df = pd.read_csv('<path/to/infile')

# Columns to transform: every column except the grouping / subject-id ones.
# Kept as a plain list of names so it is interchangeable with the manual
# alternative on the next line.
loop_cols = [col for col in df.columns if col not in ('grp', 'id_subj')]
# loop_cols = ['col1', 'col2']  # alternatively, transform only these columns

# Box-Cox transform each selected column and store the result in a new
# '<col>_bxcx' column. Called without a lambda, stats.boxcox returns the
# transformed data together with the fitted lambda; per the SciPy docs the
# input must be positive and non-constant, otherwise it raises.
for col in loop_cols:
    fitted_data, fitted_lambda = stats.boxcox(df[col])
    df[col + '_bxcx'] = fitted_data

# Retain only the identifier columns and the transformed columns. The pattern
# is anchored so an untransformed column whose name merely *contains* 'grp',
# 'id_subj' or 'bxcx' (e.g. 'grp_size') is not kept by accident.
df = df.filter(regex=r'^(?:grp|id_subj)$|_bxcx$')

# Plot a histogram with a KDE overlay for every transformed column so the
# resulting distributions can be inspected.
df_check = df.drop(['grp', 'id_subj'], axis=1)
n_plots = df_check.shape[1]
if n_plots:  # plt.subplots rejects ncols=0, so skip when nothing was transformed
    # squeeze=False always yields a 2-D array of Axes; without it a single
    # column gives back a bare Axes object, which zip() cannot iterate.
    fig, axes = plt.subplots(ncols=n_plots, figsize=(20, 8), squeeze=False)
    for ax, col in zip(axes[0], df_check.columns):
        sns.histplot(df_check[col], ax=ax, kde=True)
    plt.tight_layout()
    plt.show()