-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclassification_permutation.py
37 lines (32 loc) · 1.49 KB
/
classification_permutation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import os
import pandas as pd
import numpy as np
from tqdm import tqdm # show progress bar
from permutation_functions import get_permutation_p_classification,per_get_permutation_p_classification
from imblearn.over_sampling import SMOTE
from classification_function import kfold
# Load the dataset; 'your data' is a placeholder to be replaced with the CSV path.
dt_combine_fs = pd.read_csv('your data')
# Binary outcome column: 1 where t1_PHQ_total is at least 10, otherwise 0.
# NaN scores compare False, so those rows are labelled 0.
dt_combine_fs['depression_group'] = (
    dt_combine_fs['t1_PHQ_total'] >= 10
).astype('int64')
# Column names of every candidate predictor.
var_feature = [
    't1_PSES2',
    't1_SOLI_total',
    't1_EASQ_locus',
    't1_EASQ_stability',
    't1_EASQ_globality',
    't1_BIS',
    't1_BAS_drive',
    't1_BAS_reward_responsiveness',
    't1_BAS_fun_seeking',
    't1_RRS_brooding',
    't1_UCLALS_total',
    't1_SSS_total',
    't1_CTQ_total',
    't1_CDRISC_total',
    't1_PSS_total',
]
# The reduced set of four predictors.
# NOTE(review): this list is not referenced again in the visible code;
# X_selected below spells out the same four columns by hand, in a different
# order. Confirm which order is intended before unifying the two.
selected_features = [
    't1_CDRISC_total',
    't1_PSS_total',
    't1_RRS_brooding',
    't1_UCLALS_total',
]
# Independent variables (all predictors / four selected predictors) and the
# dependent variable (binary depression_group label).
X = dt_combine_fs[var_feature]
X_selected = dt_combine_fs[
    ['t1_UCLALS_total', 't1_PSS_total', 't1_RRS_brooding', 't1_CDRISC_total']
]
Y = dt_combine_fs['depression_group']
# One SMOTE sampler with a fixed seed is shared by both feature sets.
sm = SMOTE(random_state=0)

# --- all predictors ---
# Oversample first, then pass the resampled data to kfold; only the first of
# its four return values is kept (ROC AUC scores, per the variable name).
# NOTE(review): resampling happens before kfold is called. If kfold splits the
# data into train/test folds, synthetic samples can end up on both sides of a
# split and inflate the scores; confirm against kfold's implementation.
X_res, Y_res = sm.fit_resample(X, Y)
roc_ls, _, _, _ = kfold(X_res, Y_res)

# --- four selected predictors ---
# Same procedure, applied to the reduced feature matrix.
X_res_selected, Y_res_selected = sm.fit_resample(X_selected, Y)
roc_ls_selected, _, _, _ = kfold(X_res_selected, Y_res_selected)
# Number of permutations passed to the permutation helper.
no_permu = 500
# Permutation test for the all-predictor model: the helper receives the
# permutation count, the mean of roc_ls and the resampled data.
# Presumably it returns a p-value for that mean; verify against
# permutation_functions.
# NOTE(review): roc_ls_selected is not tested in the visible code.
p_value_all = get_permutation_p_classification(
    no_permu,
    np.mean(roc_ls),
    X_res,
    Y_res,
)