-
Notifications
You must be signed in to change notification settings - Fork 44
/
Copy pathMachine Learning based intergration.R
246 lines (190 loc) · 8 KB
/
Machine Learning based intergration.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
# NOTE(review): removes every (non-hidden) object from the current
# environment before the analysis starts, so anything defined earlier in the
# session is lost. Confirm this is wanted when the script is sourced from
# another session.
rm(list = ls())
library(survival)
library(randomForestSRC)
library(glmnet)
library(plsRcox)
library(superpc)
library(gbm)
library(CoxBoost)
library(survivalsvm)
library(dplyr)
library(tibble)
library(BART)
# Restore the saved workspace. The code below expects it to provide `mm`
# (a list of per-cohort tables with an element named TCGA) and drops four
# other objects that are not used afterwards.
load("model-step.Rda")
rm(list = c("rr", "rr2", "rr3", "geneids"))

# Standardise every column after the first three, cohort by cohort.
mm <- lapply(mm, function(cohort) {
  cohort[, -(1:3)] <- scale(cohort[, -(1:3)])
  cohort
})

##################################
#### Preparation ####
##################################
# Global settings shared by the model sections.
seed <- 1
rf_nodesize <- 5

# Accumulator: one row per (cohort, model) with its C-index.
result <- data.frame()

# The TCGA element is the training cohort; every element (TCGA included)
# is scored later.
val_data_list <- mm
est_data <- val_data_list$TCGA

# Predictors are all columns after the first three of the training cohort.
pre_var <- colnames(est_data)[-(1:3)]

# Modelling tables: time, status, then the predictors in a fixed order.
est_dd <- est_data[, c("OS.time", "OS", pre_var)]
val_dd_list <- lapply(val_data_list, function(cohort) {
  cohort[, c("OS.time", "OS", pre_var)]
})
rm(mm)
##################################
#### 1-1.RSF ####
##################################
# Random survival forest: train on est_dd, score every cohort in val_dd_list
# with the forest's `predicted` output, then append one C-index row per
# cohort to `result`.
set.seed(seed)
fit <- rfsrc(
  Surv(OS.time, OS) ~ .,
  data = est_dd,
  ntree = 1000,
  nodesize = rf_nodesize, # worth tuning: try several values
  splitrule = "logrank",
  # TRUE is spelled out: `T` is an ordinary variable that can be reassigned.
  importance = TRUE,
  proximity = TRUE,
  forest = TRUE,
  seed = seed
)
# Per cohort: time and status (first two columns) plus the risk score RS.
rs <- lapply(val_dd_list, function(x) {
  cbind(x[, 1:2], RS = predict(fit, newdata = x)$predicted)
})
# C-index per cohort = concordance of a Cox model with RS as the only
# covariate. vapply guarantees one numeric value per cohort.
cc <- data.frame(
  Cindex = vapply(rs, function(x) {
    as.numeric(summary(coxph(Surv(OS.time, OS) ~ RS, x))$concordance[1])
  }, numeric(1))
) %>%
  rownames_to_column("ID")
cc$Model <- "RSF"
result <- rbind(result, cc)
##################################
#### 2-1.Enet ####
##################################
# Penalised Cox regression with glmnet over a grid of mixing parameters
# alpha = 0, 0.1, ..., 1. Each alpha gets its own cross-validated fit and its
# own set of C-index rows in `result`.
x1 <- as.matrix(est_dd[, pre_var])
x2 <- as.matrix(Surv(est_dd$OS.time, est_dd$OS))
for (alpha in seq(0, 1, 0.1)) {
  # Re-seed so each alpha starts from the same random state.
  set.seed(seed)
  fit <- cv.glmnet(x1, x2, family = "cox", alpha = alpha, nfolds = 10)
  # Risk score = linear predictor at the lambda with minimum CV error.
  rs <- lapply(val_dd_list, function(x) {
    cbind(
      x[, 1:2],
      RS = as.numeric(predict(fit,
        type = "link",
        newx = as.matrix(x[, -c(1, 2)]),
        s = fit$lambda.min
      ))
    )
  })
  # C-index per cohort = concordance of a Cox model with RS as the only
  # covariate. vapply guarantees one numeric value per cohort.
  cc <- data.frame(
    Cindex = vapply(rs, function(x) {
      as.numeric(summary(coxph(Surv(OS.time, OS) ~ RS, x))$concordance[1])
    }, numeric(1))
  ) %>%
    rownames_to_column("ID")
  cc$Model <- paste0("Enet", "[α=", alpha, "]")
  result <- rbind(result, cc)
}
##################################
#### 3-1.StepCox ####
##################################
# Stepwise Cox regression starting from the model with all predictors, once
# per search direction. Each direction contributes its own C-index rows.
# NOTE(review): no `scope` is passed to step(). Per ?step the initial model is
# then used as the upper model, so "forward" has no terms left to add and
# presumably returns the full model unchanged. Confirm this is intended.
for (direction in c("both", "backward", "forward")) {
  fit <- step(coxph(Surv(OS.time, OS) ~ ., est_dd), direction = direction)
  # Risk score = predicted relative risk from the selected model.
  rs <- lapply(val_dd_list, function(x) {
    cbind(x[, 1:2], RS = predict(fit, type = "risk", newdata = x))
  })
  # C-index per cohort = concordance of a Cox model with RS as the only
  # covariate. vapply guarantees one numeric value per cohort.
  cc <- data.frame(
    Cindex = vapply(rs, function(x) {
      as.numeric(summary(coxph(Surv(OS.time, OS) ~ RS, x))$concordance[1])
    }, numeric(1))
  ) %>%
    rownames_to_column("ID")
  cc$Model <- paste0("StepCox", "[", direction, "]")
  result <- rbind(result, cc)
}
##################################
#### 3-8.StepCox + survival-SVM ####
##################################
# Two-stage model: stepwise Cox regression selects the variables, then a
# survival SVM is trained on the selected variables only. (The section title
# previously said "gbm", but the code fits survivalsvm and labels the rows
# "survival-SVM".)
for (direction in c("both", "backward")) {
  fit <- step(coxph(Surv(OS.time, OS) ~ ., est_dd), direction = direction)
  # Variables kept by the stepwise search.
  rid <- names(coef(fit))
  est_dd2 <- est_data[, c("OS.time", "OS", rid)]
  val_dd_list2 <- lapply(val_data_list, function(x) {
    x[, c("OS.time", "OS", rid)]
  })
  fit <- survivalsvm(Surv(OS.time, OS) ~ ., data = est_dd2, gamma.mu = 1)
  rs <- lapply(val_dd_list2, function(x) {
    cbind(x[, 1:2], RS = as.numeric(predict(fit, x)$predicted))
  })
  # C-index per cohort = concordance of a Cox model with RS as the only
  # covariate. vapply guarantees one numeric value per cohort.
  cc <- data.frame(
    Cindex = vapply(rs, function(x) {
      as.numeric(summary(coxph(Surv(OS.time, OS) ~ RS, x))$concordance[1])
    }, numeric(1))
  ) %>%
    rownames_to_column("ID")
  cc$Model <- paste0("StepCox", "[", direction, "]", " + survival-SVM")
  result <- rbind(result, cc)
}
##################################
#### 4-1.CoxBoost ####
##################################
# CoxBoost in three steps: search for the penalty, cross-validate the number
# of boosting steps with that penalty, then fit the final model.
set.seed(seed)
pen <- optimCoxBoostPenalty(
  est_dd[, "OS.time"], est_dd[, "OS"], as.matrix(est_dd[, -c(1, 2)]),
  trace = TRUE, start.penalty = 500,
  parallel = TRUE # spelled out: `T` is a reassignable variable
)
cv.res <- cv.CoxBoost(
  est_dd[, "OS.time"], est_dd[, "OS"], as.matrix(est_dd[, -c(1, 2)]),
  maxstepno = 500, K = 10, type = "verweij", penalty = pen$penalty
)
fit <- CoxBoost(
  est_dd[, "OS.time"], est_dd[, "OS"], as.matrix(est_dd[, -c(1, 2)]),
  stepno = cv.res$optimal.step, penalty = pen$penalty
)
# Risk score = linear predictor of the boosted model.
rs <- lapply(val_dd_list, function(x) {
  cbind(
    x[, 1:2],
    RS = as.numeric(predict(fit,
      newdata = x[, -c(1, 2)],
      newtime = x[, 1],
      newstatus = x[, 2],
      type = "lp"
    ))
  )
})
# C-index per cohort = concordance of a Cox model with RS as the only
# covariate. vapply guarantees one numeric value per cohort.
cc <- data.frame(
  Cindex = vapply(rs, function(x) {
    as.numeric(summary(coxph(Surv(OS.time, OS) ~ RS, x))$concordance[1])
  }, numeric(1))
) %>%
  rownames_to_column("ID")
cc$Model <- "CoxBoost"
result <- rbind(result, cc)
##################################
#### 5.plsRcox ####
##################################
# Partial least squares Cox regression: cross-validate with up to 10
# components, then fit the final model with the component count taken from
# the cross-validation result.
set.seed(seed)
cv.plsRcox.res <- cv.plsRcox(
  list(x = est_dd[, pre_var], time = est_dd$OS.time, status = est_dd$OS),
  nt = 10, verbose = FALSE
)
# NOTE(review): the component count is read positionally from the 5th element
# of the cross-validation result. Confirm which selection criterion that is.
fit <- plsRcox(
  est_dd[, pre_var],
  time = est_dd$OS.time, event = est_dd$OS,
  nt = as.numeric(cv.plsRcox.res[5])
)
# Risk score = linear predictor of the fitted model.
rs <- lapply(val_dd_list, function(x) {
  cbind(
    x[, 1:2],
    RS = as.numeric(predict(fit, type = "lp", newdata = x[, -c(1, 2)]))
  )
})
# C-index per cohort = concordance of a Cox model with RS as the only
# covariate. vapply guarantees one numeric value per cohort.
cc <- data.frame(
  Cindex = vapply(rs, function(x) {
    as.numeric(summary(coxph(Surv(OS.time, OS) ~ RS, x))$concordance[1])
  }, numeric(1))
) %>%
  rownames_to_column("ID")
cc$Model <- "plsRcox"
result <- rbind(result, cc)
##################################
#### 6.superpc ####
##################################
# Supervised principal components. superpc takes a list whose `x` has one row
# per feature, so the predictor table is transposed.
data <- list(
  x = t(est_dd[, -c(1, 2)]),
  y = est_dd$OS.time,
  censoring.status = est_dd$OS,
  featurenames = colnames(est_dd)[-c(1, 2)]
)
set.seed(seed)
# NOTE(review): the original author marked s0.perc and n.threshold as
# "default" values. Confirm against the installed superpc version.
fit <- superpc.train(data = data, type = "survival", s0.perc = 0.5)
cv.fit <- superpc.cv(
  fit, data,
  n.threshold = 20,
  n.fold = 10,
  n.components = 3,
  min.features = 5,
  max.features = nrow(data$x), # rows of the transposed table = features
  compute.fullcv = TRUE,
  compute.preval = TRUE
)
rs <- lapply(val_dd_list, function(w) {
  test <- list(
    x = t(w[, -c(1, 2)]),
    y = w$OS.time,
    censoring.status = w$OS,
    featurenames = colnames(w)[-c(1, 2)]
  )
  # Use the threshold whose first-row CV score (`scor[1, ]`) is largest and
  # predict with a single component.
  ff <- superpc.predict(
    fit, data, test,
    threshold = cv.fit$thresholds[which.max(cv.fit[["scor"]][1, ])],
    n.components = 1
  )
  cbind(w[, 1:2], RS = as.numeric(ff$v.pred))
})
# C-index per cohort = concordance of a Cox model with RS as the only
# covariate. vapply guarantees one numeric value per cohort.
cc <- data.frame(
  Cindex = vapply(rs, function(x) {
    as.numeric(summary(coxph(Surv(OS.time, OS) ~ RS, x))$concordance[1])
  }, numeric(1))
) %>%
  rownames_to_column("ID")
cc$Model <- "SuperPC"
result <- rbind(result, cc)
##################################
#### 7.GBM ####
##################################
# Gradient boosting with a Cox partial-likelihood loss. The first fit grows
# 10000 trees with 10-fold CV to find the best tree count; the second fit
# repeats the call with exactly that many trees.
# NOTE(review): the two calls request different worker counts (n.cores = 6,
# then 8). Both are kept as found; confirm whether the difference is wanted.
set.seed(seed)
fit <- gbm(
  formula = Surv(OS.time, OS) ~ ., data = est_dd, distribution = "coxph",
  n.trees = 10000,
  interaction.depth = 3,
  n.minobsinnode = 10,
  shrinkage = 0.001,
  cv.folds = 10, n.cores = 6
)
# Number of trees at which the cross-validation error is smallest.
best <- which.min(fit$cv.error)
set.seed(seed)
fit <- gbm(
  formula = Surv(OS.time, OS) ~ ., data = est_dd, distribution = "coxph",
  n.trees = best,
  interaction.depth = 3,
  n.minobsinnode = 10,
  shrinkage = 0.001,
  cv.folds = 10, n.cores = 8
)
# Risk score = prediction on the link scale using `best` trees.
rs <- lapply(val_dd_list, function(x) {
  cbind(
    x[, 1:2],
    RS = as.numeric(predict(fit, x, n.trees = best, type = "link"))
  )
})
# C-index per cohort = concordance of a Cox model with RS as the only
# covariate. vapply guarantees one numeric value per cohort.
cc <- data.frame(
  Cindex = vapply(rs, function(x) {
    as.numeric(summary(coxph(Surv(OS.time, OS) ~ RS, x))$concordance[1])
  }, numeric(1))
) %>%
  rownames_to_column("ID")
cc$Model <- "GBM"
result <- rbind(result, cc)
##################################
#### 8.survivalsvm ####
##################################
# Survival SVM trained on all predictors of the training table.
fit <- survivalsvm(Surv(OS.time, OS) ~ ., data = est_dd, gamma.mu = 1)
# Risk score = the `predicted` component of the SVM prediction.
rs <- lapply(val_dd_list, function(x) {
  cbind(x[, 1:2], RS = as.numeric(predict(fit, x)$predicted))
})
# C-index per cohort = concordance of a Cox model with RS as the only
# covariate. vapply guarantees one numeric value per cohort.
cc <- data.frame(
  Cindex = vapply(rs, function(x) {
    as.numeric(summary(coxph(Surv(OS.time, OS) ~ RS, x))$concordance[1])
  }, numeric(1))
) %>%
  rownames_to_column("ID")
cc$Model <- "survival-SVM"
result <- rbind(result, cc)
# ---- Summary and plots ----
# Work on a copy of the results in which the Greek alpha of the Enet labels
# is replaced by a plain "a".
result2 <- result
result2$Model <- gsub("α", "a", result2$Model)
library(ggplot2)
library(ggsci)
library(tidyr)
library(ggbreak)
# Shows the span of C-index values when the script is run interactively.
range(result2$Cindex)

# Bar chart of the mean C-index per model, leaving out the rows whose ID is
# "TCGA". Models are ordered by their C-index.
result2 %>%
  filter(ID != "TCGA") %>%
  ggplot(aes(Cindex, reorder(Model, Cindex))) +
  geom_bar(width = 0.7, stat = "summary", fun = "mean", fill = "orange2") +
  theme_classic() +
  labs(y = NULL)

# Mean C-index per model over all cohorts (the TCGA filter is disabled here).
dd <- result2 %>%
  # filter(ID!='TCGA')%>%
  group_by(Model) %>%
  summarise(Cindex = mean(Cindex))

# Same bar chart from the pre-computed means, with a break in the x axis
# between 0.05 and 0.53.
dd %>%
  ggplot(aes(Cindex, reorder(Model, Cindex))) +
  geom_bar(width = 0.7, stat = "identity", fill = "orange") +
  scale_x_break(c(0.05, 0.53), scales = 20)

# Wide table: one row per model, one C-index column per cohort.
dd2 <- pivot_wider(result2, names_from = "ID", values_from = "Cindex") %>%
  as.data.frame()
# Convert column by column. apply() on `dd2[, -1]` fails when only one cohort
# column exists, because the single-column selection drops to a plain vector.
dd2[-1] <- lapply(dd2[-1], as.numeric)