-
Notifications
You must be signed in to change notification settings - Fork 0
/
limma_deg.R
69 lines (56 loc) · 2.74 KB
/
limma_deg.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# title: "DEG instruction for expression matrix"
# author: "Mr Hong alec"
# date: "2019-9-8"
# title: "DataSet generated by Chip Seq"
### Gather the expression matrix
# NOTE(review): `gset` is not created in this script; it presumably is the
# list returned by an earlier GEOquery::getGEO() call -- confirm.
obj <- gset[[1]]
exprSet <- as.data.frame(exprs(obj))
### Sample names and per-sample phenotype table
samples <- sampleNames(obj)
p_data <- pData(obj) # sample information
### Probe annotation for platform GPL1355
# probe2gene <- getGEO('GPL1355', destdir = '.')
# colnames(Table(probe2gene))
# NOTE(review): `probe2gene_1` is not created in this script either; it must
# provide at least an `ID` column and a gene-symbol column -- confirm.
# Quick check: does any probe ID occur more than once in the annotation?
table(table(probe2gene_1$ID) > 1)
library(dplyr)
### Merge expression values with the probe annotation
exprSet$ID <- rownames(exprSet)
combined_expr_symbol <- right_join(exprSet, probe2gene_1, by = "ID")
library(stringr)
# Make the column names syntactic: spaces become "_", commas are dropped.
names(combined_expr_symbol) <- str_replace_all(
  names(combined_expr_symbol),
  c(" " = "_", "," = "")
)
# Normalise the symbols: strip spaces and join "///"-separated symbols
# with "_".
combined_expr_symbol$Gene_Symbol <- str_replace_all(
  combined_expr_symbol$Gene_Symbol,
  c(" " = "", "///" = "_")
)
# Keep the symbol plus the leading sample columns. The first ncol(exprSet)
# columns of the join are the samples and the helper `ID`; `ID` is removed.
combined_expr_symbol <- select(
  combined_expr_symbol,
  Gene_Symbol,
  1:ncol(exprSet),
  -ID
)
# Keep only genes represented by exactly one probe. The counts are matched
# back to the rows by symbol; positions inside the table() result are not
# row numbers of the data frame and must not be used as such.
symbol_counts <- table(combined_expr_symbol$Gene_Symbol)
single_probe_symbols <- names(symbol_counts)[symbol_counts == 1]
combined_expr_symbol1 <- combined_expr_symbol[
  combined_expr_symbol$Gene_Symbol %in% single_probe_symbols,
]
# Turn blank symbols into NA, then drop every row containing an NA.
# na_if() is applied to the symbol column only: it is the only column that
# can hold "", and recent dplyr releases reject a data frame as input.
eventual_result <- combined_expr_symbol1 %>%
  mutate(Gene_Symbol = na_if(Gene_Symbol, "")) %>%
  na.omit()
### Differential expression with limma: design and contrast
library(limma)
group_list1 <- p_data[, 1:2]
head(group_list1)
### Extract the treatment group from the sample titles
# The first capture group is the 1-2 digit hour count at the start of a title
# shaped like "<hours> hour <label> ... biological rep<digit>".
pattern <- "^(\\d{1,2})[ ]hour[ ]([a-zA-Z.0-9_]{0,10})[ ].*biological[ ]rep(\\d){1}$"
hours <- str_match(group_list1$title, pattern = pattern)[, 2]
# A title that does not match yields NA, which would otherwise be pasted
# into a bogus "HNA" group; fail loudly instead.
if (anyNA(hours)) {
  stop(
    "Sample title(s) do not match the expected pattern: ",
    paste(group_list1$title[is.na(hours)], collapse = "; "),
    call. = FALSE
  )
}
treatment <- paste0("H", hours)
# One indicator column per treatment group, no intercept.
design1 <- model.matrix(~ 0 + factor(treatment))
colnames(design1) <- levels(factor(treatment))
# Label the design rows with the sample columns (all columns but the first).
rownames(design1) <- colnames(eventual_result)[-1]
# Contrast formula "<first group seen>-<next group seen>[-...]".
contrast_formula <- paste0(unique(treatment), collapse = "-")
if (length(unique(treatment)) != 2L) {
  warning(
    "Contrast '", contrast_formula, "' is built from ",
    length(unique(treatment)),
    " groups; it is a pairwise comparison only for exactly two groups.",
    call. = FALSE
  )
}
# The string is passed through `contrasts =`, the argument makeContrasts()
# provides for character input, rather than as an unevaluated call in `...`.
contrast.matrix <- makeContrasts(
  contrasts = contrast_formula,
  levels = design1
)
# Step 1: fit a linear model per gene
# Keep the first row for every gene symbol.
eventual_result1 <- eventual_result[!duplicated(eventual_result[, 1]), ]
# Reset the row names, then move Gene_Symbol from a column into the row
# names. Base R is used because the script never loads tibble, which
# provides remove_rownames() / column_to_rownames() / has_rownames().
temp <- eventual_result1
rownames(temp) <- NULL
eventual_result2 <- temp[, names(temp) != "Gene_Symbol", drop = FALSE]
rownames(eventual_result2) <- temp[["Gene_Symbol"]]
fit_result <- lmFit(eventual_result2, design1)
# Step 2: re-express the fit in terms of the requested contrast, then
# moderate the statistics (eBayes is called with its default settings).
fit_result1 <- contrasts.fit(fit_result, contrast.matrix)
fit_result2 <- eBayes(fit_result1)
# Step 3: collect the statistics for every gene for the first contrast
Outtable <- topTable(fit_result2, coef = 1, number = Inf)
complete_result <- na.omit(Outtable) # drop rows containing NA
complete_result$ID <- rownames(complete_result)
complete_result_ord <- arrange(complete_result, desc(logFC))
# NOTE(review): whether arrange() carries row names through appears to
# depend on the installed dplyr version -- confirm. When the sorted table
# has no character row names, restore them from ID and drop that column;
# otherwise the table is left as is (and keeps the ID column).
if (!is.character(.row_names_info(complete_result_ord, 0L))) {
  rownames(complete_result_ord) <- complete_result_ord$ID
  complete_result_ord$ID <- NULL
}
# The output is tab-separated text despite the .xls extension. Row names
# are written as the first field of every data row.
write.table(
  complete_result_ord,
  file = 'limma_chip_seq_deg.xls',
  quote = FALSE,
  sep = '\t',
  row.names = TRUE,
  col.names = TRUE
)