-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathDGE_analysis.R
50 lines (38 loc) · 1.5 KB
/
DGE_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
### Boas Pucker ###
### bpucker@cebitec.uni-bielefeld.de ###
### DGE analysis for Arabidopsis RNA-Seq samples in Applied Genome Research course ###
library("DESeq2")
# --- read the sample annotation table --- #
# Tab-separated file; the first column becomes the row names.
csvfile <- "clean_sample_table.txt"
sampleTable <- read.csv(
  file = csvfile,
  sep = "\t",
  row.names = 1
)
# genotype is the variable of the design formula, so store it as a factor
sampleTable$genotype <- as.factor(x = sampleTable$genotype)
summary(sampleTable)
# --- loading the data matrix --- #
# Tab-separated count table: the first column supplies the row names and the
# first line the column names.
count_data_file <- "clean_data_matrix.txt"
# TRUE is spelled out because the alias T is an ordinary variable that can be
# reassigned elsewhere in a session.
countdata <- read.csv(
  count_data_file,
  row.names = 1,
  header = TRUE,
  sep = "\t"
)
summary(countdata)
# --- build the DESeqDataSet: counts + sample annotation, genotype as design --- #
ddsMat <- DESeqDataSetFromMatrix(
  countData = countdata,
  colData = sampleTable,
  design = ~genotype
)
nrow(ddsMat)
# --- keep only genes whose count summed over all samples exceeds 100 --- #
expressed <- rowSums(counts(ddsMat)) > 100
dds <- ddsMat[expressed, ]
nrow(dds)
# --- plot PCA in R studio --- #
# rlog-transform the filtered counts, then draw the PCA with samples grouped
# by genotype. The former `ramp`/`cols` colour vectors were never passed to
# any plotting call and have been removed.
rld <- rlog(dds)
# explicit print() so the plot is also drawn when the script is source()d
print(plotPCA(rld, intgroup = "genotype"))
# --- run the DESeq2 pipeline on the filtered data set --- #
dds <- DESeq(object = dds)
# results table with the package's default settings
res <- results(object = dds)
summary(object = res)
# --- investigate differentially expressed genes --- #
# Adjusted p-value cutoff. It is also handed to results() so that the
# independent filtering is optimised for the threshold that is applied below.
alpha <- 0.05
res.05 <- results(dds, alpha = alpha)
table(res.05$padj < alpha)

# Rank genes on the same results object that was just summarised (the default
# `res` table is computed for a different alpha, so its adjusted p-values can
# differ). order() places NA adjusted p-values last.
small.pvalue.index <- head(order(res.05$padj), 20)

# Keep only genes that actually pass the cutoff, so the output file never
# lists non-significant genes or genes without an adjusted p-value when fewer
# than 20 are significant.
passes.cutoff <- !is.na(res.05$padj) & res.05$padj < alpha
small.pvalue.index <- small.pvalue.index[passes.cutoff[small.pvalue.index]]

# `gene.names` rather than `names`, to avoid masking base::names()
gene.names <- rownames(res.05)
(sig.gene.names <- gene.names[small.pvalue.index])

# One gene ID per line; writeLines() also copes with an empty result.
outputfile <- "differentially_expressed_genes.txt"
writeLines(sig.gene.names, outputfile)
# --- print session information --- #
# Records the R version and the attached/loaded packages for this run.
sessionInfo()