-
Notifications
You must be signed in to change notification settings - Fork 0
/
write_data_for_lefse (复件).R
executable file
·64 lines (47 loc) · 2.65 KB
/
write_data_for_lefse (复件).R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#library(optparse)
library(stringr)
#option_list <- list(
# make_option(c("-i", "--input"),metavar="path", dest="otu",help="Specify the path of the collapsed bacteria taxonomy file. The first row must not contain any comments",default=NULL),
# make_option(c("-m", "--map"),metavar="path",dest="map", help="Specify the path of mapping file",default=NULL),
# make_option(c("-c", "--category"),metavar="string",dest="group", help="Specify category name in mapping file. You may specify more than one category separated by commas.",default="none"),
# make_option(c("-o", "--output"),metavar="path",dest="out", help="Specify the path of output file",default="./")
# )
#opt <- parse_args(OptionParser(option_list=option_list,description = "This script is used to write the input file of Lefse."))
#if(!dir.exists(opt$out)){dir.create(opt$out,recursive = T)}
# Build a LEfSe input table from an abundance table and a mapping file.
#
# Positional command-line arguments:
#   1. Abundance table (tab-separated): id or short name in the first column,
#      taxonomy in the last column, one column per sample in between.
#   2. Mapping file (tab-separated, sample ids in the first column).
#   3. Category names from the mapping file, separated by ','.
#   4. Path of the output file.
#   5. T/F: skip the first line (e.g. a comment line) of the abundance table.
#   6. Optional: f (first column is the feature name) or l (last column is
#      the feature name). Defaults to l, which is what the script always did.
#
# Output (tab-separated, no quoting): the transposed metadata first (a
# "Subject" row followed by one row per category), then one row per feature
# with its values ordered like the "Subject" row.
ag <- commandArgs(trailingOnly = TRUE)
if (length(ag) < 5) {
  # Usage goes to stderr with a non-zero exit status so that calling
  # pipelines notice the failure instead of silently continuing.
  write("please specify:
1.Input file, with taxonomy at last column, id or short name at first column
2.Mapping file
3.Categories names seprated by ','
4.Path of out file
5.T(Skip the first line(e.g. comment line) while reading abundance table) or F(not skip first line)
6. f(use first column as feature name) or l(use last column as feature name)
Sample Usage:
Rscript write_data_for_lefse.R otu_table_with_comment.txt mapping_file.txt Group1 Group1_table_for_lefse.txt T", stderr())
  quit(save = "no", status = 1)
}

# as.logical() returns NA for anything it cannot interpret; reject that here
# rather than letting `if (NA)` fail later with an unrelated message.
skip_first <- as.logical(ag[5])
if (is.na(skip_first)) {
  stop("Argument 5 must be T or F, got: ", ag[5], call. = FALSE)
}

feature_source <- if (length(ag) >= 6) ag[6] else "l"
if (!feature_source %in% c("f", "l")) {
  stop("Argument 6 must be f or l, got: ", feature_source, call. = FALSE)
}

# ---- Metadata ----
# Every column is read as text: t() below goes through as.matrix(), which
# format()s non-character columns, so numeric or logical category labels
# would otherwise be padded or rewritten (" 1"/"10", "T" -> "TRUE").
meta <- read.table(
  ag[2],
  na.strings = "", row.names = 1, header = TRUE, sep = "\t",
  comment.char = "", check.names = FALSE, stringsAsFactors = FALSE,
  colClasses = "character"
)
group <- str_split(ag[3], ",")[[1]]
unknown_groups <- setdiff(group, colnames(meta))
if (length(unknown_groups) > 0) {
  stop(
    "Categories not found in mapping file: ",
    paste(unknown_groups, collapse = ", "),
    call. = FALSE
  )
}

# Keep only the requested categories and drop samples with a missing value
# in any of them.
meta <- na.omit(meta[group])
if (nrow(meta) == 0) {
  stop(
    "No samples left after removing those with missing category values",
    call. = FALSE
  )
}
meta <- data.frame(Subject = rownames(meta), meta, stringsAsFactors = FALSE)
meta <- t(meta)
samples <- unname(meta[1, ])

# ---- Abundance table ----
data <- read.table(
  ag[1],
  quote = "", skip = if (skip_first) 1 else 0, header = TRUE, sep = "\t",
  comment.char = "", stringsAsFactors = FALSE, check.names = FALSE
)

# calculate the relative abundance (disabled)
#data[,2:(ncol(data)-1)]=apply(data[,2:(ncol(data)-1)],2,function(x){x/sum(x)})

# Drop the catch-all rows; %in% never yields NA, so an NA id cannot turn
# into an all-NA row the way `!=` would.
data <- data[!(data[[1]] %in% c("Others", "unclassified")), , drop = FALSE]

# Feature names come from the first or the last column; the columns in
# between hold the per-sample values.
feature <- if (feature_source == "f") data[[1]] else data[[ncol(data)]]
abundance <- data[, -c(1, ncol(data)), drop = FALSE]

# Truncate the lineage at the first empty rank (a single lower-case letter
# followed by "__"), strip a trailing ';', then use '|' as rank separator.
feature <- str_replace(feature, "; *[a-z]__ *;.*$", "")
feature <- str_replace(feature, "; *[a-z]__ *$", "")
feature <- str_replace(feature, ";$", "")
feature <- str_replace_all(feature, ";", "|")

# Every sample kept from the mapping file must exist in the abundance table,
# otherwise the metadata rows and the value columns would not line up.
absent_samples <- setdiff(samples, colnames(abundance))
if (length(absent_samples) > 0) {
  stop(
    "Samples from the mapping file not found in the abundance table: ",
    paste(absent_samples, collapse = ", "),
    call. = FALSE
  )
}
data <- data.frame(
  feature,
  abundance[, match(samples, colnames(abundance)), drop = FALSE],
  check.names = FALSE, stringsAsFactors = FALSE
)

# Filter otus (disabled)
#otu_sum<-colSums(t(data[,-1])>0)
#data<-data[otu_sum>0.25*(ncol(data)-1),]

# Metadata rows first (row names serve as the row labels), then the feature
# rows appended to the same file.
write.table(
  meta,
  file = ag[4], row.names = TRUE, col.names = FALSE, quote = FALSE,
  sep = "\t", append = FALSE
)
write.table(
  data,
  file = ag[4], row.names = FALSE, col.names = FALSE, quote = FALSE,
  sep = "\t", append = TRUE
)