-
Notifications
You must be signed in to change notification settings - Fork 1
/
batch_correction_3Llauncher.R
146 lines (125 loc) · 7.19 KB
/
batch_correction_3Llauncher.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
###############################################################################################################
# batch_correction_3Llauncher #
# #
# Authors: Jean-Francois MARTIN / Melanie Petera #
# Starting date: 04-08-2020 #
# Based on batch_correction_wrapper.R version 2.91 #
# Version 1: 02-10-2020 #
# - split of tool-linked code and script-linked one #
# - handling of sample tags' parameters #
# - accepting samples beyond pools and samples #
# - dealing with special characters in IDs and column names #
# - adding a min.norm argument to the function #
# #
# Input files: dataMatrix.txt, sampleMetadata.txt, variableMetadata.txt (BC only) #
# Output files: graph.pdf, corrected table (BC only), diagnostic table (DBC only), variableMetadata (BC only) #
# #
###############################################################################################################
meth3L <- function(idsample,iddata,sample_type_col_name,injection_order_col_name,batch_col_name,sample_type_tags,
factbio,analyse,metaion,detail,method,outlog,span,valnull,
rdata_output,dataMatrix_out,variableMetadata_out,out_graph_pdf,out_preNormSummary,min.norm){
## Import function
tab.import <- function(tested.file,tabtype){
tab.res <- tryCatch(read.table(tested.file,header=TRUE,sep='\t',check.names=FALSE,comment.char = ''), error=conditionMessage)
if(length(tab.res)==1){
stop(paste("Could not import the",tabtype,"file. There may be issues in your table integrity.\nCorresponding R error message:\n",tab.res))
}else{
tab.comp <- tryCatch(read.table(tested.file,header=TRUE,sep='\t',check.names=FALSE,comment.char = '',quote=""), error=conditionMessage)
if((length(tab.comp)!=1)&&(dim(tab.res)!=dim(tab.comp))){ # wrong original import due to quotes inside a column name
return(tab.comp)
}else{ return(tab.res) }
}
}
## Reading of input files
idsample=tab.import(idsample,"sampleMetadata")
iddata=tab.import(iddata,"dataMatrix")
### Table match check
table.check <- match2(iddata,idsample,"sample")
if(length(table.check)>1){check.err(table.check)}
### StockID
samp.id <- stockID(iddata,idsample,"sample")
iddata<-samp.id$dataMatrix ; idsample<-samp.id$Metadata ; samp.id<-samp.id$id.match
### Checking mandatory variables
mand.check <- ""
for(mandcol in c(sample_type_col_name, injection_order_col_name, batch_col_name)){
if(!(mandcol%in%colnames(idsample))){
mand.check <- c(mand.check,"\nError: no '",mandcol,"' column in sample metadata.\n",
"Note: column names are case-sensitive.\n")
}
}
if(length(mand.check)>1){
mand.check <- c(mand.check,"\nFor more information, see the help section or:",
"\n http://workflow4metabolomics.org/sites/",
"workflow4metabolomics.org/files/files/w4e-2016-data_processing.pdf\n")
check.err(mand.check)
}
if(analyse == "batch_correction") {
## Reading of Metadata Ions file
metaion=read.table(metaion,header=T,sep='\t',check.names=FALSE,comment.char = '')
## Table match check
table.check <- c(table.check,match2(iddata,metaion,"variable"))
## StockID
var.id <- stockID(iddata,metaion,"variable")
iddata<-var.id$dataMatrix ; metaion<-var.id$Metadata ; var.id<-var.id$id.match
}
### Formating
idsample[[1]]=make.names(idsample[[1]])
dimnames(iddata)[[1]]=iddata[[1]]
### Transposition of ions data
idTdata=t(iddata[,2:dim(iddata)[2]])
idTdata=data.frame(dimnames(idTdata)[[1]],idTdata)
### Merge of 2 files (ok even if the two dataframe are not sorted on the same key)
ids=merge(idsample, idTdata, by.x=1, by.y=1)
ids[[batch_col_name]]=as.factor(ids[[batch_col_name]])
nbid=dim(idsample)[2]
### Checking the number of sample and pool
# least 2 samples
if(length(which(ids[[sample_type_col_name]] %in% sample_type_tags$sample))<2){
table.check <- c(table.check,"\nError: less than 2 samples specified in sample metadata.",
"\nMake sure this is not due to errors in your ",sample_type_col_name," coding.\n")
}
# least 2 pools per batch for all batchs
B <- rep(0,length(levels(ids[[batch_col_name]])))
for(nbB in 1:length(levels(ids[[batch_col_name]]))){
B[nbB]<-length(which(ids[which(ids[[batch_col_name]]==(levels(ids[[batch_col_name]])[nbB])),,drop=FALSE][[sample_type_col_name]] %in% sample_type_tags$pool))
}
if(length(which(B>1))==0){
table.check <- c(table.check,"\nError: less than 2 pools specified in at least one batch in sample metadata.",
"\nMake sure this is not due to errors in your ",sample_type_col_name," coding.\n")
}
### Checking the unicity of samples and variables
uni.check <- function(tested.tab,tabtype,err.obj){
unicity <- duplicated(tested.tab[,1])
if(sum(unicity)>0){
#Sending back an explicit error
duptable <- t(t(table(tested.tab[,1][unicity])+1))
err.obj <- c(err.obj,paste0("\n-------\nError: your '",tabtype,"' IDs contain duplicates:\n"),
paste(rownames(duptable),duptable,sep=": ",collapse="\n"),
"\nSince identifiers are meant to be unique, please check your data.\n-------\n")
}
return(err.obj)
}
table.check <- uni.check(ids,"sample",table.check)
if(analyse == "batch_correction"){table.check <- uni.check(metaion,"variable",table.check)}
## error check
check.err(table.check)
### BC/DBC-specific processing
# Gathering mandatory information in a single object
sm.meta <- list(batch=batch_col_name, injectionOrder=injection_order_col_name, sampleType=sample_type_col_name, sampleTag=sample_type_tags)
if(analyse == "batch_correction") {
## Launch
res = norm_QCpool(ids,nbid,outlog,factbio,metaion,detail,FALSE,FALSE,method,span,valnull,sm.meta,min.norm)
## Get back original IDs
var.id <- reproduceID(res[[1]],res[[2]],"variable",var.id)
res[[1]] <- var.id$dataMatrix ; res[[2]] <- var.id$Metadata
samp.id <- reproduceID(res[[1]],res[[3]],"sample",samp.id)
res[[1]] <- samp.id$dataMatrix ; res[[3]] <- samp.id$Metadata
## Save files
save(res, file=rdata_output)
write.table(res[[1]], file=dataMatrix_out, sep = '\t', row.names=FALSE, quote=FALSE)
write.table(res[[2]], file=variableMetadata_out, sep = '\t', row.names=FALSE, quote=FALSE)
}else{
## Launch
plotsituation(ids,nbid,out_graph_pdf,out_preNormSummary,factbio,span,sm.meta)
}
}#end of meth3L