-
Notifications
You must be signed in to change notification settings - Fork 3
/
LoadConfig.sh
executable file
·167 lines (143 loc) · 5.08 KB
/
LoadConfig.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
#!/usr/bin/env bash
# On SIGINT/SIGTERM the registered command clears the SIGTERM handler and then
# signals the process group whose id is this shell's PID (kill -- -$$).
# NOTE(review): trap_add is defined outside this file; presumably it adds the
# command to the given signals without discarding earlier traps - confirm.
trap_add 'trap - SIGTERM && kill -- -$$' SIGINT SIGTERM
################################################################################################################
# All per-sample work lives under <maindir>/NGSmodule_work/. The directory must
# already exist unless the first argument is "prepare".
work_dir="${maindir}/NGSmodule_work/"
if [[ $1 != "prepare" && ! -d ${work_dir} ]]; then
  color_echo "red" "Error! Can not find the work_dir: $work_dir\nPlease run 'NGSmodule CreateWorkDir -c <Config_file>' first!\n"
  exit 1
fi
############# Load SampleInfoFile ###################################################################
# Reads the sample sheet (a CSV file) into three associative arrays:
#   Sample_dict[RunID]    -> SampleID
#   Group_dict[SampleID]  -> Group
#   Layout_dict[SampleID] -> Layout
# The file is normalised IN PLACE (dos2unix, then empty lines removed) before
# it is validated and parsed. Exits with status 1 when the file is missing,
# is not named *.csv, or its rows do not all have the same number (>= 2) of
# comma-separated fields as the first row.
if [[ -f $SampleInfoFile ]]; then
  declare -A Sample_dict
  declare -A Layout_dict
  declare -A Group_dict
  echo -e ">>> Found the SampleInfoFile: $SampleInfoFile\n"

  # Validate the name before modifying anything. A glob match is anchored on
  # the real suffix, unlike an unanchored grep for ".csv".
  if [[ $SampleInfoFile != *.csv ]]; then
    color_echo "red" "ERROR! SampleInfoFile name must end with '.csv'.\n"
    exit 1
  fi

  # Convert line endings first so that CRLF-only "blank" lines become truly
  # empty and are removed by sed instead of failing the field-count check.
  # Output is discarded, so a missing dos2unix is silently tolerated.
  dos2unix "$SampleInfoFile" &>/dev/null
  sed -i '/^$/d' "$SampleInfoFile"

  # The first row only fixes the expected field count; every later row must
  # have the same count and at least two fields.
  validation=$(awk 'BEGIN {FS=","; v = "TRUE" } NR == 1 { n = NF; next } NF != n || NF<2 { v = "FALSE"; exit }END{printf(v)}' "$SampleInfoFile")
  if [[ $validation == "FALSE" ]]; then
    color_echo "red" "ERROR! Content in SampleInfoFile is not in a valid comma-separated format.\n.\n"
    exit 1
  fi

  # NOTE(review): the first row is stored like any other row; if the sheet has
  # a header line it ends up in the dictionaries as well - confirm intended.
  # "|| [[ -n $RunID ]]" keeps a last row that lacks a trailing newline without
  # feeding an artificial empty record into the loop.
  while IFS=',' read -r RunID SampleID Group Layout BatchID BatchInfo Other || [[ -n $RunID ]]; do
    # Strip all whitespace from the two key columns.
    RunID=${RunID//[[:space:]]/}
    SampleID=${SampleID//[[:space:]]/}
    # An empty string is not a valid associative-array subscript.
    if [[ -z $RunID || -z $SampleID ]]; then
      continue
    fi
    Sample_dict[$RunID]=$SampleID
    Group_dict[$SampleID]=$Group
    Layout_dict[$SampleID]=$Layout
  done <"$SampleInfoFile"
else
  color_echo "red" "ERROR! Cannot find SampleInfoFile: $SampleInfoFile. Please check your config!\n"
  exit 1
fi
###### START ######
#######################################
# List the immediate children of a directory that are directories or symlinks.
# Arguments: $1 - directory to scan
# Outputs:   one entry name per line, relative to $1, in find's order
#######################################
_ngs_list_sample_entries() {
  # The parentheses are required: without them -printf binds only to
  # "-type d" and symlinks match but are never printed.
  find "$1" -mindepth 1 -maxdepth 1 \( -type l -o -type d \) -printf '%P\n'
}

# When the work_dir exists (and we are not in "prepare" mode) resolve the task
# list, the per-task thread counts and the reference paths; otherwise only set
# placeholder texts for the summary.
if [[ -d $work_dir ]] && [[ $1 != "prepare" ]]; then
  # Sample entries of work_dir whose names match SampleGrepPattern, sorted.
  mapfile -t arr < <(_ngs_list_sample_entries "$work_dir" | grep -P "$SampleGrepPattern" | sort)

  ###### threads ######
  total_task=${#arr[@]}
  if ((total_task == 0)); then
    color_echo "red" "ERROR! No sample sub-directory found in the work_dir:$work_dir\n"
    exit 1
  fi

  if [[ $ntask_per_run =~ ^[0-9]+$ ]]; then
    # Force base 10 so that a value such as "08" is not read as invalid octal.
    ntask_per_run=$((10#$ntask_per_run))
    if ((ntask_per_run == 0)); then
      # Zero would cause a division by zero below.
      color_echo "red" "ERROR! ntask_per_run must be greater than 0!\n"
      exit 1
    fi
  elif [[ $ntask_per_run == "ALL" ]]; then
    # Run every task at once, but never more tasks than available threads.
    ntask_per_run=$((total_task > total_threads ? total_threads : total_task))
    # total_threads of 0 (or unset) must not produce a zero divisor.
    ((ntask_per_run >= 1)) || ntask_per_run=1
  else
    color_echo "red" "ERROR! ntask_per_run should be 'ALL' or an interger!\n"
    exit 1
  fi

  # Threads per task: integer share of total_threads, kept within 1..32.
  threads=$((total_threads / ntask_per_run))
  ((threads >= 1)) || threads=1
  ((threads <= 32)) || threads=32

  # Tool-specific limits derived from the per-task value.
  threads_fastp=$((threads > 16 ? 16 : threads))
  # With the cap of 32 above this limit cannot trigger; it is kept so the
  # tool limit still holds if the general cap is ever raised.
  threads_featurecounts=$((threads > 64 ? 64 : threads))
  threads_bismark=$((threads / 8))
  ((threads_bismark >= 1)) || threads_bismark=1

  ###### other parameters ######
  types=("rna" "dna" "BSdna")
  if [[ " ${types[*]} " != *" $SequenceType "* ]]; then
    color_echo "red" "ERROR! SequenceType is wrong.\nPlease check the paramaters in your ConfigFile.\n"
    exit 1
  fi

  # An explicitly configured reference wins over the path built from parts.
  if [[ -z $SortmeRNA_ref_direct ]]; then
    SortmeRNA_ref="${SortmeRNA_Dir}/${SortmeRNA_Type}.${Species}.${SortmeRNA_DataVersion}.fa"
  else
    SortmeRNA_ref=$SortmeRNA_ref_direct
  fi

  if [[ $SequenceType == "BSdna" ]]; then
    FastqScreen_mode="--bisulfite"
  else
    FastqScreen_mode=""
  fi

  # "automatic" resolves to TRUE for dna/BSdna and to FALSE otherwise.
  de_option=("TRUE" "FALSE" "automatic")
  if [[ $Deduplication == "automatic" ]]; then
    case "$SequenceType" in
      dna | BSdna)
        Deduplication="TRUE"
        ;;
      *)
        Deduplication="FALSE"
        ;;
    esac
  elif [[ " ${de_option[*]} " != *" $Deduplication "* ]]; then
    # The message lists the values this check really accepts (an empty value
    # is rejected here).
    color_echo "red" "ERROR! Deduplication must be one of 'automatic', 'TRUE' and 'FALSE'.\nPlease check the paramaters in your ConfigFile.\n"
    exit 1
  fi

  # Genome FASTA and GTF: explicit overrides, else the iGenomes layout.
  if [[ -z $Genome_direct ]]; then
    genome="$iGenomes_Dir/$Species/$Source/$Build/Sequence/WholeGenomeFasta/genome.fa"
  else
    genome=$Genome_direct
  fi
  if [[ -z $GTF_direct ]]; then
    gtf="$iGenomes_Dir/$Species/$Source/$Build/Annotation/Genes/genes.gtf"
  else
    gtf=$GTF_direct
  fi

  ###### fifo ######
  # NOTE(review): fifo is defined outside this file; presumably it sets up a
  # job limiter for ntask_per_run parallel tasks - confirm.
  fifo "$ntask_per_run"
else
  total_task="Waiting for creating the workdir"
  ntask_per_run="Waiting for creating the workdir"
  threads="Waiting for creating the workdir"
fi
################################################################################################################
# Print a summary of the global settings resolved above. Each argument is
# expanded with %b, i.e. backslash escapes are interpreted as by "echo -e".
# The advertised maximum for Threads_per_task is 32, matching the cap applied
# when the per-task thread count is computed in this file.
printf '%b\n' "########################### Global config patameters ###########################\n"
printf '%b\n' " SequenceType: $SequenceType\n maindir: ${maindir}\n rawdata_dir: ${rawdata_dir}\n work_dir: ${work_dir}\n SampleInfoFile: ${SampleInfoFile}\n SampleGrepPattern: ${SampleGrepPattern}\n\n Total_tasks: ${total_task}\n nTask_per_run: ${ntask_per_run}\n Total_threads: ${total_threads}\n Threads_per_task: ${threads} (max=32)\n"
printf '%b\n' "################################################################################\n\n\n"