-
Notifications
You must be signed in to change notification settings - Fork 42
/
Copy pathannoscriptConfig.sh
executable file
·177 lines (148 loc) · 4.15 KB
/
annoscriptConfig.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
#!/bin/bash
# Strict mode: stop at the first failing command (-e) and treat any
# reference to an unset variable as an error (-u).
# Uncomment the following line to trace each command while debugging.
#set -x
set -eu
# usage: print the help text for this script on stderr.
#
# Globals (read, all optional): txtcyn, txtrst, bldblu, txtbld, bldred
#   -- presumably terminal colour/style escape sequences provided by the
#   caller's environment; this script does not define them itself.
#   They are expanded with an empty default (${var-}) so that the help
#   text is still printed when they are absent, even though the script
#   runs under `set -u`.
# Arguments: none
# Outputs:   help text on stderr
usage()
{
cat <<EOF >&2
${txtcyn-}
Usage:
$0 options${txtrst-}
${bldblu-}Function${txtrst-}:
This script is used to create configuration file for annocript.pl.
${txtbld-}OPTIONS${txtrst-}:
-d Annoscript working directory
${bldred-}[Relative path, NECESSARY]${txtrst-}
-D Annoscript install directory
${bldred-}[Default: /MPATHB/soft/Annoscript, NECESSARY]${txtrst-}
-f Fastaseqs file.
Allowed characters [A-za-z0-9\_\-]. Allowed extensions (fa|fasta).
Please use a dot only to separate the extension!
${bldred-}[NECESSARY]${txtrst-}
EOF
}
# Option defaults. -d (working directory) and -f (FASTA file) have no
# default and must be supplied on the command line; -D falls back to the
# standard install location.
anno_work_dir=
anno_install_dir=/MPATHB/soft/Annoscript
file=

# Parse the command line. -h and any option getopts rejects (unknown flag
# or missing argument) print the help text and exit with status 1.
while getopts "hd:D:f:" OPTION
do
  case "${OPTION}" in
    h)
      usage
      exit 1
      ;;
    d)
      anno_work_dir=$OPTARG
      ;;
    D)
      anno_install_dir=$OPTARG
      ;;
    f)
      file=$OPTARG
      ;;
    *)
      usage
      exit 1
      ;;
  esac
done

# Both options marked NECESSARY without a default must be present;
# otherwise the generated config would contain an empty fastaSeqs entry.
if [[ -z "${anno_work_dir}" || -z "${file}" ]]; then
  echo "Error: options -d and -f are both required." >&2
  usage
  exit 1
fi

# The output files are written inside the working directory, so fail
# early with a clear message instead of a raw redirection error.
if [[ ! -d "${anno_work_dir}" ]]; then
  echo "Error: working directory '${anno_work_dir}' does not exist." >&2
  exit 1
fi
# Write the Annocript user configuration into the working directory.
# The heredoc is unquoted so that ${file} and ${anno_install_dir} are
# expanded; everything else is emitted literally.
# The redirect target is quoted so a working directory containing
# whitespace does not trigger an "ambiguous redirect".
# portraitPath and dna2pepPath are derived from the install directory so
# that -D is honoured; with the default install directory the output is
# identical to the previously hard-coded paths.
# NOTE(review): the generated file embeds a fixed database password and a
# contact e-mail address -- consider supplying them from the environment.
cat <<END >"${anno_work_dir}/config_user.txt"
# File configuration for the user of Annocript
# READ CAREFULLY!!!
# This file has been written with a specific sintax.
# The variables MUST stay in the format: variable = value
# A series of hashes (#########) closes the parameters to read
# Parameters of BLAST programs without a value assigned will not be used (i.e. word_sizeX = )
# When you want to execute something you have to write YES (in upper case) or NO otherwise
# other strings will give error.
##############################
#Allowed characters [A-za-z0-9\_\-]. Allowed extensions (fa|fasta). Please use a dot only to separate the extension!
fastaSeqs = ${file}
#organisms to blast ('all' means all the organisms in UniProt are taken)
#please use 'all' or a file name with organisms names
#Such file must be placed in your working directory (i.e. ann_works)
#Selection of the organisms works only if the TrEMBL database is used!
blastedOrganism = all
#How to extract GO terms: you can choose to extract
#for proteins ('proteins'), domains ('domains') or for both ('both')
goTermsAss = both
#Steps to perform
doDbCreation = YES
doExecutePrograms = YES
doBuildOutput = YES
extractStatistics = YES
#Analyses to execute
doBlastxSP = YES
doBlastxTRorUf = YES
doRpstblastn = YES
doBlastn = YES
doPortrait = YES
doDna2Pep = YES
#Generation of a GFF database
useGFFDB = NO
#Write YES, if you want GFF output files. Using NO increases the speed.)
printGFFOutput = NO
#BLASTX and BLASTP PARAMETERS (we use word_size 4 and threshold 18 to reduce computational time)
#(outfmt can be only 0 with this version of Annocript)
#Currently you can only use these parameters. Please ask in the forum if you need others.
word_sizeX = 4
evalueX = 1E-5
num_descriptionsX = 5
num_alignmentsX = 5
max_target_seqsX =
num_threadsX = 15
thresholdX = 18
matrixX =
#BLASTN PARAMETERS
word_sizeN =
evalueN = 0.00001
num_descriptionsN = 1
num_alignmentsN = 1
max_target_seqsN =
num_threadsN = 4
thresholdN =
#RPSBLAST and RPSTBLASTN PARAMETERS
word_sizeRPS =
evalueRPS = 0.00001
num_descriptionsRPS = 20
num_alignmentsRPS = 20
max_target_seqsRPS =
thresholdRPS =
#Number of threads for parallel executions (Used only for RPSBLAST)
threads4Parallel = 30
#BLAST results with evalue lower than evalMax will be shown in the tabular output
evalMax = 0.00001
#DNA2PEP PARAMETERS
d2pMode = none
#PLOTS
#Number of top scored elements to show in the plots (maximum is 50)
topToShow = 20
#Type of plot to show [currently you can use only barplot]
plotType = barplot
#Thresholds to be non-coding. They guide the heuristic in Annocript
#Minimum Portrait score
NCThresh = 0.95
#Maximum length of the ORF
NCORFLength = 100
#Minimum length of the transcript
NCSeqLength = 200
#FIXED PARAMETERS (You should set only once)#
#Database account info
mySqlUser = annoscript
mySqlPass = annoscript123
#UNIPROT informations for access
uniprotWebUser = anonymous
uniprotWebPass = tchen@genetics.ac.cn
#Programs Paths
blastPath = /MPATHB/soft/ncbi-blast-2.2.31+/bin/
portraitPath = ${anno_install_dir}/DL_PROGRAMS/portrait-1.1/portrait-1.1.pl
dna2pepPath = ${anno_install_dir}/DL_PROGRAMS/dna2pep-1.1/dna2pep.py
##############################
END
# Write folders.txt: one line holding the absolute working directory and
# the install directory, separated by a single space.
# A relative -d value is anchored at the current directory (as before);
# an absolute one is used as-is instead of being prefixed with $(pwd).
case "${anno_work_dir}" in
  /*) work_dir_abs=${anno_work_dir} ;;
  *)  work_dir_abs="$(pwd)/${anno_work_dir}" ;;
esac
# Quoted target so a working directory containing whitespace does not
# cause an "ambiguous redirect".
printf '%s %s\n' "${work_dir_abs}" "${anno_install_dir}" \
  >"${anno_work_dir}/folders.txt"