-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrun_assemblytics.sh
executable file
·136 lines (116 loc) · 3.43 KB
/
run_assemblytics.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#!/bin/bash
# run_assemblytics.sh: produce an assembly QC based on a mummer comparison to a reference
#
# Requirements:
# run on a unix computer
# nucmer (mummer4 apps) and Assemblytics both in $PATH
# R packages for plotting
# two related fasta references to be compared
#
# Stephane Plaisance (VIB-NC+BITS) 2017/11/27; v1.0
#
# v1.1: mummer4 nucmer is now multithreaded
# updated Assemblytics 1.2 on 2020_03_27 from github
# https://github.com/MariaNattestad/Assemblytics
#
# visit our Git: https://github.com/Nucleomics-VIB
# check parameters for your system
version="1.1, 2020_03_27"

# Assemblytics and nucmer are looked up through PATH; the former hard-coded
# script folder is kept below for reference only.
# default_path_to_scripts="/opt/biotools/Assemblytics/scripts"

# Help text shown by -h and when a mandatory input is missing.
# It lists only the options that the getopts string really accepts: the old
# '-p <path to scripts>' entry was removed because no such option is parsed.
usage='# Usage: run_assemblytics.sh -x <reference (fasta)> -y <query-asm (fasta)>
# script version '"${version}"'
# [optional: -o <result folder|Assemblytics_results>]
# [optional: -w <uniqseqlen|10000>]
# [optional: -m <min variant length|50>]
# [optional: -M <max variant length|10000>]
# [optional: -t <threads for alignment (4)>]
# [optional: -h <this help text>]'
# Parse the command-line options into plain variables.
# The leading ':' of the option string puts getopts in silent mode, so that
# a missing argument (':' arm, OPTARG holds the option letter) and an unknown
# option ('?' arm) are each reported once, with the offending option named.
#   -x reference fasta        -> assembly1
#   -y query assembly fasta   -> assembly2
#   -o result folder          -> outpathopt
#   -w unique sequence length -> uniqseqlen
#   -m / -M min / max variant length -> minl / maxl
#   -t alignment threads      -> threads
#   -h print the usage text and exit 0
while getopts ":x:y:o:w:m:M:t:h" opt; do
  case "${opt}" in
    x) assembly1=${OPTARG} ;;
    y) assembly2=${OPTARG} ;;
    o) outpathopt=${OPTARG} ;;
    w) uniqseqlen=${OPTARG} ;;
    m) minl=${OPTARG} ;;
    M) maxl=${OPTARG} ;;
    t) threads=${OPTARG} ;;
    h) echo "${usage}" >&2; exit 0 ;;
    :) echo "Option -${OPTARG} requires an argument, try -h" >&2; exit 1 ;;
    \?) echo "Invalid option: -${OPTARG}" >&2; exit 1 ;;
  esac
done
# check executables present
# Stop at the first required program that bash cannot locate in PATH.
# The failure branch is a plain 'if' body in the current shell: an 'exit'
# placed inside '( ... )' would only terminate that subshell and the script
# would continue without the missing tool.
declare -a required_progs=( "nucmer" "R" "Assemblytics" )
for prog in "${required_progs[@]}"; do
  if ! hash "${prog}" 2>/dev/null; then
    echo "# required ${prog} not found in PATH" >&2
    exit 1
  fi
done
# Effective analysis parameters: start from the built-in defaults, then let
# any non-empty value collected from the command line take precedence.
uniq_seq_len="10000"
minlen=50
maxlen=10000
thr=4

if [[ -n "${uniqseqlen}" ]]; then
  uniq_seq_len="${uniqseqlen}"
fi
if [[ -n "${minl}" ]]; then
  minlen="${minl}"
fi
if [[ -n "${maxl}" ]]; then
  maxlen="${maxl}"
fi
if [[ -n "${threads}" ]]; then
  thr="${threads}"
fi
# Validate the two mandatory fasta inputs, first the one given with -x and
# then the one given with -y.

#######################################
# Exit with status 1 unless an assembly path was given and is a regular file.
# Globals:   usage (read)
# Arguments: $1 - rank word used in the message ("first" or "second")
#            $2 - path received for that assembly (may be empty)
# Outputs:   error text, plus the usage when the path is empty, on stdout
#######################################
require_assembly() {
  local rank=$1
  local fasta=$2

  if [[ -z "${fasta}" ]]; then
    echo "# no ${rank} assembly provided!"
    echo "${usage}"
    exit 1
  fi

  if [[ ! -f "${fasta}" ]]; then
    echo "${fasta} file not found!"
    exit 1
  fi
}

require_assembly "first" "${assembly1}"
require_assembly "second" "${assembly2}"
# other parameters or defaults
outpath="${outpathopt:-Assemblytics_results}"

# Label each assembly with its file name, minus the directory and minus the
# extension starting at the last '.f' (.fa, .fasta, .fna, ...).
# The directory is removed FIRST: stripping '.f*' from the full path would
# otherwise cut inside a dotted directory name (/data/run.final/contigs
# used to give the label 'run' instead of 'contigs').
asm1="${assembly1##*/}"
asm1="${asm1%.f*}"
asm2="${assembly2##*/}"
asm2="${asm2%.f*}"

# prefix shared by the nucmer delta file and the Assemblytics outputs
deltabase="${asm2}_vs_${asm1}"

# Quoted so that a result folder containing spaces is created as one folder;
# stop here when it cannot be created since every later step writes into it.
if ! mkdir -p -- "${outpath}"; then
  echo "# could not create the result folder ${outpath}" >&2
  exit 1
fi
# build the nucmer4 command
# The command is stored as an array, one element per argument, and executed
# directly: no 'eval', so paths with spaces or shell metacharacters reach
# nucmer unchanged. Reference comes first, query second; the delta file is
# written as ${outpath}/${deltabase}.delta.
nucmer_log="${outpath}/assemblytics-log.txt"
nucmercmd=(
  nucmer
  --maxmatch
  "--threads=${thr}"
  --minmatch=100
  --mincluster=500
  "${assembly1}"
  "${assembly2}"
  "--prefix=${outpath}/${deltabase}"
)

# show and execute
echo "## pairwise alignments with nucmer:"
echo "# ${nucmercmd[*]} > ${nucmer_log} 2>&1"

# check for failure
# stdout and stderr start a fresh log file; a failed alignment must end the
# script with a non-zero status so that callers can detect it.
if ! "${nucmercmd[@]}" > "${nucmer_log}" 2>&1; then
  echo "# the nucmer command failed, please check your inputs" >&2
  exit 1
fi
# build the Assemblytics command
# Positional arguments: delta file, output prefix, unique length required,
# minimum variant size, maximum variant size.
# Stored as an array and run without 'eval' so that every path is passed as
# a single argument whatever characters it contains.
assemblytics_log="${outpath}/assemblytics-log.txt"
cmd=(
  Assemblytics
  "${outpath}/${deltabase}.delta"
  "${outpath}/${deltabase}"
  "${uniq_seq_len}"
  "${minlen}"
  "${maxlen}"
)

# show and execute
echo
echo "## Assemblytics analysis:"
echo "# ${cmd[*]} >> ${assemblytics_log} 2>&1"

# output is appended to the log file
"${cmd[@]}" >> "${assemblytics_log}" 2>&1
status=$?

# hand the Assemblytics exit status to the caller instead of always 0
if (( status != 0 )); then
  echo "# the Assemblytics command failed, see ${assemblytics_log}" >&2
fi
exit "${status}"
########################################################################################
# man pages for the main executables used above
# Usage:
# Assemblytics delta output_prefix unique_length_required min_size max_size