-
Notifications
You must be signed in to change notification settings - Fork 28
/
Copy pathAssemblytics
executable file
·143 lines (117 loc) · 5.91 KB
/
Assemblytics
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#!/bin/bash
# Author: Maria Nattestad
# github.com/marianattestad/assemblytics
USAGE="Assemblytics delta output_prefix unique_length_required min_size max_size"
if [ -z "$1" ]
then
echo "ERROR in Assemblytics: No delta file given"
echo "Usage:"
echo $USAGE
exit
fi
if [ -z "$2" ]
then
echo "ERROR in Assemblytics: No output prefix given"
echo "Usage:"
echo $USAGE
exit
fi
if [ -z "$3" ]
then
echo "ERROR in Assemblytics: No unique length requirement parameter given"
echo "Usage:"
echo $USAGE
exit
fi
if [ -z "$4" ]
then
echo "ERROR in Assemblytics: No minimum size parameter given"
echo "Usage:"
echo $USAGE
exit
fi
if [ -z "$5" ]
then
echo "ERROR in Assemblytics: No maximum size parameter given"
echo "Usage:"
echo $USAGE
exit
fi
DELTA=${1?"$USAGE"}
OUTPUT_PREFIX=${2?"$USAGE"}
UNIQUE_LENGTH=${3?"$USAGE"}
MINIMUM_SIZE=${4?"$USAGE"}
MAXIMUM_SIZE=${5?"$USAGE"}
>&2 echo Input delta file: $DELTA
>&2 echo Output prefix: $OUTPUT_PREFIX
>&2 echo Unique anchor length: $UNIQUE_LENGTH
>&2 echo Minimum variant size to call: $MINIMUM_SIZE
>&2 echo Maximum variant size to call: $MAXIMUM_SIZE
mkdir -p ${OUTPUT_PREFIX%/*}/
# In the web application, this log file is used to show status updates in the GUI
LOG_FILE=${OUTPUT_PREFIX%/*}/progress.log
>&2 echo "Logging progress updates in $LOG_FILE"
SCRIPT_PATH=$(dirname $BASH_SOURCE)
echo 'script path:' $SCRIPT_PATH
# Check that Rscript is available (needed by R scripts)
if [ -z $(which Rscript) ]; then
>&2 echo "Error: No Rscript executable available in PATH: $PATH."
exit
fi
echo "${OUTPUT_PREFIX##*/}" >> $LOG_FILE
echo "STARTING,DONE,Starting unique anchor filtering." >> $LOG_FILE
>&2 echo "1. Filter delta file"
$SCRIPT_PATH/Assemblytics_uniq_anchor.py --delta $DELTA --unique-length $UNIQUE_LENGTH --out $OUTPUT_PREFIX --keep-small-uniques
if [ -e $OUTPUT_PREFIX.Assemblytics.unique_length_filtered_l$UNIQUE_LENGTH.delta.gz ];
then
echo "UNIQFILTER,DONE,Step 1: Assemblytics_uniq_anchor.py completed successfully. Now finding variants between alignments." >> $LOG_FILE
>&2 echo "2. Finding variants between alignments"
$SCRIPT_PATH/Assemblytics_between_alignments.pl $OUTPUT_PREFIX.coords.tab $MINIMUM_SIZE $MAXIMUM_SIZE all-chromosomes exclude-longrange bed > $OUTPUT_PREFIX.variants_between_alignments.bed
if [ -e $OUTPUT_PREFIX.variants_between_alignments.bed ];
then
echo "BETWEEN,DONE,Step 2: Assemblytics_between_alignments.pl completed successfully. Now finding variants within alignments." >> $LOG_FILE
>&2 echo "3. Finding variants within alignments"
$SCRIPT_PATH/Assemblytics_within_alignment.py --delta $OUTPUT_PREFIX.Assemblytics.unique_length_filtered_l$UNIQUE_LENGTH.delta.gz --min $MINIMUM_SIZE --output $OUTPUT_PREFIX.variants_within_alignments.bed
if [ -e $OUTPUT_PREFIX.variants_within_alignments.bed ];
then
echo "WITHIN,DONE,Step 3: Assemblytics_within_alignment.py completed successfully. Now combining the two sets of variants together." >> $LOG_FILE
>&2 echo "4. Combine variants between and within alignments";
HEADER="#reference\tref_start\tref_stop\tID\tsize\tstrand\ttype\tref_gap_size\tquery_gap_size\tquery_coordinates\tmethod"
cat <(echo -e $HEADER) $OUTPUT_PREFIX.variants_within_alignments.bed $OUTPUT_PREFIX.variants_between_alignments.bed > $OUTPUT_PREFIX.Assemblytics_structural_variants.bed
if [ -e $OUTPUT_PREFIX.Assemblytics_structural_variants.bed ];
then
echo "COMBINE,DONE,Step 4: Variants combined successfully. Now generating figures and summary statistics." >> $LOG_FILE
$SCRIPT_PATH/Assemblytics_variant_charts.R $OUTPUT_PREFIX $MINIMUM_SIZE $MAXIMUM_SIZE
$SCRIPT_PATH/Assemblytics_index.py -coords $OUTPUT_PREFIX.coords.csv -out $OUTPUT_PREFIX
$SCRIPT_PATH/Assemblytics_dotplot.R $OUTPUT_PREFIX
cat $OUTPUT_PREFIX.coords.tab | awk '{print $7,$5}' OFS='\t' | sort | uniq | sort -k2,2nr > $OUTPUT_PREFIX.coords.ref.genome
cat $OUTPUT_PREFIX.coords.tab | awk '{print $8,$6}' OFS='\t' | sort | uniq | sort -k2,2nr > $OUTPUT_PREFIX.coords.query.genome
$SCRIPT_PATH/Assemblytics_Nchart.R $OUTPUT_PREFIX
$SCRIPT_PATH/Assemblytics_summary.py -i $OUTPUT_PREFIX.Assemblytics_structural_variants.bed -min $MINIMUM_SIZE -max $MAXIMUM_SIZE > $OUTPUT_PREFIX.Assemblytics_structural_variants.summary
# zip up results for quick download of all results
zip $OUTPUT_PREFIX.Assemblytics_results.zip $OUTPUT_PREFIX.Assemblytics*
# create a small preview file of the variants for the GUI
head $OUTPUT_PREFIX.Assemblytics_structural_variants.bed | column -t > $OUTPUT_PREFIX.variant_preview.txt
if grep -q "Total" $OUTPUT_PREFIX.Assemblytics_structural_variants.summary;
then
echo "SUMMARY,DONE,Step 5: Assemblytics_summary.py completed successfully" >> $LOG_FILE
else
echo "SUMMARY,FAIL,Step 5: Assemblytics_summary.py failed" >> $LOG_FILE
exit 1
fi
else
echo "COMBINE,FAIL,Step 4: combining variants failed" >> $LOG_FILE
exit 1
fi
else
echo "WITHIN,FAIL,Step 3: Assemblytics_within_alignment.py failed: Possible problem before this step or with Python on server." >> $LOG_FILE
exit 1
fi
else
echo "BETWEEN,FAIL,Step 2: Assemblytics_between_alignments.pl failed: Possible problem with Perl or show-coords on server." >> $LOG_FILE
exit 1
fi
else
echo "UNIQFILTER,FAIL,Step 1: Assemblytics_uniq_anchor.py failed: Possible problem with Python or Python packages on server." >> $LOG_FILE
exit 1
fi