-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmatch2prokka.sh
executable file
·61 lines (37 loc) · 1.14 KB
/
match2prokka.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/bin/bash
# match2prokka.sh - extract matched contigs from their source fasta files
# and annotate them with Prokka.
#
# Usage: match2prokka.sh <matches_file> <fasta_dir>
#   $1  matches file: first line names the source fasta files (one per
#       column), following lines hold contig names in matching columns
#   $2  directory that contains the fasta files named in the header line
#
# All intermediate and output files are written to ./match2prokka_<base>,
# where <base> is the matches file name without its last extension.
module -s load prokka
module -s load diamond

if [[ $# -lt 2 ]]; then
  echo "Usage: ${0##*/} <matches_file> <fasta_dir>" >&2
  # 'return' covers the case where the script is sourced; 'exit' otherwise.
  return 1 2>/dev/null || exit 1
fi

DELTAMATCHES=$(readlink -f -- "$1")
FASTADIR=$(readlink -f -- "$2")
if [[ ! -r "$DELTAMATCHES" ]]; then
  echo "error: cannot read matches file: $1" >&2
  return 1 2>/dev/null || exit 1
fi
if [[ ! -d "$FASTADIR" ]]; then
  echo "error: not a directory: $2" >&2
  return 1 2>/dev/null || exit 1
fi
echo "$FASTADIR"

filename=$(basename -- "$DELTAMATCHES")
base=${filename%.*}
# Deliberately not named TMPDIR: that is the standard temp-directory
# environment variable, and overwriting it with a relative path would point
# child processes at a location that no longer resolves after the cd below.
WORKDIR="match2prokka_${base}"
# -p so that re-running with an existing working directory is not an error.
mkdir -v -p -- "$WORKDIR" || { return 1 2>/dev/null || exit 1; }
# Stop here if the directory cannot be entered; otherwise every later step
# would write its files into the caller's current directory.
cd -- "$WORKDIR" || { return 1 2>/dev/null || exit 1; }
# Split the match columns into separate files (names_1 .. names_NM),
# retaining row order.
NM=2
echo "processing $NM columns of matches"
# Extract contig names and capture the source fasta file names.
# Line 1 of the matches file: its first NM fields are printed on one line
# and captured in FASTAFILES.
# Remaining lines: field i is written, prefixed with ">", to names_<i>.
FASTAFILES=$(awk -v NM="$NM" '
  BEGIN { RS = "\n"; FS = " " }
  NR == 1 {
    for (i = 1; i <= NM; i++)
      printf "%s%s", $i, (i < NM ? OFS : ORS)
  }
  NR > 1 {
    for (i = 1; i <= NM; i++) {
      name = "names_" i
      print (">" $i) > name
    }
  }
' "$DELTAMATCHES") || {
  echo "error: failed to parse matches file: $DELTAMATCHES" >&2
  # 'return' covers the case where the script is sourced; 'exit' otherwise.
  return 1 2>/dev/null || exit 1
}
echo "$FASTAFILES"
# Retrieve the sequences for each column's contig names from the source
# fasta file listed at the same position in FASTAFILES.
# Split the header fields once instead of spawning echo|cut per iteration.
read -r -a fasta_names <<< "$FASTAFILES"
for ((i = 1; i <= NM; i++)); do
  file="names_${i}"
  fasta=${fasta_names[i-1]}
  echo "names file is $file"
  echo "source fastafile is $fasta"
  if [[ -z "$fasta" || ! -f "$FASTADIR/$fasta" ]]; then
    # Skip instead of leaving behind an empty ${file}_seqs.
    echo "warning: source fasta for $file not found: $FASTADIR/$fasta" >&2
    continue
  fi
  # Records are split on ">" so $1 is the whole header line of a record.
  # First file (NR==FNR): remember every name. Second file: re-emit each
  # non-empty record whose header line exactly equals a remembered name.
  awk 'BEGIN{RS=">";FS="\n"}NR==FNR{a[$1]++}NR>FNR{if ($1 in a && $0!="") printf ">%s",$0}' \
    "$file" "$FASTADIR/$fasta" > "${file}_seqs"
done
# Annotate each extracted sequence set with Prokka.
# TODO - add in CARD and BACMET databases
for file in names_*_seqs; do
  # With no matching files the glob stays literal; skip it rather than
  # hand a non-existent file name to prokka.
  [[ -e "$file" ]] || continue
  echo "running $file through PROKKA"
  # Each run gets its own output directory. Prokka's default output
  # directory is date-stamped and it refuses to reuse an existing one
  # without --force, so a second run on the same day would otherwise abort.
  prokka --outdir "PROKKA_${file}" --prefix "PROKKA_${file}" --quiet "$file"
done
# diamond: placeholder - no diamond step is run by this script yet.
printf '%s\n' 'cleaning up...'
# Unload the modules in the same order they were loaded.
for mod in prokka diamond; do
  module unload "$mod"
done
# Step back out of the working directory.
cd ..