-
Notifications
You must be signed in to change notification settings - Fork 1
/
3_alignRefineAndTree.sbatch
41 lines (30 loc) · 1.22 KB
/
3_alignRefineAndTree.sbatch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#!/bin/bash
# Align a FASTA file with MUSCLE, reduce the alignment to conserved blocks
# with Gblocks, and infer a phylogeny from the refined alignment with FastTree.
#
# Inputs (environment variables):
#   INDIR         directory to work in; FILE is read from it and all outputs
#                 are written to it (required)
#   FILE          input FASTA file, relative to INDIR (required)
#   MINBLOCKSIZE  value passed to Gblocks -b4, the minimum block size
#                 (default: 10)
#   ALLOWEDGAPS   value passed to Gblocks -b5, the allowed gap positions:
#                 n (none), h (half) or a (all) (default: n)
#
# Outputs (in INDIR; <stem> is FILE with a trailing ".fasta" removed):
#   <stem>.afa                                    MUSCLE alignment
#   <stem>.afa.gb                                 raw Gblocks output
#   <stem>_b4<MINBLOCKSIZE>_b5<ALLOWEDGAPS>_gb.afa   Gblocks output, whitespace removed
#   <stem>_b4<MINBLOCKSIZE>_b5<ALLOWEDGAPS>_gb.tree  FastTree output
#
# Example usage:
#
# INDIR=results FILE=HTT_condidates_centroids.fasta MINBLOCKSIZE=5 ALLOWEDGAPS=a sbatch 3_alignRefineAndTree.sbatch
#
#SBATCH -p batch
#SBATCH -N 1
#SBATCH -n 8
#SBATCH --time=1-00:00
#SBATCH --mem=32GB

# NOTE: all #SBATCH directives must stay above this line; keep executable
# commands below them.

# Print a message to stderr and abort the job with a non-zero status.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Required inputs: abort early with a clear message instead of running the
# pipeline in the wrong directory or on an empty file name.
: "${INDIR:?INDIR must be set to the working directory}"
: "${FILE:?FILE must be set to the input FASTA file name}"

# Optional inputs: fall back to the documented defaults rather than passing
# empty -b4=/-b5= options to Gblocks.
MINBLOCKSIZE=${MINBLOCKSIZE:-10}
ALLOWEDGAPS=${ALLOWEDGAPS:-n}

# Derive every file name once so all steps agree on them.
stem=${FILE%.fasta}
aligned="${stem}.afa"
gblocks_raw="${aligned}.gb"
refined_prefix="${stem}_b4${MINBLOCKSIZE}_b5${ALLOWEDGAPS}_gb"
refined="${refined_prefix}.afa"
tree="${refined_prefix}.tree"

# load module
module load MUSCLE/3.8.31

# Fail before the (potentially long) alignment if any tool is missing.
# Only MUSCLE is loaded as a module here; Gblocks and FastTree are expected
# to already be on PATH.
for tool in muscle Gblocks FastTree; do
  command -v "$tool" >/dev/null 2>&1 || die "required tool not found on PATH: $tool"
done

# go to dir
cd "$INDIR" || die "cannot change to directory: $INDIR"
[[ -r "$FILE" ]] || die "input file not readable: $INDIR/$FILE"

# align sequences
# note: use -maxiters 2 if very large alignment
muscle -in "$FILE" -out "$aligned" || die "muscle failed on $FILE"

# Get blocks of conserved sequence from the alignment file.
# -b5 (allowed gap positions): n (none), h (half) or a (all)
# -b4 (minimum block size)
# Remove any Gblocks output left over from a previous run so that the check
# below reflects this run only.
rm -f -- "$gblocks_raw"
# NOTE(review): Gblocks is reported to exit non-zero even when it succeeds,
# so its exit status is not checked; success is judged by its output file.
Gblocks "$aligned" -t=d -p=n -e=.gb -b4="$MINBLOCKSIZE" -b5="$ALLOWEDGAPS"
[[ -s "$gblocks_raw" ]] || die "Gblocks produced no output: $gblocks_raw"

# Gblocks outputs an alignment file of 10-character blocks separated by white space
# Need to delete the white spaces
# Also change extension from .afa.gb to _gb.afa
tr -d " \t" < "$gblocks_raw" > "$refined" || die "could not write $refined"

# The raw Gblocks output is kept; uncomment to remove it.
#rm -- "$gblocks_raw"

# Infer a maximum likelihood phylogeny from the refined alignment using FastTree
FastTree -nt -gtr < "$refined" > "$tree" || die "FastTree failed on $refined"