-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathali-pfam2afa-sscons.pl
executable file
·62 lines (57 loc) · 1.77 KB
/
ali-pfam2afa-sscons.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#!/usr/bin/env perl
# EPN, Fri Nov 15 13:30:45 2019
# ali-pfam2afa-sscons.pl
#
# Given a pfam formatted alignment with consensus structure annotation (#=GC SS_cons)
# convert it to aligned fasta with dot-bracket notation.
#
use warnings;
use strict;
use Getopt::Long;
# Usage message printed (via die) when the command line is malformed.
my $usage;
$usage = "ali-pfam2afa-sscons.pl\n\n";
$usage .= "Usage:\n\n";
$usage .= "Pfam (1 line Stockholm) formatted alignment to aligned FASTA:\n";
$usage .= "ali-pfam2afa-sscons.pl [OPTIONS] <alignment file in Pfam format with SS_cons annotation>\n\n";
$usage .= "Stockholm formatted alignment to aligned FASTA:\n";
$usage .= "esl-reformat pfam <stockholm alignment> | ali-pfam2afa-sscons.pl [OPTIONS] -\n\n";

# Command-line options:
#   --upper      : convert lowercase letters in each aligned sequence to uppercase
#   --dotbracket : replace every non-letter in each aligned sequence with '.', and
#                  reduce the SS_cons string to '(', ')' and '.' characters
my $do_upper      = 0;
my $do_dotbracket = 0;

my %seen_H = (); # key: sequence name, value: 1 once that sequence has been output
my %desc_H = (); # key: sequence name, value: description from its '#=GS <name> DE' line

# Getopt::Long already reports the offending option on STDERR; stop with the
# usage message instead of silently running with a misspelled option ignored.
GetOptions( "upper"      => \$do_upper,
            "dotbracket" => \$do_dotbracket) or die $usage;

# Exactly one positional argument is required: the alignment file, or '-',
# which the <> operator reads as STDIN.
if(scalar(@ARGV) != 1) { die $usage; }

while(my $line = <>) {
  if($line =~ m/^\#=GS\s+(\S+)\s+DE\s+(.+)$/) {
    # Per-sequence description; remembered so it can be appended to the
    # FASTA header when the sequence line itself is reached.
    $desc_H{$1} = $2;
  }
  elsif($line =~ /^\#=GC\s+SS\_cons\s+(\S+)/) {
    # Consensus secondary structure annotation, output as a record named SS_cons.
    my $ss = $1;
    if($do_dotbracket) {
      # Collapse every opening bracket type to '(', every closing bracket
      # type to ')', and the listed single-stranded symbols to '.'.
      # Any other characters (e.g. letters) are left unchanged.
      $ss =~ s/[\{\[\<\(]/\(/g;
      $ss =~ s/[\}\]\>\)]/\)/g;
      $ss =~ s/[\:\_\,\-\~]/\./g;
    }
    print(">SS_cons\n$ss\n");
  }
  elsif($line !~ /^\#/) {
    # Non-comment line: "<seqname> <aligned sequence>". Lines without two
    # whitespace-separated fields (blank lines, the '//' terminator) are skipped.
    if($line =~ /^(\S+)\s+(\S+)/) {
      my ($seqname, $seq) = ($1, $2);
      if($seen_H{$seqname}) { die "ERROR, alignment looks interleaved, saw $seqname twice. Reformat to pfam with 'esl-reformat pfam <input.sto>'\n"; }
      # Record the name so a second block for the same sequence is detected;
      # without this the interleaved check above could never trigger.
      $seen_H{$seqname} = 1;

      my $header = ">$seqname";
      if(defined $desc_H{$seqname}) {
        $header .= " $desc_H{$seqname}";
      }
      print($header . "\n");

      if($do_dotbracket) {
        $seq =~ s/[^A-Za-z]/./g; # every gap/non-letter character becomes '.'
      }
      if($do_upper) {
        $seq =~ tr/a-z/A-Z/;
      }
      print $seq . "\n";
    }
  }
}