-
Notifications
You must be signed in to change notification settings - Fork 23
/
DBinfo.pl
94 lines (72 loc) · 1.96 KB
/
DBinfo.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;
use FindBin;
use lib "$FindBin::Bin/perlLib";
use Cwd qw/getcwd abs_path/;
use File::Copy;
$| = 1;
use taxTree;
use Util;
# Command-line driver: summarise the composition of a database at a chosen
# taxonomic rank (number of distinct taxon IDs, number of contigs, total length).
#
# Positional arguments:
#   $ARGV[0]  database directory; the taxonomy is read from '<dir>/taxonomy'
#   $ARGV[1]  rank by which contigs are grouped in the report
#   $ARGV[2]  optional filter rank; a false value disables filtering
#   $ARGV[3]  filter name, required whenever a filter rank is given
#   $ARGV[4]  optional flag; a true value lists the taxon IDs behind each group
unless(scalar(@ARGV) >= 2)
{
	print_help();
}

my $DB = $ARGV[0];
my $reportLevel = $ARGV[1];
my $limitTo1 = $ARGV[2];
my $limitTo2 = $ARGV[3];
my $printDetails = $ARGV[4];

# Fail fast: comparing names against an undefined filter name would only emit
# one warning per contig and silently discard (nearly) every contig.
if($limitTo1 and (not defined $limitTo2))
{
	die "Filter rank '$limitTo1' was specified without a taxon name to filter on - please specify both or neither.\n";
}

my %taxonID_2_contigs;
my %contigLength;
Util::read_taxonIDs_and_contigs($DB, \%taxonID_2_contigs, \%contigLength);

my $taxonomyDir = $DB . '/taxonomy';
my $taxonomy = taxTree::readTaxonomy($taxonomyDir);

# %report: group name => [ contig count, summed contig length, { taxon ID => contig count } ]
my %report;
foreach my $contigID (keys %contigLength)
{
	# Util::extractTaxonID derives the taxon ID from the contig ID.
	my $taxonID = Util::extractTaxonID($contigID, '?', '?');

	# Build rank => name for the taxon itself followed by its ancestors.
	# If several nodes in the lineage share a rank, the later one wins.
	my @lineage = ($taxonID, taxTree::get_ancestors($taxonomy, $taxonID));
	my %taxonID_ranks;
	foreach my $nodeID (@lineage)
	{
		my $rank = $taxonomy->{$nodeID}{rank};
		die "No rank defined in taxonomy for node '$nodeID' (lineage of contig '$contigID')" unless(defined $rank);
		$taxonID_ranks{$rank} = taxTree::taxon_id_get_name($nodeID, $taxonomy);
	}

	# Optional filter: keep only contigs whose lineage carries the requested name at the requested rank.
	if($limitTo1)
	{
		next unless((exists $taxonID_ranks{$limitTo1}) and ($taxonID_ranks{$limitTo1} eq $limitTo2));
	}

	# Contigs without an entry at the report rank are pooled under 'Undefined'.
	my $reportKey = (defined $taxonID_ranks{$reportLevel}) ? $taxonID_ranks{$reportLevel} : 'Undefined';

	$report{$reportKey}[0]++;
	$report{$reportKey}[1] += $contigLength{$contigID};
	$report{$reportKey}[2]{$taxonID}++;
}

print "\nDB statistics at level '$reportLevel':\n";
foreach my $v (sort keys %report)
{
	# The number of distinct taxon IDs in a group is reported as its genome count.
	my $n_genomes = scalar(keys %{$report{$v}[2]});
	print "\t - ${v}: $n_genomes genomes ($report{$v}[0] contigs), ", sprintf("%.2f", $report{$v}[1] / (1024**2)), "mb.\n";
	if($printDetails)
	{
		# Sorted so that repeated runs produce identical output.
		foreach my $taxonID (sort keys %{$report{$v}[2]})
		{
			print "\t\t", $taxonID, " ", $taxonomy->{$taxonID}{names}[0], "\n";
		}
	}
}
print "\n";
# Print the usage message to STDOUT and terminate the program.
# Never returns: the trailing exit ends the script with the default exit status 0.
# The text covers all five positional arguments the script reads from @ARGV.
sub print_help
{
	print qq(
DBinfo.pl

Print some statistics on database composition (# genomes, megabytes).

Usage:

perl DBinfo.pl dbNAME taxonomyLevel [limitRank limitName [printDetails]]

Optional arguments:

limitRank limitName - only count contigs whose lineage carries the name limitName at rank limitRank; pass 0 as limitRank to disable filtering.
printDetails - if set to a true value, list the taxon IDs and names behind every reported group.

Example:

perl DBinfo.pl databases/miniSeq superfamily

);
	exit;
}