-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfastaSplit.pl
executable file
·37 lines (30 loc) · 990 Bytes
/
fastaSplit.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#!/usr/bin/perl -w
# Usage: fastaSplit.pl input.fa prefix seqs_per_file
# (the third argument is the number of records written to each output file)
# adapted from https://www.biostars.org/p/2226/
# also available as 'faSplit sequence input.fa 100 part_' (Kent tools)
use strict;
use Bio::SeqIO;
# Split a FASTA file into numbered chunks of at most $seqnum records each.
#
# Command-line arguments (positional):
#   $from     - path of the input FASTA file
#   $toprefix - prefix of the output files, named <prefix>_<NN>.fasta
#   $seqnum   - maximum number of records written to each output file
my $from = shift;
my $toprefix = shift;
my $seqnum = shift;

# Fail early with a readable message instead of an "Illegal division by
# zero" or an uninitialized-value warning further down.
die "Usage: $0 input.fa prefix seqs_per_file\n"
    unless defined $from && defined $toprefix && defined $seqnum;
die "seqs_per_file must be a positive integer, got '$seqnum'\n"
    unless $seqnum =~ /\A[0-9]+\z/ && $seqnum > 0;

# Count records in the input file (lines starting with '>').  This is done
# in Perl rather than by shelling out to grep, so file names containing
# spaces or shell metacharacters are handled safely.
open(my $count_fh, '<', $from)
    or die "Cannot open '$from' for reading: $!\n";
my $tot = 0;
while (my $line = <$count_fh>) {
    $tot++ if $line =~ /^>/;
}
close($count_fh);

# Zero-padding width of the numeric file suffix.  The formula is kept as-is
# so that output file names stay the same as in earlier versions.
my $dig = length(int($tot / $seqnum)) + 1;

# process input
my $in = Bio::SeqIO->new(-file => $from);
my $count = 0;    # records written so far
my $fcount = 1;   # index of the current output file
my $suff = sprintf("%0${dig}d", $fcount);

# create first output
my $out = _open_output($toprefix, $suff);

while (my $seq = $in->next_seq) {
    # Every $seqnum records, switch to the next output file.
    if ($count > 0 && $count % $seqnum == 0) {
        $fcount++;
        $suff = sprintf("%0${dig}d", $fcount);
        # Progress output: one suffix per line (previously the suffixes
        # were printed back to back with no separator).
        print STDOUT "$suff\n";
        $out = _open_output($toprefix, $suff);
    }
    $out->write_seq($seq);
    $count++;
}

# Open a FASTA writer for the chunk <prefix>_<suffix>.fasta.
sub _open_output {
    my ($prefix, $suffix) = @_;
    return Bio::SeqIO->new(
        -file   => ">" . $prefix . "_" . $suffix . ".fasta",
        -format => 'fasta',
    );
}