Skip to content

Commit

Permalink
Use buffer-size to limit CLI args for samtools/tabix
Browse files Browse the repository at this point in the history
  • Loading branch information
ckandoth committed Oct 23, 2018
1 parent 8d9e611 commit 7f3bd61
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions vcf2maf.pl
Original file line number Diff line number Diff line change
Expand Up @@ -359,11 +359,11 @@ sub GetBiotypePriority {
 $vcf_fh->close;

 # samtools runs faster when passed many loci at a time, but limited to around 125k args, at least
-# on CentOS 6. If there are too many loci, split them into 50k chunks and run separately
+# on CentOS 6. If there are too many loci, split them into smaller chunks and run separately
 my ( $lines, @regions_split ) = ( "", ());
 my @regions = keys %uniq_regions;
 my $chr_prefix_in_use = ( @regions and $regions[0] =~ m/^chr/ ? 1 : 0 );
-push( @regions_split, [ splice( @regions, 0, 50000 ) ] ) while @regions;
+push( @regions_split, [ splice( @regions, 0, $buffer_size ) ] ) while @regions;
 map{ my $region = join( " ", @{$_} ); $lines .= `$samtools faidx $ref_fasta $region` } @regions_split;
 foreach my $line ( grep( length, split( ">", $lines ))) {
 # Carefully split this FASTA entry, properly chomping newlines for long indels
Expand All @@ -386,7 +386,7 @@ sub GetBiotypePriority {
 # Query each variant locus on the filter VCF, using tabix, just like we used samtools earlier
 ( $lines, @regions_split ) = ( "", ());
 my @regions = keys %uniq_loci;
-push( @regions_split, [ splice( @regions, 0, 50000 ) ] ) while @regions;
+push( @regions_split, [ splice( @regions, 0, $buffer_size ) ] ) while @regions;
 # ::NOTE:: chr-prefix removal works safely here because ExAC is limited to 1..22, X, Y
 map{ my $loci = join( " ", map{s/^chr//; $_} @{$_} ); $lines .= `$tabix $filter_vcf $loci` } @regions_split;
 foreach my $line ( split( "\n", $lines )) {
Expand Down

0 comments on commit 7f3bd61

Please sign in to comment.