Skip to content

Commit

Permalink
Merge branch 'release/v3.5.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
keiranmraine committed Feb 1, 2017
2 parents fbf793c + bd88f3a commit e04f26f
Show file tree
Hide file tree
Showing 13 changed files with 440 additions and 163 deletions.
8 changes: 7 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ addons:
- unzip
- libpng12-dev
- libexpat1-dev
- libgoogle-perftools-dev
- lsof

install: true

Expand All @@ -37,4 +39,8 @@ before_script:

script:
- ./setup.sh ~/wtsi-opt
- ~/wtsi-opt/bin/samtools view # dump usage to show intact
- $HOME/wtsi-opt/bin/samtools view # dump usage to show intact
- $HOME/wtsi-opt/bin/diff_bams -h
- $HOME/wtsi-opt/bin/reheadSQ -h
- $HOME/wtsi-opt/bin/bam_stats -h
- perl -I $HOME/wtsi-opt/lib/perl5 ~/wtsi-opt/bin/bwa_mem.pl -h
12 changes: 12 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
### 3.5.0
* Adds RNA downloads to PanCancer download tool `gnos_pull.pl`
* Hardening of external process handling in `PCAP::Threaded`
* Adds C version of `diff_bams`

### 3.4.0
* Significant speed-up of BAM generation under `bwa_mem.pl` by using a separate process for compression of the mark-duplicates output and for streaming BAS generation. This cannot be done in the same way for CRAM.

### 3.3.4
* Reduce disk usage when running `bwa_mem.pl`
* Improve throughput via slightly unintuitive use of additional pipes

### 3.3.0
* adds `map_threads|mt` option to `bwa_mem.pl` to allow more control of parallel processing in one shot submission.
* adds `bwa_pl|l` option to `bwa_mem.pl` to allow preload of different malloc libraries.
Expand Down
2 changes: 1 addition & 1 deletion INSTALL.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ bwa_mem.pl will only function when 0.7.x installed
```
apt-get update && \
apt-get -y install \
build-essential zlib1g-dev libncurses5-dev libcurl4-gnutls-dev libssl-dev libexpat1-dev nettle-dev &&\
build-essential zlib1g-dev libncurses5-dev libcurl4-gnutls-dev libssl-dev libexpat1-dev nettle-dev lsof libgoogle-perftools-dev &&\
apt-get clean
```

Expand Down
129 changes: 1 addition & 128 deletions MANIFEST
Original file line number Diff line number Diff line change
@@ -1,29 +1,4 @@
.travis.yml
bam_out/logs_bwamem_wibble/PCAP_Bam_merge_and_mark_dup.0.err
bam_out/logs_bwamem_wibble/PCAP_Bam_merge_and_mark_dup.0.out
bam_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.1.err
bam_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.1.out
bam_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.2.err
bam_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.2.out
bam_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.3.err
bam_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.3.out
bam_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.4.err
bam_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.4.out
bam_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.5.err
bam_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.5.out
bam_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.6.err
bam_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.6.out
bam_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.7.err
bam_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.7.out
bam_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.8.err
bam_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.8.out
bam_out/logs_bwamem_wibble/PCAP_Bwa_split_in.1.err
bam_out/logs_bwamem_wibble/PCAP_Bwa_split_in.1.out
bam_out/wibble.bam
bam_out/wibble.bam.bai
bam_out/wibble.bam.bas
bam_out/wibble.bam.md5
bam_out/wibble.bam.met
bin/bam_stats.pl
bin/bam_to_sra_sub.pl
bin/bamToBw.pl
Expand All @@ -49,116 +24,14 @@ c/c_tests/minunit.h
c/c_tests/runtests.sh
c/c_tests/tests_log
c/dbg.h
c/diff_bams.c
c/khash.h
c/reheadSQ.c
CHANGES.md
dists/patch/Bio-BigFile_build.patch
dists/snappy-1.1.2.tar.gz
docs.tar.gz
examples/gnos_pull.ini
fastq/1/10658_1.fq.gz
fastq/1/10659_1.fq.gz
fastq/1/10660_1.fq.gz
fastq/1/10661_1.fq.gz
fastq/1/10662_1.fq.gz
fastq/1/10663_1.fq.gz
fastq/1/10664_1.fq.gz
fastq/1/10665_1.fq.gz
fastq/2/10658_2.fq.gz
fastq/2/10659_2.fq.gz
fastq/2/10660_2.fq.gz
fastq/2/10661_2.fq.gz
fastq/2/10662_2.fq.gz
fastq/2/10663_2.fq.gz
fastq/2/10664_2.fq.gz
fastq/2/10665_2.fq.gz
fastq_i/10658_i.fq.gz
fastq_i/10659_i.fq.gz
fastq_i/10660_i.fq.gz
fastq_i/10661_i.fq.gz
fastq_i/10662_i.fq.gz
fastq_i/10663_i.fq.gz
fastq_i/10664_i.fq.gz
fastq_i/10665_i.fq.gz
fastq_i_out/logs_bwamem_wibble/PCAP_Bam_merge_and_mark_dup.0.err
fastq_i_out/logs_bwamem_wibble/PCAP_Bam_merge_and_mark_dup.0.out
fastq_i_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.1.err
fastq_i_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.1.out
fastq_i_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.2.err
fastq_i_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.2.out
fastq_i_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.3.err
fastq_i_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.3.out
fastq_i_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.4.err
fastq_i_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.4.out
fastq_i_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.5.err
fastq_i_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.5.out
fastq_i_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.6.err
fastq_i_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.6.out
fastq_i_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.7.err
fastq_i_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.7.out
fastq_i_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.8.err
fastq_i_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.8.out
fastq_i_out/logs_bwamem_wibble/PCAP_Bwa_split_in.1.err
fastq_i_out/logs_bwamem_wibble/PCAP_Bwa_split_in.1.out
fastq_i_out/logs_bwamem_wibble/PCAP_Bwa_split_in.2.err
fastq_i_out/logs_bwamem_wibble/PCAP_Bwa_split_in.2.out
fastq_i_out/logs_bwamem_wibble/PCAP_Bwa_split_in.3.err
fastq_i_out/logs_bwamem_wibble/PCAP_Bwa_split_in.3.out
fastq_i_out/logs_bwamem_wibble/PCAP_Bwa_split_in.4.err
fastq_i_out/logs_bwamem_wibble/PCAP_Bwa_split_in.4.out
fastq_i_out/logs_bwamem_wibble/PCAP_Bwa_split_in.5.err
fastq_i_out/logs_bwamem_wibble/PCAP_Bwa_split_in.5.out
fastq_i_out/logs_bwamem_wibble/PCAP_Bwa_split_in.6.err
fastq_i_out/logs_bwamem_wibble/PCAP_Bwa_split_in.6.out
fastq_i_out/logs_bwamem_wibble/PCAP_Bwa_split_in.7.err
fastq_i_out/logs_bwamem_wibble/PCAP_Bwa_split_in.7.out
fastq_i_out/logs_bwamem_wibble/PCAP_Bwa_split_in.8.err
fastq_i_out/logs_bwamem_wibble/PCAP_Bwa_split_in.8.out
fastq_i_out/wibble.bam
fastq_i_out/wibble.bam.bai
fastq_i_out/wibble.bam.bas
fastq_i_out/wibble.bam.md5
fastq_i_out/wibble.bam.met
fastq_out/logs_bwamem_wibble/PCAP_Bam_merge_and_mark_dup.0.err
fastq_out/logs_bwamem_wibble/PCAP_Bam_merge_and_mark_dup.0.out
fastq_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.1.err
fastq_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.1.out
fastq_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.2.err
fastq_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.2.out
fastq_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.3.err
fastq_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.3.out
fastq_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.4.err
fastq_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.4.out
fastq_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.5.err
fastq_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.5.out
fastq_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.6.err
fastq_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.6.out
fastq_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.7.err
fastq_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.7.out
fastq_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.8.err
fastq_out/logs_bwamem_wibble/PCAP_Bwa_bwa_mem.8.out
fastq_out/logs_bwamem_wibble/PCAP_Bwa_split_in.1.err
fastq_out/logs_bwamem_wibble/PCAP_Bwa_split_in.1.out
fastq_out/logs_bwamem_wibble/PCAP_Bwa_split_in.2.err
fastq_out/logs_bwamem_wibble/PCAP_Bwa_split_in.2.out
fastq_out/logs_bwamem_wibble/PCAP_Bwa_split_in.3.err
fastq_out/logs_bwamem_wibble/PCAP_Bwa_split_in.3.out
fastq_out/logs_bwamem_wibble/PCAP_Bwa_split_in.4.err
fastq_out/logs_bwamem_wibble/PCAP_Bwa_split_in.4.out
fastq_out/logs_bwamem_wibble/PCAP_Bwa_split_in.5.err
fastq_out/logs_bwamem_wibble/PCAP_Bwa_split_in.5.out
fastq_out/logs_bwamem_wibble/PCAP_Bwa_split_in.6.err
fastq_out/logs_bwamem_wibble/PCAP_Bwa_split_in.6.out
fastq_out/logs_bwamem_wibble/PCAP_Bwa_split_in.7.err
fastq_out/logs_bwamem_wibble/PCAP_Bwa_split_in.7.out
fastq_out/logs_bwamem_wibble/PCAP_Bwa_split_in.8.err
fastq_out/logs_bwamem_wibble/PCAP_Bwa_split_in.8.out
fastq_out/wibble.bam
fastq_out/wibble.bam.bai
fastq_out/wibble.bam.bas
fastq_out/wibble.bam.md5
fastq_out/wibble.bam.met
insilico_21.bam
INSTALL.md
lib/PCAP.pm
lib/PCAP/Bam.pm
Expand Down
1 change: 1 addition & 0 deletions bin/bwa_mem.pl
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ sub setup {
$_ = "$cwd/$_" unless($_ =~ m|^/|);
push @{$opts{'raw_files'}}, $_;
}
pod2usage(-msg => "\nERROR: No BAM/CRAM or FASTQ files have been defined.\n", -verbose => 1, -output => \*STDERR) if(scalar @{$opts{'raw_files'}} == 0);

my $max_split = PCAP::Bwa::mem_prepare(\%opts);

Expand Down
77 changes: 60 additions & 17 deletions bin/gnos_pull.pl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

##########LICENCE##########
# PCAP - NGS reference implementations and helper code for the ICGC/TCGA Pan-Cancer Analysis Project
# Copyright (C) 2014-2015 ICGC PanCancer Project
# Copyright (C) 2014-2017 ICGC PanCancer Project
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
Expand Down Expand Up @@ -49,7 +49,7 @@
use PCAP;
use PCAP::Cli;

const my @ANALYSIS_TYPES => (qw(ALIGNMENTS CALLS));
const my @ANALYSIS_TYPES => (qw(ALIGNMENTS CALLS RNA_STAR RNA_TOPHAT));
const my @AVAILABLE_COMPOSITE_FILTERS => (qw(not_sanger_workflow caller max_dataset_GB multi_tumour sanger_version broad_version dkfz_embl_version jamboree_approved manual_donor_blacklist));
const my $DEFAULT_URL => 'http://pancancer.info/gnos_metadata/latest';
const my $GTDL_COMMAND => '%s%s --max-children 3 --rate-limit 200 -vv -c %s -d %scghub/data/analysis/download/%s -p %s';
Expand Down Expand Up @@ -152,6 +152,10 @@ sub pull_data {
$check_ref = \&check_alignments;
$code_ref = \&pull_alignments;
}
elsif($options->{'analysis'} =~ m/^RNA/) {
$check_ref = \&check_rna_alignments;
$code_ref = \&pull_rna_alignments;
}

my $thread_count = $options->{'threads'};
if($CAN_USE_THREADS == 0) {
Expand Down Expand Up @@ -218,6 +222,39 @@ sub check_or_create_symlink {
return 1;
}

# Checks the RNA alignment BAMs (normal and every tumour) of one donor for the
# aligner selected via $options->{'analysis'} (form 'RNA_<ALIGNER>').
#
# Arguments: ($options, $donor, $outbase, $donor_base) - the last two are passed
#            through to check_bam() untouched.
# Returns:   the sum of the values returned by check_bam() for each BAM checked.
# Dies:      if $options->{'analysis'} does not match /^RNA_(.+)/.
sub check_rna_alignments{
  my ($options, $donor, $outbase, $donor_base) = @_;
  warn "Checking $donor->{donor_unique_id}\n";

  # Capture in list context so a failed match yields undef rather than silently
  # re-using $1 from some earlier, unrelated successful match.
  my ($aligner) = $options->{'analysis'} =~ m/^RNA_(.+)/;
  die "ERROR: Unable to determine RNA aligner from analysis type '$options->{analysis}'\n" unless(defined $aligner);
  my $rna_type = lc $aligner;

  my $alignment = $donor->{'rna_seq'}->{'alignment'};
  my $to_do = 0;

  # for normal:
  if(exists $alignment->{'normal'}->{$rna_type}) {
    $to_do += check_bam($options, $donor->{'donor_unique_id'}, $alignment->{'normal'}->{$rna_type}, $outbase, $donor_base, 'rna/'.$rna_type.'/normal');
  }

  # for tumour
  for my $tumour_data(@{$alignment->{'tumor'} || []}) {
    # same guard as the normal branch: a tumour without an alignment from this
    # aligner would otherwise hand undef BAM data to check_bam()
    next unless(exists $tumour_data->{$rna_type});
    $to_do += check_bam($options, $donor->{'donor_unique_id'}, $tumour_data->{$rna_type}, $outbase, $donor_base, 'rna/'.$rna_type.'/tumour');
  }
  return $to_do;
}

# Pulls the RNA alignment BAMs (normal and every tumour) of one donor for the
# aligner selected via $options->{'analysis'} (form 'RNA_<ALIGNER>').
#
# Arguments: ($options, $donor, $outbase, $donor_base) - the last two are passed
#            through to pull_bam() untouched.
# Dies:      if $options->{'analysis'} does not match /^RNA_(.+)/.
sub pull_rna_alignments {
  my ($options, $donor, $outbase, $donor_base) = @_;

  # Capture in list context so a failed match yields undef rather than silently
  # re-using $1 from some earlier, unrelated successful match.
  my ($aligner) = $options->{'analysis'} =~ m/^RNA_(.+)/;
  die "ERROR: Unable to determine RNA aligner from analysis type '$options->{analysis}'\n" unless(defined $aligner);
  my $rna_type = lc $aligner;

  my $alignment = $donor->{'rna_seq'}->{'alignment'};

  # for normal:
  if(exists $alignment->{'normal'}->{$rna_type}) {
    pull_bam($options, $donor->{'donor_unique_id'}, $alignment->{'normal'}->{$rna_type}, $outbase, $donor_base, 'rna/'.$rna_type.'/normal');
  }

  # for tumour
  for my $tumour_data(@{$alignment->{'tumor'} || []}) {
    # same guard as the normal branch: a tumour without an alignment from this
    # aligner would otherwise hand undef BAM data to pull_bam()
    next unless(exists $tumour_data->{$rna_type});
    pull_bam($options, $donor->{'donor_unique_id'}, $tumour_data->{$rna_type}, $outbase, $donor_base, 'rna/'.$rna_type.'/tumour');
  }
  # bare return keeps the result independent of the last pull_bam() call
  return;
}

sub check_alignments {
my ($options, $donor, $outbase, $donor_base) = @_;
warn "Checking $donor->{donor_unique_id}\n";
Expand Down Expand Up @@ -254,8 +291,8 @@ sub check_bam {

if(-e $success) {
check_or_create_symlink($orig_bam, $sym_bam);
check_or_create_symlink($orig_bam.'.bai', $sym_bam.'.bai');
create_bas($repo, $gnos_id, $sym_bam);
check_or_create_symlink($orig_bam.'.bai', $sym_bam.'.bai') if(-e $orig_bam.'.bai');
create_bas($repo, $gnos_id, $sym_bam) unless($options->{'analysis'} =~ m/^RNA/);
return 0;
}

Expand All @@ -277,7 +314,6 @@ sub pull_alignments {

sub pull_bam {
my ($options, $donor_id, $bam_data, $outbase, $donor_base, $type) = @_;

my $repo = select_repo($options, $bam_data->{'aligned_bam'}->{'gnos_repo'});
unless(exists $options->{'keys'}->{$repo}) {
warn sprintf "Skipping %s BAM for Donor %s - No permission key for repo %s", $type, $donor_id, $repo;
Expand All @@ -297,8 +333,8 @@ sub pull_bam {

if(-e $success) {
check_or_create_symlink($orig_bam, $sym_bam);
check_or_create_symlink($orig_bam.'.bai', $sym_bam.'.bai');
create_bas($repo, $gnos_id, $sym_bam);
check_or_create_symlink($orig_bam.'.bai', $sym_bam.'.bai') if(-e $orig_bam.'.bai');
create_bas($repo, $gnos_id, $sym_bam) unless($options->{'analysis'} =~ m/^RNA/);
return;
}
return if($options->{'symlinks'});
Expand Down Expand Up @@ -329,8 +365,9 @@ sub pull_bam {
unlink $err_file;

check_or_create_symlink($orig_bam, $sym_bam);
check_or_create_symlink($orig_bam.'.bai', $sym_bam.'.bai');
my $bas_valid = create_bas($repo, $gnos_id, $sym_bam);
check_or_create_symlink($orig_bam.'.bai', $sym_bam.'.bai') if(-e $orig_bam.'.bai');
my $bas_valid = 1;
$bas_valid = create_bas($repo, $gnos_id, $sym_bam) unless($options->{'analysis'} =~ m/^RNA/);

if($bas_valid == 1) {
# touch a success file in the output loc
Expand Down Expand Up @@ -516,13 +553,15 @@ sub load_data {
warn sprintf "Donor: %s blacklisted\n", $donor->{'donor_unique_id'} if($options->{'debug'});
next;
}
unless($donor->{'flags'}->{'is_normal_specimen_aligned'}) {
warn sprintf "Donor: %s normal sample not aligned\n", $donor->{'donor_unique_id'} if($options->{'debug'});
next;
}
unless($donor->{'flags'}->{'are_all_tumor_specimens_aligned'}) {
warn sprintf "Donor: %s all tumour samples not aligned\n", $donor->{'donor_unique_id'} if($options->{'debug'});
next;
unless($options->{'analysis'} =~ m/^RNA/) {
unless($donor->{'flags'}->{'is_normal_specimen_aligned'}) {
warn sprintf "Donor: %s normal sample not aligned\n", $donor->{'donor_unique_id'} if($options->{'debug'});
next;
}
unless($donor->{'flags'}->{'are_all_tumor_specimens_aligned'}) {
warn sprintf "Donor: %s all tumour samples not aligned\n", $donor->{'donor_unique_id'} if($options->{'debug'});
next;
}
}

if(exists $options->{'COMPOSITE_FILTERS'}->{'multi_tumour'} && $donor->{'flags'}->{'all_tumor_specimen_aliquot_counts'} == 1) {
Expand Down Expand Up @@ -572,7 +611,11 @@ sub load_data {
warn sprintf "Donor: %s has no variant calling available\n", $donor->{'donor_unique_id'} if($options->{'debug'});
next;
}

}
elsif($options->{'analysis'} =~ m/^RNA_(.+)/) {
my $rna_type = lc $1;
next unless( $donor->{'flags'}->{'is_tumor_'.$rna_type.'_rna_seq_alignment_performed'}
|| $donor->{'flags'}->{'is_normal_'.$rna_type.'_rna_seq_alignment_performed'});
}
else {
my $size = data_size_alignments_gb($options, $donor);
Expand Down
10 changes: 7 additions & 3 deletions c/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ MD := mkdir
#Build target executable
BAM_STATS_TARGET=../bin/bam_stats
SQ_TARGET=../bin/reheadSQ
BAM_DIFF=../bin/diff_bams

#
# The following part of the makefile is generic; it can be used to
Expand All @@ -66,7 +67,7 @@ SQ_TARGET=../bin/reheadSQ

.NOTPARALLEL: test

all: clean pre make_htslib_tmp $(BAM_STATS_TARGET) $(BAM2BG_TARGET) $(BAM2BW_TARGET) test remove_htslib_tmp $(CAT_TARGET) $(SQ_TARGET)
all: clean pre make_htslib_tmp $(BAM_STATS_TARGET) $(BAM2BG_TARGET) $(BAM2BW_TARGET) $(BAM_DIFF) test remove_htslib_tmp $(CAT_TARGET) $(SQ_TARGET)
@echo bam_stats and reheadSQ compiled.

$(BAM_STATS_TARGET): $(OBJS)
Expand All @@ -75,6 +76,9 @@ $(BAM_STATS_TARGET): $(OBJS)
# Build the reheadSQ executable from its single source file.
# The source is a prerequisite so that editing reheadSQ.c triggers a rebuild.
$(SQ_TARGET): ./reheadSQ.c
	$(CC) $(CFLAGS) $< -o $@

# Build the C version of diff_bams, linked against the shared objects in $(OBJS).
# Both the source and $(OBJS) are prerequisites so the binary is relinked when
# either changes, and so $(OBJS) are guaranteed to exist before linking.
# The source and objects are placed before $(LFLAGS) $(LIBS): the linker resolves
# symbols left-to-right, so libraries must follow the code that references them.
$(BAM_DIFF): ./diff_bams.c $(OBJS)
	$(CC) $(CFLAGS) $(INCLUDES) -o $@ ./diff_bams.c $(OBJS) $(LFLAGS) $(LIBS)


#Unit Tests
test: $(BAM_STATS_TARGET)
Expand All @@ -97,7 +101,7 @@ remove_htslib_tmp:

copyscript:
cp ./scripts/* ./bin/
chmod a+x $(BAM_STATS_TARGET) $(CAT_TARGET) $(SQ_TARGET) $(BAM2BW_TARGET) $(BAM2BG_TARGET)
chmod a+x $(BAM_STATS_TARGET) $(CAT_TARGET) $(SQ_TARGET) $(BAM2BW_TARGET) $(BAM2BG_TARGET) $(BAM_DIFF)

valgrind:
VALGRIND="valgrind --log-file=/tmp/valgrind-%p.log" $(MAKE)
Expand All @@ -112,7 +116,7 @@ valgrind:

clean:
@echo clean
$(RM) ./*.o *~ $(BAM_STATS_TARGET) $(SQ_TARGET) ./tests/tests_log $(TESTS) ./*.gcda ./*.gcov ./*.gcno *.gcda *.gcov *.gcno ./tests/*.gcda ./tests/*.gcov ./tests/*.gcno
$(RM) ./*.o *~ $(BAM_STATS_TARGET) $(SQ_TARGET) $(BAM_DIFF) ./tests/tests_log $(TESTS) ./*.gcda ./*.gcov ./*.gcno *.gcda *.gcov *.gcno ./tests/*.gcda ./tests/*.gcov ./tests/*.gcno
-rm -rf $(HTSTMP)

depend: $(SRCS)
Expand Down
Loading

0 comments on commit e04f26f

Please sign in to comment.