Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add --allow-multimapping #205

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions rMATS_pipeline/rmatspipeline/rmatspipeline.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -272,11 +272,12 @@ cdef cbool read_has_nh_tag_1(const BamAlignment& bread) nogil:

@boundscheck(False)
@wraparound(False)
cdef int filter_read(const BamAlignment& bread, const cbool& ispaired) nogil:
cdef int filter_read(const BamAlignment& bread, const cbool& ispaired,
cbool allow_multimapping) nogil:
if ispaired and not bread.IsProperPair():
return READ_NOT_PAIRED

if not read_has_nh_tag_1(bread):
if not (allow_multimapping or read_has_nh_tag_1(bread)):
return READ_NOT_NH_1

return READ_USED
Expand Down Expand Up @@ -743,6 +744,7 @@ cdef void parse_bam(long fidx, string bam,
cbool issingle, int jld2, int readLength,
cbool variable_read_length, int dt, cbool& novelSS,
long& mil, long& mel, cbool allow_clipping,
cbool allow_multimapping,
vector[int64_t]& read_outcome_counts) nogil:
"""TODO: Docstring for parse_bam.
:returns: TODO
Expand Down Expand Up @@ -802,7 +804,7 @@ cdef void parse_bam(long fidx, string bam,
read_outcome_counts[READ_CLIPPED] += 1
continue

filter_outcome = filter_read(bread, ispaired)
filter_outcome = filter_read(bread, ispaired, allow_multimapping)
if filter_outcome != READ_USED:
read_outcome_counts[filter_outcome] += 1
continue
Expand Down Expand Up @@ -968,6 +970,7 @@ cdef void detect_novel(str bams, unordered_map[int,cset[string]]& geneGroup,
long mil = args.mil
long mel = args.mel
cbool allow_clipping = args.allow_clipping
cbool allow_multimapping = args.allow_multimapping
vector[vector[int64_t]] read_outcome_counts

dt = args.dt
Expand All @@ -988,7 +991,7 @@ cdef void detect_novel(str bams, unordered_map[int,cset[string]]& geneGroup,
parse_bam(fidx, vbams[fidx], geneGroup, genes, supple, novel_juncs[fidx],
exons[fidx], multis[fidx], issingle, jld2, readLength,
variable_read_length, dt, novelSS, mil, mel, allow_clipping,
read_outcome_counts[fidx])
allow_multimapping, read_outcome_counts[fidx])

output_read_outcomes(read_outcome_counts, vbams, args.tmp, args.prep_prefix)

Expand Down
3 changes: 3 additions & 0 deletions rmats.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,9 @@ def get_args():
parser.add_argument('--allow-clipping', action='store_true',
help='Allow alignments with soft or hard clipping to be used',
dest='allow_clipping')
parser.add_argument('--allow-multimapping', action='store_true',
help='Allow alignments that are mapped ambiguously (NH tag != 1)',
dest='allow_multimapping')
parser.add_argument('--fixed-event-set', action='store', help='A directory containing fromGTF.[AS].txt files to be used instead of detecting a new set of events')

args = parser.parse_args()
Expand Down
2 changes: 2 additions & 0 deletions tests/allow_multimapping/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
/multimapping_allowed/
/multimapping_not_allowed/
220 changes: 220 additions & 0 deletions tests/allow_multimapping/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,220 @@
import os.path
import unittest

import tests.base_test
import tests.output_parser as output_parser
import tests.test_config
import tests.util


class AllowMultimappingBaseTest(tests.base_test.BaseTest):
    """Shared fixture for the ``--allow-multimapping`` tests.

    ``setUp`` generates two samples of two paired-end BAM replicates each
    and a GTF with two transcripts: one containing the middle exon
    (201-300) and one skipping it.  Every read pair gets a mapping count
    taken from the matching ``_read_multimapping_*`` list.

    ``setUp`` calls ``self._sub_test_name()`` to name the per-test working
    directory; this class does not define it, so concrete subclasses must.
    """
    def setUp(self):
        super().setUp()

        self._test_base_dir = tests.test_config.TEST_BASE_DIR
        self._test_dir = os.path.join(self._test_base_dir,
                                      'allow_multimapping',
                                      self._sub_test_name())
        self._generated_input_dir = os.path.join(self._test_dir,
                                                 'generated_input')
        self._out_dir = os.path.join(self._test_dir, 'out')
        self._tmp_dir = os.path.join(self._test_dir, 'tmp')
        # Directories are recreated before any input file is generated.
        tests.util.recreate_dirs([
            self._generated_input_dir, self._out_dir, self._tmp_dir,
            self._command_output_dir()
        ])

        self._read_type = 'paired'
        self._read_length = 50
        self._chromosome_length = 2000
        self._task = 'both'

        self._sample_1_bams_path = os.path.join(self._generated_input_dir,
                                                'b1.txt')
        sample_1_bam_replicate_template = os.path.join(
            self._generated_input_dir, 'sample_1_rep_{}.bam')
        self._sample_1_bams = self._create_sample_1_bams(
            self._sample_1_bams_path, sample_1_bam_replicate_template)

        self._sample_2_bams_path = os.path.join(self._generated_input_dir,
                                                'b2.txt')
        sample_2_bam_replicate_template = os.path.join(
            self._generated_input_dir, 'sample_2_rep_{}.bam')
        self._sample_2_bams = self._create_sample_2_bams(
            self._sample_2_bams_path, sample_2_bam_replicate_template)

        self._gtf_path = os.path.join(self._generated_input_dir, 'test.gtf')
        self._gtf = self._create_gtf_from_transcripts(
            self._gtf_path, self._exons_by_transcript())

    def _command_output_dir(self):
        """Return the directory used for captured command output."""
        return os.path.join(self._test_dir, 'command_output')

    def _rmats_arguments(self):
        """Return the baseline rMATS command line arguments as a new list."""
        return [
            '--b1',
            self._sample_1_bams_path,
            '--b2',
            self._sample_2_bams_path,
            '--gtf',
            self._gtf_path,
            '--od',
            self._out_dir,
            '-t',
            self._read_type,
            '--readLength',
            str(self._read_length),
            '--tmp',
            self._tmp_dir,
            '--task',
            self._task,
        ]

    def _create_multimapping_sample_bams(self, bams_path, replicate_template,
                                         replicates):
        """Create one BAM per replicate and write the sample's BAM list.

        :param bams_path: path of the list file written via ``_write_bams``
        :param replicate_template: format string taking the 1-based
            replicate number and producing that replicate's BAM path
        :param replicates: ``(paired_read_coords, multimapping)`` tuples in
            replicate order
        :returns: the created BAMs, in replicate order
        """
        bams = [
            self._create_bam_from_paired_read_coords(
                replicate_template.format(rep_number),
                self._chromosome_length,
                self._read_length,
                paired_read_coords,
                multimapping=multimapping)
            for rep_number, (paired_read_coords,
                             multimapping) in enumerate(replicates, start=1)
        ]
        self._write_bams(bams, bams_path)
        return bams

    def _create_sample_1_bams(self, sample_1_bams_path,
                              sample_1_replicate_template):
        """Create both replicates of sample 1 and write their list file."""
        return self._create_multimapping_sample_bams(
            sample_1_bams_path, sample_1_replicate_template, [
                (self._paired_read_coords_1_1(),
                 self._read_multimapping_1_1()),
                (self._paired_read_coords_1_2(),
                 self._read_multimapping_1_2()),
            ])

    def _create_sample_2_bams(self, sample_2_bams_path,
                              sample_2_replicate_template):
        """Create both replicates of sample 2 and write their list file."""
        return self._create_multimapping_sample_bams(
            sample_2_bams_path, sample_2_replicate_template, [
                (self._paired_read_coords_2_1(),
                 self._read_multimapping_2_1()),
                (self._paired_read_coords_2_2(),
                 self._read_multimapping_2_2()),
            ])

    def _exons_by_transcript(self):
        """Return exon intervals: one transcript with exon 201-300, one without."""
        return [
            [(1, 100), (201, 300), (401, 500)],
            [(1, 100), (401, 500)],
        ]

    def _include_read(self):
        """Return (read 1 coords, read 2 coords) touching exon 201-300."""
        return ([[81, 100], [201, 300]], [[201, 300]])

    def _skip_read(self):
        """Return (read 1 coords, read 2 coords) going from 81-100 to 401-500."""
        return ([[81, 100], [401, 500]], [[401, 500]])

    # The _paired_read_coords_S_R / _read_multimapping_S_R pairs below are
    # parallel lists for sample S, replicate R: entry i of the multimapping
    # list is the mapping count given to read pair i.

    def _paired_read_coords_1_1(self):
        return [
            self._include_read(),
            self._skip_read(),
        ]

    def _read_multimapping_1_1(self):
        return [1, 1]

    def _paired_read_coords_1_2(self):
        return [
            self._include_read(),
            self._include_read(),
            self._skip_read(),
        ]

    def _read_multimapping_1_2(self):
        # Both inclusion pairs are multimapped; the skipping pair is unique.
        return [2, 2, 1]

    def _paired_read_coords_2_1(self):
        return [
            self._include_read(),
            self._skip_read(),
        ]

    def _read_multimapping_2_1(self):
        return [1, 1]

    def _paired_read_coords_2_2(self):
        return [
            self._include_read(),
            self._skip_read(),
            self._skip_read(),
        ]

    def _read_multimapping_2_2(self):
        # The inclusion pair is unique; both skipping pairs are multimapped.
        return [1, 2, 2]


class MultimappingAllowedTest(AllowMultimappingBaseTest):
    """Run rMATS with ``--allow-multimapping`` and check the SE counts.

    With the flag set, the read pairs whose mapping count is 2 are
    expected to be counted alongside the uniquely mapped ones.
    """
    def _sub_test_name(self):
        return 'multimapping_allowed'

    def test(self):
        self._run_test()

    def _rmats_arguments(self):
        """Return the base arguments followed by ``--allow-multimapping``."""
        return super()._rmats_arguments() + ['--allow-multimapping']

    def _check_results(self):
        """Verify the single SE event reports the expected junction counts."""
        self._check_no_error_results()

        jc_path = os.path.join(self._out_dir, 'SE.MATS.JC.txt')
        header, rows, error = output_parser.parse_mats_jc(jc_path)
        self.assertFalse(error)
        self._check_se_mats_jc_header(header)
        self.assertEqual(len(rows), 1)

        # Checked in this order so the first mismatch reported is unchanged.
        expected_counts = (
            ('IJC_SAMPLE_1', '1,2'),
            ('SJC_SAMPLE_1', '1,1'),
            ('IJC_SAMPLE_2', '1,1'),
            ('SJC_SAMPLE_2', '1,2'),
        )
        only_row = rows[0]
        for column, expected in expected_counts:
            self.assertEqual(only_row[column], expected)


class MultimappingNotAllowedTest(AllowMultimappingBaseTest):
    """Run rMATS without ``--allow-multimapping`` and check the SE counts.

    Without the flag, the read pairs whose mapping count is 2 are expected
    to be left out, so the affected replicates report 0.
    """
    def _sub_test_name(self):
        return 'multimapping_not_allowed'

    def test(self):
        self._run_test()

    def _check_results(self):
        """Verify the single SE event reports the expected junction counts."""
        self._check_no_error_results()

        jc_path = os.path.join(self._out_dir, 'SE.MATS.JC.txt')
        header, rows, error = output_parser.parse_mats_jc(jc_path)
        self.assertFalse(error)
        self._check_se_mats_jc_header(header)
        self.assertEqual(len(rows), 1)

        # Checked in this order so the first mismatch reported is unchanged.
        expected_counts = (
            ('IJC_SAMPLE_1', '1,0'),
            ('SJC_SAMPLE_1', '1,1'),
            ('IJC_SAMPLE_2', '1,1'),
            ('SJC_SAMPLE_2', '1,0'),
        )
        only_row = rows[0]
        for column, expected in expected_counts:
            self.assertEqual(only_row[column], expected)


# Allow running this test module directly as a script.
if __name__ == '__main__':
    unittest.main(verbosity=2)
5 changes: 4 additions & 1 deletion tests/bam.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,8 @@ def set_read_pair_from_intervals(read_1,
read_length,
clip_length=None,
is_reversed_1=False,
is_reversed_2=True):
is_reversed_2=True,
num_mappings=1):
cigar_1 = list()
remaining_length = read_length
prev_end = None
Expand Down Expand Up @@ -206,6 +207,8 @@ def set_read_pair_from_intervals(read_1,
read_1.template_len = (read_2_end - read_1_start) + 1
read_1.is_reversed = is_reversed_1
read_2.is_reversed = is_reversed_2
read_1.number_of_alignments = num_mappings
read_2.number_of_alignments = num_mappings
make_read_pair(read_1, read_2)
return None

Expand Down
11 changes: 9 additions & 2 deletions tests/base_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,12 +105,18 @@ def _create_bam_from_paired_read_coords(self,
paired_read_coords,
clip_length=None,
is_reversed_1=False,
is_reversed_2=True):
is_reversed_2=True,
multimapping=list()):
bam = tests.bam.BAM()
bam.path = bam_path

bam_reads = list()
for i, coord_pair in enumerate(paired_read_coords):
if multimapping:
num_mappings = multimapping[i]
else:
num_mappings = 1

read_1_coords, read_2_coords = coord_pair
paired_read_1 = tests.bam.Read()
paired_read_1.ref_seq_name = '1' # chromosome
Expand All @@ -125,7 +131,8 @@ def _create_bam_from_paired_read_coords(self,
read_length,
clip_length=clip_length,
is_reversed_1=is_reversed_1,
is_reversed_2=is_reversed_2)
is_reversed_2=is_reversed_2,
num_mappings=num_mappings)
self.assertFalse(error)
bam_reads.extend([paired_read_1, paired_read_2])

Expand Down
2 changes: 2 additions & 0 deletions tests/runner.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import unittest

import tests.allow_clipping.test as allow_clipping_test
import tests.allow_multimapping.test as allow_multimapping_test
import tests.alternative_3_splice_site_novel.test as alternative_3_splice_site_novel_test
import tests.alternative_5_splice_site_novel.test as alternative_5_splice_site_novel_test
import tests.fixed_event_set.test as fixed_event_set_test
Expand All @@ -22,6 +23,7 @@ def build_test_suite():
loader = unittest.defaultTestLoader
suite = unittest.TestSuite()
suite.addTest(loader.loadTestsFromModule(allow_clipping_test))
suite.addTest(loader.loadTestsFromModule(allow_multimapping_test))
suite.addTest(
loader.loadTestsFromModule(alternative_3_splice_site_novel_test))
suite.addTest(
Expand Down