Skip to content

Commit

Permalink
FIX coordinates of orfs with start/stop codon on exon boundary
Browse files Browse the repository at this point in the history
This addresses Issue #64.
  • Loading branch information
bmmalone committed Mar 31, 2017
1 parent dfec093 commit 63ca81c
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 7 deletions.
2 changes: 1 addition & 1 deletion rpbp/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
__version_info__ = ('1', '1', '5')
__version_info__ = ('1', '1', '6')
__version__ = '.'.join(__version_info__)
38 changes: 33 additions & 5 deletions rpbp/reference_preprocessing/extract_orf_coordinates.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,11 @@ def get_orfs(transcript_and_sequence, start_codons_re, stop_codons_re):
transcript_length = len(transcript_sequence)

# get the ORFs for this entry
orf_rel_positions = get_orf_positions(transcript_sequence, start_codons_re, stop_codons_re)
orf_rel_positions = get_orf_positions(
transcript_sequence,
start_codons_re,
stop_codons_re
)

#if logger.isEnabledFor(logging.DEBUG):
# s = ["({},{})".format(o.start, o.end) for o in orf_rel_positions]
Expand All @@ -186,15 +190,39 @@ def get_orfs(transcript_and_sequence, start_codons_re, stop_codons_re):

# we need the block information to convert between relative and genomic coordinates
start = transcript['start']
block_lengths = np.fromstring(transcript['exon_lengths'], sep=',', dtype=int)

block_lengths = np.fromstring(
transcript['exon_lengths'],
sep=',',
dtype=int
)

block_starts = np.zeros(len(block_lengths), dtype=int)
block_starts[1:] = np.cumsum(block_lengths)[:-1]
block_relative_starts = np.fromstring(transcript['exon_genomic_relative_starts'], sep=',', dtype=int)

block_relative_starts = np.fromstring(
transcript['exon_genomic_relative_starts'],
sep=',',
dtype=int
)

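# for a discussion about why the relative positions are shifted by -1 before
# the conversion and the resulting genomic positions by +1 afterwards (ORFs
# with a start/stop codon on an exon boundary), please see
# Issue #64: https://github.com/dieterich-lab/rp-bp/issues/64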
orf_gen_positions = [
orf_position(
start=bed_utils.get_gen_pos(o.start, start, block_lengths, block_starts, block_relative_starts),
end=bed_utils.get_gen_pos(o.end, start, block_lengths, block_starts, block_relative_starts)
start=bed_utils.get_gen_pos(
o.start-1,
start,
block_lengths,
block_starts,
block_relative_starts)+1,

end=bed_utils.get_gen_pos(
o.end-1,
start,
block_lengths,
block_starts,
block_relative_starts)+1
) for o in orf_rel_positions
]

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ def readme():
return f.read()

setup(name='rpbp',
version='1.1.5',
version='1.1.6',
description="This package contains the Rp-Bp pipeline for predicting translation of open reading frames from ribosome profiling data.",
long_description=readme(),
keywords="rpbp ribosome profiling bayesian inference markov chain monte carlo translation",
Expand Down

0 comments on commit 63ca81c

Please sign in to comment.