Skip to content

Commit

Permalink
Fix for #153
Browse files Browse the repository at this point in the history
  • Loading branch information
lucventurini committed Mar 8, 2019
1 parent efea2c6 commit cfe3119
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 87 deletions.
72 changes: 33 additions & 39 deletions Mikado/transcripts/transcript.py
Original file line number Diff line number Diff line change
Expand Up @@ -588,11 +588,13 @@ def __setstate__(self, state):
self.logger = None

def __getattribute__(self, item):
    """Look up ``item`` normally, falling back to the external scores.

    Regular attribute lookup is always attempted first. Only when it fails,
    and ``item`` is a dotted name that contains "external" (without being
    exactly "external"), is the second dot-separated component of ``item``
    looked up on ``self.external_scores`` instead.

    :param item: name of the requested attribute.
    :type item: str
    :return: the attribute value, or the matching external score.
    :raises AttributeError: if neither lookup can be satisfied.
    """
    try:
        return object.__getattribute__(self, item)
    except AttributeError:
        if "external" in item and item != "external" and "." in item:
            return getattr(self.external_scores, item.split(".")[1])
        # Re-raise the original error untouched so that its message and
        # traceback are preserved instead of being wrapped in a new exception.
        raise

# ######## Class instance methods ####################

Expand All @@ -603,30 +605,31 @@ def add_exon(self, gffline, feature=None, phase=None):
:type feature: flag to indicate what kind of feature we are adding
"""

if isinstance(gffline, (tuple, list)):
_gtype = type(gffline)

if _gtype in (tuple, list):
assert len(gffline) == 2
try:
start, end = sorted(gffline)
except TypeError:
raise TypeError((gffline, type(gffline)))
raise TypeError((gffline, _gtype))
if feature is None:
feature = "exon"
elif isinstance(gffline, intervaltree.Interval):
elif _gtype is intervaltree.Interval:
start, end = gffline[0], gffline[1]
if feature is None:
feature = "exon"
elif isinstance(gffline, Interval):
elif _gtype is Interval:
start, end = gffline.start, gffline.end
if feature is None:
feature = "exon"
elif not isinstance(gffline, (GtfLine, GffLine)):
raise InvalidTranscript("Unkwown feature type! %s",
type(gffline))
elif _gtype not in (GtfLine, GffLine):
raise InvalidTranscript("Unkwown feature type! %s", _gtype)
else:
start, end = sorted([gffline.start, gffline.end])
if feature is None:
feature = gffline.feature
if isinstance(gffline, GffLine) and "cdna_match" in gffline.feature.lower():
if _gtype is GffLine and "cdna_match" in gffline.feature.lower():
gffline.parent = gffline.id

if self.id not in gffline.parent:
Expand Down Expand Up @@ -669,11 +672,7 @@ def add_exon(self, gffline, feature=None, phase=None):

segment = tuple([int(start), int(end)])
if segment in store:
# raise InvalidTranscript(
# "Attempt to add {} to {}, but it is already present!".format(
# segment, self.id))
return
# assert isinstance(segment[0], int) and isinstance(segment[1], int)
if self.__expandable is True:
self.start = min([self.start, start])
self.end = max([self.end, end])
Expand Down Expand Up @@ -1382,10 +1381,6 @@ def get_available_metrics(cls) -> list:
metrics = [member[0] for member in inspect.getmembers(cls) if
"__" not in member[0] and isinstance(cls.__dict__[member[0]], Metric)]

# metrics = list(x[0] for x in filter(
# lambda y: "__" not in y[0] and isinstance(cls.__dict__[y[0]], Metric),
# inspect.getmembers(cls)))
# assert "tid" in metrics and "parent" in metrics and "score" in metrics
_metrics = sorted([metric for metric in metrics])
final_metrics = ["tid", "alias", "parent", "original_source", "score"] + _metrics
return final_metrics
Expand Down Expand Up @@ -1552,9 +1547,12 @@ def parent(self, parent):
:type parent: list
:type parent: str
"""
if isinstance(parent, (list, type(None))):

_ptype = type(parent)

if _ptype in (list, type(None)):
self.__parent = parent
elif isinstance(parent, str):
elif _ptype is str:
if "," in parent:
self.__parent = parent.split(",")
else:
Expand Down Expand Up @@ -1896,6 +1894,13 @@ def exons(self, *args):

self.__exons = list(sorted(args[0]))

def _set_exons(self, exons):
    """Store ``exons`` directly, skipping the checks done by the public setter.

    Intended as a fast path while finalising. A transcript whose
    ``finalized`` flag is already ``True`` refuses the assignment.

    :param exons: the exon collection to store as-is.
    :raises NotImplementedError: if the transcript is already finalised.
    """
    if self.finalized is not True:
        self.__exons = exons
        return

    raise NotImplementedError("I cannot reset the exons in a finalised transcript.")

@property
def combined_cds_introns(self):
"""This property returns the introns which are located between CDS
Expand Down Expand Up @@ -1976,14 +1981,6 @@ def combined_cds(self, combined):
if ((not isinstance(combined, list)) or
any(self.__wrong_combined_entry(comb) for comb in combined)):
raise TypeError("Invalid value for combined CDS: {0}".format(combined))
# if len(combined) > 0:
# if isinstance(combined[0], tuple):
# try:
# combined = [intervaltree.Interval(_[0], _[1]) for _ in combined]
# except IndexError:
# raise IndexError(combined)
# else:
# assert isinstance(combined[0], intervaltree.Interval)

if len(combined) > 0:
ar = np.array(list(zip(*combined)))
Expand Down Expand Up @@ -2282,14 +2279,11 @@ def combined_utr_fraction(self):
@Metric
def cdna_length(self):
    """This property returns the length of the transcript.

    The length is the sum over all exons of ``end - start + 1``. While
    ``self.finalized`` is ``False`` the value is recomputed on every access;
    otherwise a previously cached value is reused, and computed first if
    none is cached yet.

    :return: the cDNA length.
    :rtype: int
    :raises AssertionError: if the transcript is flagged as finalised but no
        length has been cached.
    """
    if self.__cdna_length is None and self.finalized is True:
        raise AssertionError(
            "The cDNA length of a finalised transcript has not been computed")
    if self.finalized is False or self.__cdna_length is None:
        # Transpose the (start, end) pairs: row 0 holds the starts, row 1 the ends.
        ar = np.array(list(zip(*self.exons)))
        self.__cdna_length = int(np.subtract(ar[1], ar[0] - 1).sum())
    return self.__cdna_length

cdna_length.category = "cDNA"
Expand Down
94 changes: 46 additions & 48 deletions Mikado/transcripts/transcript_methods/finalizing.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@ def __basic_final_checks(transcript):
:return:
"""

if len(transcript.exons) == 0:
_exons = transcript.exons

if not _exons:
if transcript._possibly_without_exons is True:
transcript.logger.debug("Inferring that %s is a single-exon transcript")
new_exon = (transcript.start, transcript.end)
Expand All @@ -33,41 +35,41 @@ def __basic_final_checks(transcript):
raise exc
else:
# Let us try to derive exons from CDS ...
transcript.exons = sorted([tuple([int(exon[0]), int(exon[1])]) for exon in transcript.combined_cds])
_exons = sorted([tuple([int(exon[0]), int(exon[1])]) for exon in transcript.combined_cds])
if len(transcript.combined_utr) == 0:
# Enlarge the terminal exons to reach the transcript start and end
if transcript.start is not None:
transcript.exons[0] = (transcript.start, transcript.exons[0][1])
_exons[0] = (transcript.start, _exons[0][1])
if transcript.end is not None:
transcript.exons[-1] = (transcript.exons[-1][0], transcript.end)
_exons[-1] = (_exons[-1][0], transcript.end)
else:
__utr = sorted([tuple([int(exon[0]), int(exon[1])]) for exon in transcript.combined_utr])
try:
__before = [_ for _ in __utr if _[1] < transcript.exons[0][0]]
__before = [_ for _ in __utr if _[1] < _exons[0][0]]
except IndexError:
raise IndexError((__utr, transcript.exons))
if __before[-1][1] == transcript.exons[0][0] - 1:
transcript.exons[0] = (__before[-1][0], transcript.exons[0][1])
raise IndexError((__utr, _exons))
if __before[-1][1] == _exons[0][0] - 1:
_exons[0] = (__before[-1][0], _exons[0][1])
__before.pop()
__after = [_ for _ in __utr if _[0] > transcript.exons[-1][1]]
if __after[0][0] == transcript.exons[-1][1] + 1:
transcript.exons[-1] = (transcript.exons[-1][0], __after[0][1])
__after = [_ for _ in __utr if _[0] > _exons[-1][1]]
if __after[0][0] == _exons[-1][1] + 1:
_exons[-1] = (_exons[-1][0], __after[0][1])
__after = __after[1:]
transcript.exons = __before + transcript.exons + __after
_exons = __before + _exons + __after

transcript.logger.debug("Converting to tuples")
transcript.exons = [tuple([int(exon[0]), int(exon[1])]) for exon in transcript.exons]
_exons = [tuple([int(exon[0]), int(exon[1])]) for exon in _exons]

new_exons = []
invalid = False
# invalid = False

# Set the start and end automatically if none has been explicitly provided
if transcript.start is None:
transcript.start = min(_[0] for _ in transcript.exons)
transcript.start = min(_[0] for _ in _exons)
if transcript.end is None:
transcript.end = max(_[1] for _ in transcript.exons)
transcript.end = max(_[1] for _ in _exons)

for exon in transcript.exons:
for exon in _exons:
if not isinstance(exon, tuple):
if (isinstance(exon, Interval) or
(isinstance(exon, list) and len(exon) == 2 and
Expand All @@ -83,7 +85,7 @@ def __basic_final_checks(transcript):
raise exc
new_exons.append(exon)

transcript.exons = sorted(new_exons)
transcript._set_exons(sorted(new_exons))

if len(transcript.exons) > 1 and transcript.strand is None:
if transcript._accept_undefined_multi is False:
Expand Down Expand Up @@ -533,70 +535,65 @@ def __check_phase_correctness(transcript):
:return: Mikado.loci.transcript.Transcript
"""

if min(len(transcript.segments), len(transcript.internal_orfs)) == 0:
transcript.logger.debug("Redefining segments for %s", transcript.id)
segments, internal_orfs = transcript.segments, transcript.internal_orfs

if min(len(segments), len(internal_orfs)) == 0:
# transcript.logger.debug("Redefining segments for %s", transcript.id)
# Define exons
transcript.segments = [("exon", tuple([e[0], e[1]]))
for e in transcript.exons]
segments = [("exon", tuple([e[0], e[1]])) for e in transcript.exons]
# Define CDS
if len(transcript.internal_orfs) > 0:
for orf in transcript.internal_orfs:
if len(internal_orfs) > 0:
for orf in internal_orfs:
for segment in orf:
if segment[0] == "exon":
continue
elif segment[0] == "UTR":
transcript.segments.append(("UTR", (segment[1][0], segment[1][1])))
segments.append(("UTR", (segment[1][0], segment[1][1])))
elif segment[0] == "CDS":
transcript.segments.append(("CDS", (segment[1][0], segment[1][1])))
segments.append(("CDS", (segment[1][0], segment[1][1])))
else:
transcript.segments.extend([("CDS", tuple([c[0], c[1]]))
for c in transcript.combined_cds])
segments.extend([("CDS", tuple([c[0], c[1]])) for c in transcript.combined_cds])
# Define UTR segments
transcript.segments.extend([("UTR", tuple([u[0], u[1]]))
for u in transcript.combined_utr])
segments.extend([("UTR", tuple([u[0], u[1]])) for u in transcript.combined_utr])
# Mix and sort
transcript.segments = sorted(transcript.segments, key=operator.itemgetter(1, 0))
segments = sorted(segments, key=operator.itemgetter(1, 0))
# Add to the store as a single entity
if not transcript.internal_orfs and any(_[0] == "CDS" for _ in transcript.segments):
transcript.internal_orfs = [transcript.segments]
if not internal_orfs and any(_[0] == "CDS" for _ in segments):
internal_orfs = [segments]
else:
transcript.selected_internal_orf_index = None
elif len(transcript.internal_orfs) == 0:
elif len(internal_orfs) == 0:
exception = AssertionError("No internal ORF for {}".format(transcript.id))
transcript.logger.exception(exception)
raise exception
else:
transcript.logger.debug("Segments and ORFs defined for %s", transcript.id)
pass

transcript.logger.debug("{} has {} internal ORF{}".format(
transcript.id, len(transcript.internal_orfs),
"s" if len(transcript.internal_orfs) > 1 else ""))
transcript.segments, transcript.internal_orfs = segments, internal_orfs

__orfs_to_remove = []
for orf_index in range(len(transcript.internal_orfs)):
transcript.logger.debug("ORF #%d for %s: %s",
orf_index, transcript.id, transcript.internal_orfs[orf_index])
for orf_index in range(len(internal_orfs)):
# transcript.logger.debug("ORF #%d for %s: %s",
# orf_index, transcript.id, transcript.internal_orfs[orf_index])
try:
transcript = __check_internal_orf(transcript,
orf_index)
transcript = __check_internal_orf(transcript, orf_index)
except (InvalidTranscript, InvalidCDS) as exc:
transcript.logger.warning("ORF %s of %s is invalid, removing. Reason: %s",
orf_index, transcript.id, exc)
__orfs_to_remove.append(orf_index)

# transcript.logger.warning("Stripping the CDS from %s, error: %s",
# transcript.id, exc)
__num_orfs = transcript.number_internal_orfs
__num_orfs = len(internal_orfs)
if (__num_orfs > 0) and (len(__orfs_to_remove) == __num_orfs):
transcript.logger.warning("Every ORF of %s is invalid, stripping the CDS", transcript.id)
transcript.strip_cds(strand_specific=True)
elif len(__orfs_to_remove):
transcript.logger.warning("Stripping %s of %s ORFs out of %s",
transcript.id, len(__orfs_to_remove), __num_orfs)
for orf_index in reversed(sorted(__orfs_to_remove)):
transcript.internal_orfs.pop(orf_index)
internal_orfs.pop(orf_index)
transcript.internal_orfs = internal_orfs
else:
transcript.logger.debug("All internal ORFs of %s pass the muster.", transcript.id)
pass

if len(transcript.internal_orfs) > 0:
transcript.selected_internal_orf_index = 0
Expand Down Expand Up @@ -748,6 +745,7 @@ def finalize(transcript):
transcript.attributes.pop(prop)

# transcript = __calc_cds_introns(transcript)
_ = transcript.cdna_length

transcript.finalized = True
transcript.logger.debug("Finished finalising %s", transcript.id)
Expand Down

0 comments on commit cfe3119

Please sign in to comment.