From ffbff8b32b20c16e2ed27bc828236e02cdf6abb1 Mon Sep 17 00:00:00 2001 From: Luca Venturini Date: Thu, 4 Oct 2018 15:34:09 +0100 Subject: [PATCH] We should have fixed #131 --- Mikado/loci/superlocus.py | 54 ++++++++++++++++++++++++++++--------- Mikado/tests/test_scores.py | 5 ++-- 2 files changed, 45 insertions(+), 14 deletions(-) diff --git a/Mikado/loci/superlocus.py b/Mikado/loci/superlocus.py index 38063a5a8..ef12c41c3 100644 --- a/Mikado/loci/superlocus.py +++ b/Mikado/loci/superlocus.py @@ -1120,8 +1120,50 @@ def define_loci(self): if self.json_conf["pick"]["alternative_splicing"]["report"] is True: self.define_alternative_splicing() + self.__find_lost_transcripts() + while len(self.lost_transcripts) > 0: + new_locus = None + for transcript in self.lost_transcripts.values(): + if new_locus is None: + new_locus = Superlocus(transcript, + json_conf=self.json_conf, + use_transcript_scores=self._use_transcript_scores, + stranded=self.stranded, + verified_introns=self.locus_verified_introns, + logger = self.logger, + source=self.source + ) + else: + new_locus.add_transcript_to_locus(transcript) + new_locus.define_loci() + self.loci.update(new_locus.loci) + self.__lost = new_locus.lost_transcripts return + def __find_lost_transcripts(self): + + if self.loci_defined is True: + return + + loci_transcripts = itertools.chain(*[{self.loci[_].transcripts.keys()} for _ in self.loci]) + + for tid in set.difference({self.transcripts.keys()}, loci_transcripts): + found = False + for lid in self.loci: + if MonosublocusHolder.in_locus(self.loci[lid], self.transcripts[tid]): + found = True + break + else: + continue + if found is True: + continue + else: + self.__lost.update({tid: self.transcripts[tid]}) + + if len(self.__lost): + self.logger.warning("Lost %s transcripts from %s; starting the recovery process", + len(self.lost_transcripts), self.id) + def define_alternative_splicing(self): """ @@ -1180,7 +1222,6 @@ def define_alternative_splicing(self): # Now we have to recheck that no AS event is linking more than one locus. to_remove = collections.defaultdict(list) - lost_found_ids = set() for lid in self.loci: for tid, transcript in [_ for _ in self.loci[lid].transcripts.items() if _[0] != self.loci[lid].primary_transcript_id]: @@ -1191,23 +1232,12 @@ def define_alternative_splicing(self): self.logger.warning("%s is compatible with more than one locus. Removing it.", tid) to_remove[lid].append(tid) - for tid in self.__lost: - if MonosublocusHolder.in_locus(self.loci[lid], self.lost_transcripts[tid]): - lost_found_ids.add(tid) - for lid in to_remove: for tid in to_remove[lid]: self.loci[lid].remove_transcript_from_locus(tid) self.loci[lid].finalize_alternative_splicing() - for tid in lost_found_ids: - del self.__lost[tid] - - if len(self.__lost): - self.logger.warning("Lost %s transcripts from %s; starting the recovery process", - len(self.lost_transcripts), self.id) - return def calculate_mono_metrics(self): diff --git a/Mikado/tests/test_scores.py b/Mikado/tests/test_scores.py index 82d427ecb..70293026e 100644 --- a/Mikado/tests/test_scores.py +++ b/Mikado/tests/test_scores.py @@ -332,6 +332,7 @@ def test_transcript_missed(self): locus.add_transcript_to_locus(t2) locus.add_transcript_to_locus(t3) locus.define_loci() - self.assertEqual(len(locus.loci), 1) + self.assertEqual(len(locus.loci), 2) primaries = set([locus.loci[_].primary_transcript_id for _ in locus.loci]) - self.assertEqual(primaries, {t3.id}) + self.assertEqual(primaries, {t3.id, t1.id}) + # self.assertEqual(set(locus.lost_transcripts.keys()), {t1.id})