From 6275cc93e1eba6d880f3d7ab93203332a1abaacc Mon Sep 17 00:00:00 2001 From: Luca Venturini Date: Sat, 6 Oct 2018 10:36:08 +0100 Subject: [PATCH] New test correctly implemented for #132 --- Mikado/tests/cds_test_1.gtf | 6 +- Mikado/tests/cds_test_2.gtf | 5 ++ Mikado/tests/test_system_calls.py | 76 ++++++++++++++++--- .../transcript_methods/finalizing.py | 8 ++ 4 files changed, 84 insertions(+), 11 deletions(-) diff --git a/Mikado/tests/cds_test_1.gtf b/Mikado/tests/cds_test_1.gtf index 9abc51042..b6b0491f7 100644 --- a/Mikado/tests/cds_test_1.gtf +++ b/Mikado/tests/cds_test_1.gtf @@ -30,4 +30,8 @@ Chr5 cds_test_1 exon 9929 10172 . + . transcript_id "A4"; gene_id "A" Chr5 cds_test_1 exon 10620 12665 . + . transcript_id "A4"; gene_id "A" Chr5 cds_test_1 CDS 10113 10172 . + . transcript_id "A4"; gene_id "A" Chr5 cds_test_1 CDS 10620 11219 . + . transcript_id "A4"; gene_id "A" - +Chr5 cds_test_1 transcript 10113 11219 . + . transcript_id "A5"; gene_id "A" +Chr5 cds_test_1 exon 10113 10172 . + . transcript_id "A5"; gene_id "A" +Chr5 cds_test_1 exon 10620 11219 . + . transcript_id "A5"; gene_id "A" +Chr5 cds_test_1 CDS 10113 10172 . + . transcript_id "A5"; gene_id "A" +Chr5 cds_test_1 CDS 10620 11219 . + . transcript_id "A5"; gene_id "A" diff --git a/Mikado/tests/cds_test_2.gtf b/Mikado/tests/cds_test_2.gtf index 64172ead8..f1774844b 100644 --- a/Mikado/tests/cds_test_2.gtf +++ b/Mikado/tests/cds_test_2.gtf @@ -30,3 +30,8 @@ Chr5 cds_test_2 exon 4551 4679 . - . transcript_id "A4"; gene_id "A" Chr5 cds_test_2 exon 4765 5043 . - . transcript_id "A4"; gene_id "A" Chr5 cds_test_2 CDS 4662 4679 . - . transcript_id "A4"; gene_id "A" Chr5 cds_test_2 CDS 4765 4926 . - . transcript_id "A4"; gene_id "A" +Chr5 cds_test_2 transcript 4662 4926 . - . transcript_id "A5"; gene_id "A" +Chr5 cds_test_2 exon 4662 4679 . - . transcript_id "A5"; gene_id "A" +Chr5 cds_test_2 exon 4765 4926 . - . transcript_id "A5"; gene_id "A" +Chr5 cds_test_2 CDS 4662 4679 . - . transcript_id "A5"; gene_id "A" +Chr5 cds_test_2 CDS 4765 4926 . - . transcript_id "A5"; gene_id "A" \ No newline at end of file diff --git a/Mikado/tests/test_system_calls.py b/Mikado/tests/test_system_calls.py index 77ea0f9b4..9af5a70d5 100644 --- a/Mikado/tests/test_system_calls.py +++ b/Mikado/tests/test_system_calls.py @@ -271,14 +271,15 @@ def test_cdna_redundant_cds_not(self): "mikado_prepared.fasta")) if b is True: - self.assertEqual(len(fa.keys()), 5) - self.assertEqual(sorted(fa.keys()), sorted(["A", "A1", "A2", "A3", "A4"])) + self.assertEqual(len(fa.keys()), 6) + self.assertEqual(sorted(fa.keys()), sorted(["A", "A1", "A2", "A3", "A4", "A5"])) else: - self.assertEqual(len(fa.keys()), 4) + self.assertEqual(len(fa.keys()), 5) self.assertIn("A", fa.keys()) self.assertIn("A1", fa.keys()) self.assertTrue("A2" in fa.keys() or "A3" in fa.keys()) self.assertIn("A4", fa.keys()) + self.assertIn("A5", fa.keys()) gtf_file = os.path.join(self.conf["prepare"]["files"]["output_dir"], "mikado_prepared.gtf") coding_count = 0 @@ -297,12 +298,25 @@ def test_cdna_redundant_cds_not(self): coding_count += 1 self.assertIn("has_start_codon", transcript.attributes, str(transcript.format("gtf"))) self.assertIn("has_stop_codon", transcript.attributes, str(transcript.format("gtf"))) - self.assertEqual(bool(transcript.attributes["has_start_codon"]), - transcript.has_start_codon) - self.assertEqual(bool(transcript.attributes["has_stop_codon"]), - transcript.has_stop_codon) + self.assertEqual(transcript.attributes["has_start_codon"], + transcript.has_start_codon, + (transcript.id, + transcript.attributes["has_start_codon"], + transcript.has_start_codon)) + self.assertEqual(transcript.attributes["has_stop_codon"], + transcript.has_stop_codon, + (transcript.id, transcript.attributes["has_stop_codon"], + transcript.has_stop_codon)) self.assertEqual(transcript.is_complete, transcript.has_start_codon and transcript.has_stop_codon) + self.assertIn("A5", transcripts) + a5 = transcripts["A5"] + self.assertTrue(a5.is_coding) + self.assertIn("has_start_codon", a5.attributes) + self.assertIn("has_stop_codon", a5.attributes) + self.assertTrue(a5.has_start_codon) + self.assertTrue(a5.has_stop_codon) + self.assertTrue(a5.is_complete) self.assertGreater(coding_count, 0) @@ -317,6 +331,7 @@ def test_negative_cdna_redundant_cds_not(self): self.conf["prepare"]["files"]["out_fasta"] = "mikado_prepared.fasta" self.conf["prepare"]["files"]["out"] = "mikado_prepared.gtf" self.conf["prepare"]["strip_cds"] = False + self.conf["prepare"]["minimum_length"] = 150 # Necessary for testing A5 args = Namespace() args.strip_cds = False @@ -332,14 +347,55 @@ def test_negative_cdna_redundant_cds_not(self): fa = pyfaidx.Fasta(os.path.join(self.conf["prepare"]["files"]["output_dir"], "mikado_prepared.fasta")) if b is True: - self.assertEqual(len(fa.keys()), 5) - self.assertEqual(sorted(fa.keys()), sorted(["A", "A1", "A2", "A3", "A4"])) + self.assertEqual(len(fa.keys()), 6) + self.assertEqual(sorted(fa.keys()), sorted(["A", "A1", "A2", "A3", "A4", "A5"])) else: - self.assertEqual(len(fa.keys()), 4) + self.assertEqual(len(fa.keys()), 5) self.assertIn("A", fa.keys()) self.assertIn("A1", fa.keys()) self.assertTrue("A2" in fa.keys() or "A3" in fa.keys()) self.assertIn("A4", fa.keys()) + self.assertIn("A5", fa.keys()) + + gtf_file = os.path.join(self.conf["prepare"]["files"]["output_dir"], "mikado_prepared.gtf") + + coding_count = 0 + with to_gff(gtf_file) as gtf: + lines = [line for line in gtf] + transcripts = dict() + for line in lines: + if line.is_transcript: + transcript = Transcript(line) + transcripts[transcript.id] = transcript + elif line.is_exon: + transcripts[line.transcript].add_exon(line) + [transcripts[_].finalize() for _ in transcripts] + for transcript in transcripts.values(): + if transcript.is_coding: + coding_count += 1 + self.assertIn("has_start_codon", transcript.attributes, str(transcript.format("gtf"))) + self.assertIn("has_stop_codon", transcript.attributes, str(transcript.format("gtf"))) + self.assertEqual(transcript.attributes["has_start_codon"], + transcript.has_start_codon, + (transcript.id, + transcript.attributes["has_start_codon"], + transcript.has_start_codon)) + self.assertEqual(transcript.attributes["has_stop_codon"], + transcript.has_stop_codon, + (transcript.id, transcript.attributes["has_stop_codon"], + transcript.has_stop_codon)) + self.assertEqual(transcript.is_complete, + transcript.has_start_codon and transcript.has_stop_codon) + self.assertIn("A5", transcripts) + a5 = transcripts["A5"] + self.assertTrue(a5.is_coding) + self.assertIn("has_start_codon", a5.attributes) + self.assertIn("has_stop_codon", a5.attributes) + self.assertTrue(a5.has_start_codon) + self.assertTrue(a5.has_stop_codon) + self.assertTrue(a5.is_complete) + + self.assertGreater(coding_count, 0) class CompareCheck(unittest.TestCase): diff --git a/Mikado/transcripts/transcript_methods/finalizing.py b/Mikado/transcripts/transcript_methods/finalizing.py index d9a856e93..31fb8023f 100644 --- a/Mikado/transcripts/transcript_methods/finalizing.py +++ b/Mikado/transcripts/transcript_methods/finalizing.py @@ -645,6 +645,14 @@ def finalize(transcript): else: transcript.feature = "transcript" + for prop in ["has_start_codon", "has_stop_codon"]: + if prop in transcript.attributes: + if transcript.is_coding: + transcript.attributes[prop] = bool(transcript.attributes[prop]) + setattr(transcript, prop, transcript.attributes[prop]) + else: + del transcript.attributes[prop] + if len(transcript.combined_cds) == 0: transcript.selected_internal_orf_cds = tuple([]) else: