Skip to content

Commit

Permalink
New test correctly implemented for EI-CoreBioinformatics#132
Browse files: browse the repository at this point in the history
  • Loading branch information
lucventurini committed Oct 6, 2018
1 parent 0379c91 commit 6275cc9
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 11 deletions.
6 changes: 5 additions & 1 deletion Mikado/tests/cds_test_1.gtf
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,8 @@ Chr5 cds_test_1 exon 9929 10172 . + . transcript_id "A4"; gene_id "A"
Chr5 cds_test_1 exon 10620 12665 . + . transcript_id "A4"; gene_id "A"
Chr5 cds_test_1 CDS 10113 10172 . + . transcript_id "A4"; gene_id "A"
Chr5 cds_test_1 CDS 10620 11219 . + . transcript_id "A4"; gene_id "A"

Chr5 cds_test_1 transcript 10113 11219 . + . transcript_id "A5"; gene_id "A"
Chr5 cds_test_1 exon 10113 10172 . + . transcript_id "A5"; gene_id "A"
Chr5 cds_test_1 exon 10620 11219 . + . transcript_id "A5"; gene_id "A"
Chr5 cds_test_1 CDS 10113 10172 . + . transcript_id "A5"; gene_id "A"
Chr5 cds_test_1 CDS 10620 11219 . + . transcript_id "A5"; gene_id "A"
5 changes: 5 additions & 0 deletions Mikado/tests/cds_test_2.gtf
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,8 @@ Chr5 cds_test_2 exon 4551 4679 . - . transcript_id "A4"; gene_id "A"
Chr5 cds_test_2 exon 4765 5043 . - . transcript_id "A4"; gene_id "A"
Chr5 cds_test_2 CDS 4662 4679 . - . transcript_id "A4"; gene_id "A"
Chr5 cds_test_2 CDS 4765 4926 . - . transcript_id "A4"; gene_id "A"
Chr5 cds_test_2 transcript 4662 4926 . - . transcript_id "A5"; gene_id "A"
Chr5 cds_test_2 exon 4662 4679 . - . transcript_id "A5"; gene_id "A"
Chr5 cds_test_2 exon 4765 4926 . - . transcript_id "A5"; gene_id "A"
Chr5 cds_test_2 CDS 4662 4679 . - . transcript_id "A5"; gene_id "A"
Chr5 cds_test_2 CDS 4765 4926 . - . transcript_id "A5"; gene_id "A"
76 changes: 66 additions & 10 deletions Mikado/tests/test_system_calls.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,14 +271,15 @@ def test_cdna_redundant_cds_not(self):
"mikado_prepared.fasta"))

if b is True:
self.assertEqual(len(fa.keys()), 5)
self.assertEqual(sorted(fa.keys()), sorted(["A", "A1", "A2", "A3", "A4"]))
self.assertEqual(len(fa.keys()), 6)
self.assertEqual(sorted(fa.keys()), sorted(["A", "A1", "A2", "A3", "A4", "A5"]))
else:
self.assertEqual(len(fa.keys()), 4)
self.assertEqual(len(fa.keys()), 5)
self.assertIn("A", fa.keys())
self.assertIn("A1", fa.keys())
self.assertTrue("A2" in fa.keys() or "A3" in fa.keys())
self.assertIn("A4", fa.keys())
self.assertIn("A5", fa.keys())
gtf_file = os.path.join(self.conf["prepare"]["files"]["output_dir"], "mikado_prepared.gtf")

coding_count = 0
Expand All @@ -297,12 +298,25 @@ def test_cdna_redundant_cds_not(self):
coding_count += 1
self.assertIn("has_start_codon", transcript.attributes, str(transcript.format("gtf")))
self.assertIn("has_stop_codon", transcript.attributes, str(transcript.format("gtf")))
self.assertEqual(bool(transcript.attributes["has_start_codon"]),
transcript.has_start_codon)
self.assertEqual(bool(transcript.attributes["has_stop_codon"]),
transcript.has_stop_codon)
self.assertEqual(transcript.attributes["has_start_codon"],
transcript.has_start_codon,
(transcript.id,
transcript.attributes["has_start_codon"],
transcript.has_start_codon))
self.assertEqual(transcript.attributes["has_stop_codon"],
transcript.has_stop_codon,
(transcript.id, transcript.attributes["has_stop_codon"],
transcript.has_stop_codon))
self.assertEqual(transcript.is_complete,
transcript.has_start_codon and transcript.has_stop_codon)
self.assertIn("A5", transcripts)
a5 = transcripts["A5"]
self.assertTrue(a5.is_coding)
self.assertIn("has_start_codon", a5.attributes)
self.assertIn("has_stop_codon", a5.attributes)
self.assertTrue(a5.has_start_codon)
self.assertTrue(a5.has_stop_codon)
self.assertTrue(a5.is_complete)

self.assertGreater(coding_count, 0)

Expand All @@ -317,6 +331,7 @@ def test_negative_cdna_redundant_cds_not(self):
self.conf["prepare"]["files"]["out_fasta"] = "mikado_prepared.fasta"
self.conf["prepare"]["files"]["out"] = "mikado_prepared.gtf"
self.conf["prepare"]["strip_cds"] = False
self.conf["prepare"]["minimum_length"] = 150 # Necessary for testing A5

args = Namespace()
args.strip_cds = False
Expand All @@ -332,14 +347,55 @@ def test_negative_cdna_redundant_cds_not(self):
fa = pyfaidx.Fasta(os.path.join(self.conf["prepare"]["files"]["output_dir"],
"mikado_prepared.fasta"))
if b is True:
self.assertEqual(len(fa.keys()), 5)
self.assertEqual(sorted(fa.keys()), sorted(["A", "A1", "A2", "A3", "A4"]))
self.assertEqual(len(fa.keys()), 6)
self.assertEqual(sorted(fa.keys()), sorted(["A", "A1", "A2", "A3", "A4", "A5"]))
else:
self.assertEqual(len(fa.keys()), 4)
self.assertEqual(len(fa.keys()), 5)
self.assertIn("A", fa.keys())
self.assertIn("A1", fa.keys())
self.assertTrue("A2" in fa.keys() or "A3" in fa.keys())
self.assertIn("A4", fa.keys())
self.assertIn("A5", fa.keys())

gtf_file = os.path.join(self.conf["prepare"]["files"]["output_dir"], "mikado_prepared.gtf")

coding_count = 0
with to_gff(gtf_file) as gtf:
lines = [line for line in gtf]
transcripts = dict()
for line in lines:
if line.is_transcript:
transcript = Transcript(line)
transcripts[transcript.id] = transcript
elif line.is_exon:
transcripts[line.transcript].add_exon(line)
[transcripts[_].finalize() for _ in transcripts]
for transcript in transcripts.values():
if transcript.is_coding:
coding_count += 1
self.assertIn("has_start_codon", transcript.attributes, str(transcript.format("gtf")))
self.assertIn("has_stop_codon", transcript.attributes, str(transcript.format("gtf")))
self.assertEqual(transcript.attributes["has_start_codon"],
transcript.has_start_codon,
(transcript.id,
transcript.attributes["has_start_codon"],
transcript.has_start_codon))
self.assertEqual(transcript.attributes["has_stop_codon"],
transcript.has_stop_codon,
(transcript.id, transcript.attributes["has_stop_codon"],
transcript.has_stop_codon))
self.assertEqual(transcript.is_complete,
transcript.has_start_codon and transcript.has_stop_codon)
self.assertIn("A5", transcripts)
a5 = transcripts["A5"]
self.assertTrue(a5.is_coding)
self.assertIn("has_start_codon", a5.attributes)
self.assertIn("has_stop_codon", a5.attributes)
self.assertTrue(a5.has_start_codon)
self.assertTrue(a5.has_stop_codon)
self.assertTrue(a5.is_complete)

self.assertGreater(coding_count, 0)


class CompareCheck(unittest.TestCase):
Expand Down
8 changes: 8 additions & 0 deletions Mikado/transcripts/transcript_methods/finalizing.py
Original file line number Diff line number Diff line change
Expand Up @@ -645,6 +645,14 @@ def finalize(transcript):
else:
transcript.feature = "transcript"

for prop in ["has_start_codon", "has_stop_codon"]:
if prop in transcript.attributes:
if transcript.is_coding:
transcript.attributes[prop] = bool(transcript.attributes[prop])
setattr(transcript, prop, transcript.attributes[prop])
else:
del transcript.attributes[prop]

if len(transcript.combined_cds) == 0:
transcript.selected_internal_orf_cds = tuple([])
else:
Expand Down

0 comments on commit 6275cc9

Please sign in to comment.