Skip to content

Commit

Permalink
Added successful tests for #127
Browse files Browse the repository at this point in the history
  • Loading branch information
lucventurini committed Sep 27, 2018
1 parent 45f510d commit 6b03ef8
Show file tree
Hide file tree
Showing 3 changed files with 135 additions and 0 deletions.
33 changes: 33 additions & 0 deletions Mikado/tests/cds_test_1.gtf
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# To make the type of situation clearer: suppose we have four models, A, A’, A’’ and A’’’, all on the same strand and
# with the same two exons: (1001, 1500) and (1700, 2000). Model A does not have any CDS. Model A’ has an ORF with
# coordinates (1201, 1470). Model A’’ has an ORF with coordinates (1401, 1900). Model A’’’ is identical to model A’’.
# At the moment, as they have the same exonic coordinates, Mikado would consider these four models identical and keep
# only one of them (chosen randomly).

# Chr5 TAIR10 exon 9930 10172 . + . Parent=AT5G01030.1
# Chr5 TAIR10 five_prime_UTR 9930 10172 . + . Parent=AT5G01030.1
# Chr5 TAIR10 exon 10620 12665 . + . Parent=AT5G01030.1

Chr5 cds_test_1 transcript 9930 12665 . + . transcript_id "A"; gene_id "A"
Chr5 cds_test_1 exon 9930 10172 . + . transcript_id "A"; gene_id "A"
Chr5 cds_test_1 exon 10620 12665 . + . transcript_id "A"; gene_id "A"
Chr5 cds_test_1 transcript 9930 12665 . + . transcript_id "A1"; gene_id "A"
Chr5 cds_test_1 exon 9930 10172 . + . transcript_id "A1"; gene_id "A"
Chr5 cds_test_1 exon 10620 12665 . + . transcript_id "A1"; gene_id "A"
Chr5 cds_test_1 CDS 9931 10020 . + . transcript_id "A1"; gene_id "A"
Chr5 cds_test_1 transcript 9930 12665 . + . transcript_id "A2"; gene_id "A"
Chr5 cds_test_1 exon 9930 10172 . + . transcript_id "A2"; gene_id "A"
Chr5 cds_test_1 exon 10620 12665 . + . transcript_id "A2"; gene_id "A"
Chr5 cds_test_1 CDS 10113 10172 . + . transcript_id "A2"; gene_id "A"
Chr5 cds_test_1 CDS 10620 11219 . + . transcript_id "A2"; gene_id "A"
Chr5 cds_test_1 transcript 9930 12665 . + . transcript_id "A3"; gene_id "A"
Chr5 cds_test_1 exon 9930 10172 . + . transcript_id "A3"; gene_id "A"
Chr5 cds_test_1 exon 10620 12665 . + . transcript_id "A3"; gene_id "A"
Chr5 cds_test_1 CDS 10113 10172 . + . transcript_id "A3"; gene_id "A"
Chr5 cds_test_1 CDS 10620 11219 . + . transcript_id "A3"; gene_id "A"
Chr5 cds_test_1 transcript 9929 12665 . + . transcript_id "A4"; gene_id "A"
Chr5 cds_test_1 exon 9929 10172 . + . transcript_id "A4"; gene_id "A"
Chr5 cds_test_1 exon 10620 12665 . + . transcript_id "A4"; gene_id "A"
Chr5 cds_test_1 CDS 10113 10172 . + . transcript_id "A4"; gene_id "A"
Chr5 cds_test_1 CDS 10620 11219 . + . transcript_id "A4"; gene_id "A"

32 changes: 32 additions & 0 deletions Mikado/tests/cds_test_2.gtf
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# To make the type of situation clearer: suppose we have four models, A, A’, A’’ and A’’’, all on the same strand and
# with the same two exons: (1001, 1500) and (1700, 2000). Model A does not have any CDS. Model A’ has an ORF with
# coordinates (1201, 1470). Model A’’ has an ORF with coordinates (1401, 1900). Model A’’’ is identical to model A’’.
# At the moment, as they have the same exonic coordinates, Mikado would consider these four models identical and keep
# only one of them (chosen randomly).

# Chr5 TAIR10 exon 4765 5043 . - . Parent=AT5G01010.1
# Chr5 TAIR10 CDS 4552 4679 . - 2 Parent=AT5G01010.1,AT5G01010.1-Protein;
# Chr5 TAIR10 exon 4552 4679 . - . Parent=AT5G01010.1

Chr5 cds_test_2 transcript 4552 5043 . - . transcript_id "A"; gene_id "A"
Chr5 cds_test_2 exon 4552 4679 . - . transcript_id "A"; gene_id "A"
Chr5 cds_test_2 exon 4765 5043 . - . transcript_id "A"; gene_id "A"
Chr5 cds_test_2 transcript 4552 5043 . - . transcript_id "A1"; gene_id "A"
Chr5 cds_test_2 exon 4552 4679 . - . transcript_id "A1"; gene_id "A"
Chr5 cds_test_2 exon 4765 5043 . - . transcript_id "A1"; gene_id "A"
Chr5 cds_test_2 CDS 4552 4643 . - . transcript_id "A1"; gene_id "A"
Chr5 cds_test_2 transcript 4552 5043 . - . transcript_id "A2"; gene_id "A"
Chr5 cds_test_2 exon 4552 4679 . - . transcript_id "A2"; gene_id "A"
Chr5 cds_test_2 exon 4765 5043 . - . transcript_id "A2"; gene_id "A"
Chr5 cds_test_2 CDS 4662 4679 . - . transcript_id "A2"; gene_id "A"
Chr5 cds_test_2 CDS 4765 4926 . - . transcript_id "A2"; gene_id "A"
Chr5 cds_test_2 transcript 4552 5043 . - . transcript_id "A3"; gene_id "A"
Chr5 cds_test_2 exon 4552 4679 . - . transcript_id "A3"; gene_id "A"
Chr5 cds_test_2 exon 4765 5043 . - . transcript_id "A3"; gene_id "A"
Chr5 cds_test_2 CDS 4662 4679 . - . transcript_id "A3"; gene_id "A"
Chr5 cds_test_2 CDS 4765 4926 . - . transcript_id "A3"; gene_id "A"
Chr5 cds_test_2 transcript 4551 5043 . - . transcript_id "A4"; gene_id "A"
Chr5 cds_test_2 exon 4551 4679 . - . transcript_id "A4"; gene_id "A"
Chr5 cds_test_2 exon 4765 5043 . - . transcript_id "A4"; gene_id "A"
Chr5 cds_test_2 CDS 4662 4679 . - . transcript_id "A4"; gene_id "A"
Chr5 cds_test_2 CDS 4765 4926 . - . transcript_id "A4"; gene_id "A"
70 changes: 70 additions & 0 deletions Mikado/tests/test_system_calls.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,76 @@ def test_prepare_with_cds(self):
else:
self.assertFalse(models[model].is_coding, models[model].format("gtf"))

def test_cdna_redundant_cds_not(self):
    """Check that models with the same exons but a different CDS are not treated as redundant.

    ``prepare`` is run on ``cds_test_1.gtf`` (plus-strand models) with
    ``keep_redundant`` switched off.  The fixture holds five transcripts:

    * ``A``  - no CDS;
    * ``A1`` - same exons as ``A``, with a CDS;
    * ``A2`` and ``A3`` - same exons as ``A`` and the same CDS as each other,
      which differs from the CDS of ``A1``;
    * ``A4`` - same CDS as ``A2``/``A3`` but a different first exon start.

    The output FASTA is therefore expected to contain four sequences:
    ``A``, ``A1``, ``A4`` and exactly one of ``A2``/``A3``.
    """

    gtf = pkg_resources.resource_filename("Mikado.tests", "cds_test_1.gtf")
    files = self.conf["prepare"]["files"]
    files["gff"] = [gtf]
    files["labels"] = [""]
    files["output_dir"] = tempfile.gettempdir()
    files["out_fasta"] = "mikado_prepared.fasta"
    files["out"] = "mikado_prepared.gtf"
    self.conf["prepare"]["strip_cds"] = False
    # Only the keep_redundant=False case is exercised by this test.
    self.conf["prepare"]["keep_redundant"] = False

    args = Namespace()
    args.strip_cds = False
    args.keep_redundant = False
    args.json_conf = self.conf

    prepare.prepare(args, self.logger)

    # Re-read the output directory from the configuration after the run and
    # use the literal file name, exactly as the configuration was set above.
    fasta_path = os.path.join(self.conf["prepare"]["files"]["output_dir"],
                              "mikado_prepared.fasta")
    self.assertTrue(os.path.exists(fasta_path))

    # Collect the sequence names and release the FASTA handle straight away.
    with pyfaidx.Fasta(fasta_path) as fa:
        names = list(fa.keys())

    self.assertEqual(len(names), 4)
    self.assertIn("A", names)
    self.assertIn("A1", names)
    # A2 and A3 are exact duplicates: either one may be the survivor.
    self.assertTrue("A2" in names or "A3" in names)
    self.assertIn("A4", names)

def test_negative_cdna_redundant_cds_not(self):
    """Minus-strand check that models with the same exons but a different CDS are kept apart.

    ``prepare`` is run on ``cds_test_2.gtf`` (minus-strand models) with
    ``keep_redundant`` switched off.  The fixture holds five transcripts:

    * ``A``  - no CDS;
    * ``A1`` - same exons as ``A``, with a CDS;
    * ``A2`` and ``A3`` - same exons as ``A`` and the same CDS as each other,
      which differs from the CDS of ``A1``;
    * ``A4`` - same CDS as ``A2``/``A3`` but a different start coordinate for
      the exon at the lowest genomic position.

    The output FASTA is therefore expected to contain four sequences:
    ``A``, ``A1``, ``A4`` and exactly one of ``A2``/``A3``.
    """

    gtf = pkg_resources.resource_filename("Mikado.tests", "cds_test_2.gtf")
    files = self.conf["prepare"]["files"]
    files["gff"] = [gtf]
    files["labels"] = [""]
    files["output_dir"] = tempfile.gettempdir()
    files["out_fasta"] = "mikado_prepared.fasta"
    files["out"] = "mikado_prepared.gtf"
    self.conf["prepare"]["strip_cds"] = False
    # Only the keep_redundant=False case is exercised by this test.
    self.conf["prepare"]["keep_redundant"] = False

    args = Namespace()
    args.strip_cds = False
    args.keep_redundant = False
    args.json_conf = self.conf

    prepare.prepare(args, self.logger)

    # Re-read the output directory from the configuration after the run and
    # use the literal file name, exactly as the configuration was set above.
    fasta_path = os.path.join(self.conf["prepare"]["files"]["output_dir"],
                              "mikado_prepared.fasta")
    self.assertTrue(os.path.exists(fasta_path))

    # Collect the sequence names and release the FASTA handle straight away.
    with pyfaidx.Fasta(fasta_path) as fa:
        names = list(fa.keys())

    self.assertEqual(len(names), 4)
    self.assertIn("A", names)
    self.assertIn("A1", names)
    # A2 and A3 are exact duplicates: either one may be the survivor.
    self.assertTrue("A2" in names or "A3" in names)
    self.assertIn("A4", names)


class CompareCheck(unittest.TestCase):

Expand Down

0 comments on commit 6b03ef8

Please sign in to comment.