diff --git a/Mikado/subprograms/util/convert.py b/Mikado/subprograms/util/convert.py index 8b37cf045..9b7a92965 100644 --- a/Mikado/subprograms/util/convert.py +++ b/Mikado/subprograms/util/convert.py @@ -29,7 +29,7 @@ def _convert_bam(parser, args, out_format): )) transcript.parent = gene print(Gene(transcript).format(out_format, transcriptomic=args.transcriptomic), file=args.out) - continue + return def _convert_gtf(parser: GTF, args, out_format): @@ -77,6 +77,7 @@ def _convert_gtf(parser: GTF, args, out_format): elif current is not None: print(genes[current].format(out_format, transcriptomic=args.transcriptomic), file=args.out) + return def _convert_bed12(parser: Bed12Parser, args, out_format): mock_gene_counter = 0 @@ -88,6 +89,7 @@ def _convert_bed12(parser: Bed12Parser, args, out_format): gene = Gene(Transcript(line)) print(gene.format(out_format, transcriptomic=args.transcriptomic), file=args.out) + return def _convert_gff(parser: GFF3, args, out_format): orphaned = dict() @@ -123,7 +125,8 @@ def _convert_gff(parser: GFF3, args, out_format): del orphaned[tid] elif line.is_transcript is True and args.assume_sorted is False: transcript = Transcript(line) - tid2gene[transcript.id] = transcript.parent[0] + if transcript.parent: + tid2gene[transcript.id] = transcript.parent[0] if transcript.id in orphaned: assert transcript.chrom == orphaned[transcript.id].chrom assert transcript.strand == orphaned[transcript.id].strand @@ -134,7 +137,7 @@ def _convert_gff(parser: GFF3, args, out_format): if orphaned[transcript.id].parent and orphaned[transcript.id].parent[0] in genes: genes[orphaned[transcript.id].parent[0]].add(orphaned[transcript.id]) del orphaned[transcript.id] - elif transcript.parent[0] in genes: + elif transcript.parent and transcript.parent[0] in genes: genes[transcript.parent[0]].add(transcript) else: orphaned[transcript.id] = transcript @@ -168,15 +171,21 @@ def _convert_gff(parser: GFF3, args, out_format): if args.assume_sorted is False: for tid in orphaned: - for parent in orphaned[tid].parent: - if parent in genes: - genes[parent].add(orphaned[tid]) - else: - genes[parent] = Gene(orphaned[tid]) + if not orphaned[tid].parent: + orphaned[tid].parent = "{}.gene".format(orphaned[tid].id) + genes[orphaned[tid].parent[0]] = orphaned[tid] + else: + for parent in orphaned[tid].parent: + if parent in genes: + genes[parent].add(orphaned[tid]) + else: + genes[parent] = Gene(orphaned[tid]) for gid, gene in genes.items(): print(gene.format(out_format, transcriptomic=args.transcriptomic), file=args.out) + return + def launch(args): @@ -208,7 +217,6 @@ def launch(args): if parser.__annot_type__ == "bam": _convert_bam(parser, args, out_format) - return elif parser.__annot_type__ == "gtf": _convert_gtf(parser, args, out_format) elif parser.__annot_type__ == "bed12": diff --git a/Mikado/tests/test_system_calls.py b/Mikado/tests/test_system_calls.py index b4e50759d..8ae70be1c 100644 --- a/Mikado/tests/test_system_calls.py +++ b/Mikado/tests/test_system_calls.py @@ -65,10 +65,13 @@ def test_convert_from_bam(self): args = parser.parse_args(argv) launch(args) # pkg_resources.load_entry_point("Mikado", "console_scripts", "mikado")() + outfile.flush() self.assertGreater(os.stat(outfile.name).st_size, 0) lines = [_ for _ in open(outfile.name)] if outp == "gff3": self.assertEqual(len(lines), 1826) + elif outp == "bed12": + self.assertEqual(len(lines), 270) print(os.stat(outfile.name).st_size, len(lines)) self.assertTrue(any(["TraesCS2B02G055500.1" in line for line in lines]))