From 67d46727ef5604d563eed0bc7c640b13d5d49860 Mon Sep 17 00:00:00 2001
From: Luca Venturini
Date: Fri, 12 Mar 2021 21:42:40 +0000
Subject: [PATCH] Fix #387. Add tests (with related patches for minor,
 untriggered bugs) for BED12 and ORF loading. CHANGELOG updated.

---
 CHANGELOG.md                                |  13 +-
 Mikado/_transcripts/transcript_base.py      |   8 +-
 Mikado/configuration/configuration.py       |  20 +-
 Mikado/configuration/configurator.py        |  20 +-
 Mikado/configuration/daijin_configurator.py |   4 +-
 Mikado/loci/abstractlocus.py                |   1 +
 Mikado/loci/excluded.py                     |   1 -
 Mikado/loci/locus.py                        | 400 +-------------------
 Mikado/loci/superlocus.py                   |  33 +-
 Mikado/parsers/__init__.py                  |   3 +
 Mikado/parsers/bed12.py                     |  76 ++--
 Mikado/preparation/prepare.py               |  18 +-
 Mikado/subprograms/configure.py             |  14 +-
 Mikado/subprograms/pick.py                  |  14 +-
 Mikado/subprograms/prepare.py               |  14 +-
 Mikado/subprograms/serialise.py             |  15 +-
 Mikado/tests/locus_test.py                  |  18 +-
 Mikado/tests/test_bed12.py                  |  21 +
 Mikado/tests/test_external_async.py         |  43 ++-
 Mikado/tests/test_system_calls.py           |   5 +-
 Mikado/transcripts/pad.py                   | 388 +++++++++++++++++++
 sample_data/Snakefile                       |   4 +-
 22 files changed, 626 insertions(+), 507 deletions(-)
 create mode 100644 Mikado/transcripts/pad.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8b4169751..61b262f78 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,10 +10,17 @@ Other changes:
   provided as a stream is *disabled* though.
 - Fix [#382](https://github.com/EI-CoreBioinformatics/mikado/issues/382): now Mikado can accept generic BED12 files as input junctions, not just Portcullis junctions.
   This allows e.g. a user to provide a ***set of gene models***
-  in BED12 format as sources of valid junctions. 
-- Slightly increased the unit-test coverage for the locus classes, e.g. properly covering the `as_dict` and `load_dict`
-  methods. Minor bugfixes related to the introduction of these unit-tests.
+  in BED12 format as sources of valid junctions.
+- Fix [#387](https://github.com/EI-CoreBioinformatics/mikado/issues/387): now Mikado will always use a static seed,
+  rather than generating a new one per run, unless specifically instructed to do so. The old behaviour can still be
+  replicated by either setting the `seed` parameter to `null` (i.e. `None`) in the configuration file, or by
+  specifying `--random-seed` during the command invocation (see the sketch below).
+- General increase in code unit-test coverage; in particular:
+  - Slightly increased the unit-test coverage for the locus classes, e.g. properly covering the `as_dict` and
+    `load_dict` methods. Minor bugfixes related to the introduction of these unit-tests.
 - `Mikado.parsers.to_gff` has been renamed to `Mikado.parsers.parser_factory`.
+- The code related to the transcript padding has been moved to the submodule `Mikado.transcripts.pad`, rather than
+  being part of the `Mikado.loci.locus` submodule.
 - Mikado will error informatively if the scoring configuration file is malformed.
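A minimal standalone sketch (Python, not part of the patch) of the seed behaviour described in the #387 entry above. The hypothetical `resolve_seed` helper only mirrors, for illustration, the logic this patch adds to `MikadoConfiguration.check()` and to `check_and_load_scoring()` further down; Mikado itself does not expose a function by this name.

    import random

    def resolve_seed(seed):
        # Hypothetical helper mirroring the patched MikadoConfiguration.check():
        # a None seed ("seed: null" in the configuration file, or --random-seed
        # on the command line) is replaced by a freshly drawn value, while any
        # integer seed -- including the new static default of 0 -- is kept as-is.
        if seed is None:
            seed = random.randint(0, 2 ** 32 - 1)
        # check_and_load_scoring() then seeds the stdlib RNG with the final value.
        random.seed(seed % (2 ** 32 - 1))
        return seed

    assert resolve_seed(0) == 0            # static default: reproducible across runs
    assert resolve_seed(None) is not None  # "seed: null" / --random-seed: drawn anew

Keeping 0 as a static default makes repeated runs reproducible out of the box, while `--random-seed` (or `seed: null`) opts back into the previous behaviour of drawing a new seed at every invocation.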
# Version 2.1.1 diff --git a/Mikado/_transcripts/transcript_base.py b/Mikado/_transcripts/transcript_base.py index fee851719..e9c9dfbe5 100644 --- a/Mikado/_transcripts/transcript_base.py +++ b/Mikado/_transcripts/transcript_base.py @@ -827,8 +827,8 @@ def get_internal_orf_beds(self) -> List[BED12]: new_row.thick_start = utr + 1 new_row.thick_end = new_row.thick_start + cds_len - 1 new_row.name = "{}_orf{}".format(self.tid, index) - new_row.block_starts = [row.thick_start] - new_row.block_sizes = [cds_len] + new_row.block_starts = [0] + new_row.block_sizes = [self.cdna_length] new_row.phase = phase # self.logger.debug(new_row) new_row = BED12(new_row, @@ -849,6 +849,10 @@ def get_internal_orf_beds(self) -> List[BED12]: yield new_row + @property + def orfs(self) -> List[BED12]: + return list(self.get_internal_orf_beds()) + @Metric def is_reference(self): """Checks whether the transcript has been marked as reference by Mikado prepare""" diff --git a/Mikado/configuration/configuration.py b/Mikado/configuration/configuration.py index 1f028f25a..74c5bb5d4 100644 --- a/Mikado/configuration/configuration.py +++ b/Mikado/configuration/configuration.py @@ -1,6 +1,7 @@ import copy import dataclasses from dataclasses import field +import random from marshmallow import validate, ValidationError from marshmallow_dataclass import dataclass, Optional from .picking_config import PickConfiguration @@ -41,8 +42,10 @@ class MikadoConfiguration: "required": True }) seed: int = field(default=0, metadata={ - "metadata": {"description": "Random number generator seed, to ensure reproducibility across runs"}, - "validate": validate.Range(min=0, max=2 ** 32 - 1) + "metadata": {"description": "Random number generator seed, to ensure reproducibility across runs. Set to None" + "('null' in YAML/JSON/TOML files) to let Mikado select a random seed every time."}, + "validate": validate.Range(min=0, max=2 ** 32 - 1), + "allow_none": True, "required": True }) multiprocessing_method: Optional[str] = field(default="spawn", metadata={ "metadata": {"description": "Which method (fork, spawn, forkserver) Mikado should use for multiprocessing"}, @@ -75,11 +78,18 @@ def __post_init__(self): def copy(self): return copy.copy(self) - def check(self): + def check(self, logger=create_null_logger()): + if self.seed is None: + self.seed = random.randint(0, 2 ** 32 - 1) + logger.info(f"Random seed: {self.seed}") if self.scoring is None or not hasattr(self.scoring.requirements, "parameters"): - self.load_scoring() + self.load_scoring(logger=logger) self.scoring.check(minimal_orf_length=self.pick.orf_loading.minimal_orf_length) - self.Schema().validate(dataclasses.asdict(self)) + errors = self.Schema().validate(dataclasses.asdict(self)) + if len(errors) > 0: + exc = InvalidConfiguration(f"The configuration is invalid, please double check. 
Errors:\n{errors}") + logger.critical(exc) + raise exc def load_scoring(self, logger=None): """ diff --git a/Mikado/configuration/configurator.py b/Mikado/configuration/configurator.py index f1b61c637..e41114a2a 100644 --- a/Mikado/configuration/configurator.py +++ b/Mikado/configuration/configurator.py @@ -118,24 +118,16 @@ def check_and_load_scoring(configuration: Union[DaijinConfiguration, MikadoConfi try: configuration.load_scoring(logger=logger) - configuration.check() + configuration.check(logger=logger) configuration = check_db(configuration) if not configuration.multiprocessing_method: configuration.multiprocessing_method = get_start_method() - - except Exception as exc: + except InvalidConfiguration as exc: logger.exception(exc) raise - seed = configuration.seed - - if seed != 0: - # numpy.random.seed(seed % (2 ** 32 - 1)) - random.seed(seed % (2 ** 32 - 1)) - else: - # numpy.random.seed(None) - random.seed(None) - + assert configuration.seed is not None + random.seed(configuration.seed % (2 ** 32 - 1)) return configuration @@ -212,10 +204,6 @@ def load_and_validate_config(raw_configuration: Union[None, MikadoConfiguration, logger.exception("Loading the configuration file failed with error:\n%s\n\n\n", exc) raise InvalidConfiguration("The configuration file passed is invalid. Please double check.") - if config.seed == 0 or config.seed is None: - config.seed = random.randint(1, 2 ** 32 - 1) - logger.info("Random seed: {}", config.seed) - random.seed(config.seed % (2 ** 32 - 1)) return config diff --git a/Mikado/configuration/daijin_configurator.py b/Mikado/configuration/daijin_configurator.py index 444dbdd41..b07bf21b0 100644 --- a/Mikado/configuration/daijin_configurator.py +++ b/Mikado/configuration/daijin_configurator.py @@ -6,7 +6,7 @@ import toml import yaml from pkg_resources import resource_stream -from .configurator import create_cluster_config +from .configurator import create_cluster_config, load_and_validate_config from . 
import print_config from .daijin_configuration import DaijinConfiguration from .._transcripts.scoring_configuration import ScoringFile @@ -254,6 +254,8 @@ def create_daijin_config(args: Namespace, config=None, level="ERROR", piped=Fals final_config = config.copy() + final_config = load_and_validate_config(final_config) + if args.exe: with open(args.exe, "wt") as out: for key, val in dataclasses.asdict(final_config.load).items(): diff --git a/Mikado/loci/abstractlocus.py b/Mikado/loci/abstractlocus.py index a57963f75..cdd5c6362 100644 --- a/Mikado/loci/abstractlocus.py +++ b/Mikado/loci/abstractlocus.py @@ -258,6 +258,7 @@ def as_dict(self) -> dict: state["transcripts"] = dict((tid, state["transcripts"][tid].as_dict()) for tid in state["transcripts"]) assert "metrics_calculated" in state state["json_conf"] = dataclasses.asdict(state["json_conf"]) + assert state["json_conf"]["seed"] is not None return state def load_dict(self, state: dict, load_transcripts=True, load_configuration=True): diff --git a/Mikado/loci/excluded.py b/Mikado/loci/excluded.py index 3420d4aed..7fc7b9865 100644 --- a/Mikado/loci/excluded.py +++ b/Mikado/loci/excluded.py @@ -34,7 +34,6 @@ def __init__(self, monosublocus_instance=None, configuration=None, logger=None): Abstractlocus.__init__(self, configuration=configuration) self.splitted = False self.metrics_calculated = False - # self.configuration = configuration self.logger = logger if isinstance(monosublocus_instance, Transcript): Abstractlocus.__init__(self, transcript_instance=monosublocus_instance) diff --git a/Mikado/loci/locus.py b/Mikado/loci/locus.py index 44c59d4af..cd83f996d 100644 --- a/Mikado/loci/locus.py +++ b/Mikado/loci/locus.py @@ -10,13 +10,11 @@ import operator from collections import defaultdict import pysam +from ..transcripts.expansion import expand_transcript from ..transcripts.transcript import Transcript -# from ..configuration.picking_config import valid_as_ccodes, redundant_as_ccodes -from ..transcripts.transcriptchecker import TranscriptChecker from .abstractlocus import Abstractlocus from ..parsers.GFF import GffLine from ..scales.assignment.assigner import Assigner -from ..exceptions import InvalidTranscript import networkx as nx import random @@ -537,7 +535,7 @@ def add_transcript_to_locus(self, transcript: Transcript, check_in_locus=True, # Add a check similar to what we do for the minimum requirements and the fragments if to_be_added and self.configuration.scoring.as_requirements: - to_be_added = self.__check_as_requirements(transcript, is_reference=reference_pass) + to_be_added = self._check_as_requirements(transcript, is_reference=reference_pass) if to_be_added is True: is_alternative, ccode, _ = self.is_alternative_splicing(transcript) @@ -565,7 +563,7 @@ def add_transcript_to_locus(self, transcript: Transcript, check_in_locus=True, self.locus_verified_introns.update(transcript.verified_introns) - def __check_as_requirements(self, transcript: Transcript, is_reference=False) -> bool: + def _check_as_requirements(self, transcript: Transcript, is_reference=False) -> bool: """Private method to evaluate a transcript for inclusion in the locus. This method uses the "as_requirements" section of the configuration file to perform the evaluation. @@ -573,7 +571,6 @@ def __check_as_requirements(self, transcript: Transcript, is_reference=False) -> will always evaluate to True (ie the transcript is valid). 
""" - to_be_added = True if is_reference is True and self.configuration.pick.run_options.check_references is False: return True @@ -982,7 +979,8 @@ def pad_transcripts(self, backup=None) -> set: __to_modify[tid][1], self.fai, self.logger) - if (new_transcript.start == self.transcripts[tid].end) and (new_transcript.end == self.transcripts[tid].end): + if (new_transcript.start == self.transcripts[tid].end) and \ + (new_transcript.end == self.transcripts[tid].end): self.logger.debug("No expansion took place for %s!", tid) else: self.logger.debug("Expansion took place for %s!", tid) @@ -1059,8 +1057,7 @@ def define_graph(self, objects: dict, inters=None, three_prime=False) -> nx.DiGr graph = nx.DiGraph() graph.add_nodes_from(objects.keys()) - if inters is None: - inters = self._share_extreme + inters = self._share_extreme if inters is None else inters if len(objects) >= 2: if (three_prime is True and self.strand != "-") or (three_prime is False and self.strand == "-"): @@ -1424,388 +1421,3 @@ def _remove_from_redundant_splicing_codes(self, *ccodes): sub = [_ for _ in sub if _ not in ccodes] self.logger.debug("New redundant ccodes: %s", sub) self.redundant_ccodes = sub - - -def expand_transcript(transcript: Transcript, - backup: Transcript, - start_transcript: [Transcript, bool], - end_transcript: [Transcript, bool], - fai: pysam.libcfaidx.FastaFile, - logger): - - """This method will enlarge the coordinates and exon structure of a transcript, given: - :param transcript: the transcript to modify. - :type transcript: Transcript - :param start_transcript: the template transcript for the 5' end. - :param end_transcript: the template transcript for the 3' end. - :param fai: the indexed genomic sequence. - :param logger: the logger to be used in the function. 
- """ - - # If there is nothing to do, just get out - transcript.finalize() - if start_transcript not in (False, None): - start_transcript.finalize() - if end_transcript not in (False, None): - end_transcript.finalize() - - if start_transcript in (False, None) and end_transcript in (False, None): - logger.debug("%s does not need to be expanded, exiting", transcript.id) - return transcript - - if transcript.strand == "-": - start_transcript, end_transcript = end_transcript, start_transcript - - # Make a backup copy of the transcript - # First get the ORFs - # Remove the CDS and unfinalize - logger.debug("Starting expansion of %s", transcript.id) - strand = transcript.strand - transcript.strip_cds() - transcript.unfinalize() - assert strand == transcript.strand - - upstream, up_exons, new_first_exon, up_remove = _enlarge_start(transcript, backup, start_transcript) - downstream, up_exons, down_exons, down_remove = _enlarge_end(transcript, - backup, end_transcript, up_exons, new_first_exon) - - first_exon, last_exon = transcript.exons[0], transcript.exons[-1] - - assert upstream >= 0 and downstream >= 0 - - if up_remove is True: - # Remove the first exon - transcript.remove_exon(first_exon) - if down_remove is True: - if not (up_remove is True and first_exon == last_exon): - transcript.remove_exon(last_exon) - - new_exons = up_exons + down_exons - if not new_exons: - logger.debug("%s does not need to be expanded, exiting", transcript.id) - return backup - - transcript.add_exons(new_exons) - transcript.start, transcript.end = None, None - transcript.finalize() - - if transcript.strand == "-": - downstream, upstream = upstream, downstream - - if (up_exons or down_exons): - if backup.is_coding: - seq = check_expanded(transcript, backup, start_transcript, end_transcript, - fai, upstream, downstream, logger) - transcript = enlarge_orfs(transcript, backup, seq, upstream, downstream, logger) - transcript.finalize() - else: - return backup - - # Now finalize again - logger.debug("%s: start (before %s, now %s, %s), end (before %s, now %s, %s)", - transcript.id, - backup.start, transcript.start, transcript.start < backup.start, - backup.end, transcript.end, transcript.end > backup.end) - if transcript.start < backup.start or transcript.end > backup.end: - transcript.attributes["padded"] = True - - # Now check that we have a valid expansion - if backup.is_coding and not transcript.is_coding: - # Something has gone wrong. Just return the original transcript. - assert new_exons - logger.info("Padding %s would lead to an invalid CDS (up exons: %s). Aborting.", - transcript.id, up_exons) - return backup - elif backup.is_coding: - abort = False - if backup.strand == "-" and backup.combined_cds_end < transcript.combined_cds_end: - abort = True - elif backup.strand != "-" and backup.combined_cds_end > transcript.combined_cds_end: - abort = True - if abort is True: - msg = "Padding {} (strand: {}) would lead to an in-frame stop codon ({} to {}, \ -vs original {} to {}. Aborting.".format( - transcript.id, backup.strand, transcript.combined_cds_start, transcript.combined_cds_end, - backup.combined_cds_start, backup.combined_cds_end) - logger.info(msg) - return backup - - return transcript - - -def _enlarge_start(transcript: Transcript, - backup: Transcript, - start_transcript: Transcript) -> (int, list, [None, tuple], bool): - - """This method will enlarge the transcript at the 5' end, using another transcript as the template. - :param transcript: the original transcript to modify. 
- :param backup: a copy of the transcript. As we are modifying the original one, we do need a hard copy. - :param start_transcript: the template transcript. - - The function returns the following: - :returns: the upstream modification, the list of upstream exons to add, the new first exon (if any), - a boolean flag indicating whether the first exon of the transcript should be removed. - """ - - upstream = 0 - up_exons = [] - new_first_exon = None - to_remove = False - if start_transcript: - transcript.start = start_transcript.start - upstream_exons = sorted([_ for _ in - start_transcript.find_upstream(transcript.exons[0][0], transcript.exons[0][1]) - if _.value == "exon"]) - intersecting_upstream = sorted(start_transcript.search( - transcript.exons[0][0], transcript.exons[0][1])) - - if not intersecting_upstream: - raise KeyError("No exon or intron found to be intersecting with %s vs %s, this is a mistake", - transcript.id, start_transcript.id) - - if intersecting_upstream[0].value == "exon": - new_first_exon = (min(intersecting_upstream[0][0], backup.start), - transcript.exons[0][1]) - if new_first_exon != transcript.exons[0]: - upstream += backup.start - new_first_exon[0] - up_exons.append(new_first_exon) - to_remove = True - else: - new_first_exon = None - if intersecting_upstream[0] in upstream_exons: - upstream_exons.remove(intersecting_upstream[0]) - upstream += sum(_[1] - _[0] + 1 for _ in upstream_exons) - up_exons.extend([(_[0], _[1]) for _ in upstream_exons]) - elif intersecting_upstream[0].value == "intron": - # Check whether the first exon of the model *ends* within an *intron* of the template - # If that is the case, we have to keep the first exon in place and - # just expand it until the end - # Now we have to expand until the first exon in the upstream_exons - if intersecting_upstream[0][1] == transcript.exons[0][0] - 1: - assert upstream_exons - to_remove = False - elif upstream_exons: - to_remove = True - upstream_exon = upstream_exons[-1] - new_first_exon = (upstream_exon[0], transcript.exons[0][1]) - upstream_exons.remove(upstream_exon) - upstream += backup.start - new_first_exon[0] - up_exons.append(new_first_exon) - else: - # Something fishy going on here. Let us double check everything. - if start_transcript.exons[0][0] == transcript.start: - raise ValueError( - "Something has gone wrong. The template transcript should have returned upstream exons." - ) - elif start_transcript.exons[0][0] < transcript.start: - raise ValueError( - "Something has gone wrong. We should have found the correct exons." - ) - else: - pass - - upstream += sum(_[1] - _[0] + 1 for _ in upstream_exons) - up_exons.extend([(_[0], _[1]) for _ in upstream_exons]) - - return upstream, up_exons, new_first_exon, to_remove - - -def _enlarge_end(transcript: Transcript, - backup: Transcript, - end_transcript: Transcript, - up_exons: list, - new_first_exon: [None, tuple]) -> [int, list, list, bool]: - - """ - This method will enlarge the transcript at the 5' end, using another transcript as the template. - :param transcript: the original transcript to modify. - :param backup: a copy of the transcript. As we are modifying the original one, we do need a hard copy. - :param end_transcript: the template transcript. - :param up_exons: the list of exons added at the 5' end. - :param new_first_exon: the new coordinates of what used to be the first exon of the transcript. - This is necessary because if the transcript is monoexonic, we might need to re-modify it. 
- - The function returns the following: - :returns: the downstream modification, the (potentially modified) list of upstream exons to add, - the list of downstream exons to add, a boolean flag indicating whether the last exon of the transcript - should be removed. - """ - - downstream = 0 - down_exons = [] - to_remove = False - - if end_transcript: - transcript.end = end_transcript.end - downstream_exons = sorted([_ for _ in - end_transcript.find_downstream(transcript.exons[-1][0], transcript.exons[-1][1]) - if _.value == "exon"]) - intersecting_downstream = sorted(end_transcript.search( - transcript.exons[-1][0], transcript.exons[-1][1])) - if not intersecting_downstream: - raise KeyError("No exon or intron found to be intersecting with %s vs %s, this is a mistake", - transcript.id, end_transcript.id) - # We are taking the right-most intersecting element. - if intersecting_downstream[-1].value == "exon": - if transcript.monoexonic and new_first_exon is not None: - new_exon = (new_first_exon[0], max(intersecting_downstream[-1][1], new_first_exon[1])) - if new_exon != new_first_exon: - up_exons.remove(new_first_exon) - downstream += new_exon[1] - backup.end - down_exons.append(new_exon) - to_remove = True - else: - new_exon = (transcript.exons[-1][0], - max(intersecting_downstream[-1][1], transcript.exons[-1][1])) - if new_exon != transcript.exons[-1]: - downstream += new_exon[1] - backup.end - down_exons.append(new_exon) - to_remove = True - - if intersecting_downstream[-1] in downstream_exons: - downstream_exons.remove(intersecting_downstream[-1]) - downstream += sum(_[1] - _[0] + 1 for _ in downstream_exons) - down_exons.extend([(_[0], _[1]) for _ in downstream_exons]) - elif intersecting_downstream[-1].value == "intron": - # Now we have to expand until the first exon in the upstream_exons - if intersecting_downstream[-1][0] == transcript.exons[-1][1] + 1: - assert downstream_exons - to_remove = False - elif downstream_exons: - downstream_exon = downstream_exons[0] - assert downstream_exon[1] > backup.end - assert downstream_exon[0] > backup.end - if transcript.monoexonic and new_first_exon is not None: - new_exon = (new_first_exon[0], downstream_exon[1]) - up_exons.remove(new_first_exon) - to_remove = True - else: - new_exon = (transcript.exons[-1][0], downstream_exon[1]) - to_remove = True - downstream_exons.remove(downstream_exon) - downstream += new_exon[1] - backup.end - down_exons.append(new_exon) - else: - # Something fishy going on here. Let us double check everything. - if end_transcript.exons[-1][1] == transcript.end: - raise ValueError( - "Something has gone wrong. The template transcript should have returned upstream exons." - ) - elif end_transcript.exons[-1][1] > transcript.end: - raise ValueError( - "Something has gone wrong. We should have found the correct exons." - ) - downstream += sum(_[1] - _[0] + 1 for _ in downstream_exons) - down_exons.extend([(_[0], _[1]) for _ in downstream_exons]) - - return downstream, up_exons, down_exons, to_remove - - -def check_expanded(transcript, backup, start_transcript, end_transcript, fai, upstream, downstream, logger) -> str: - - """ - This function checks that the expanded transcript is valid, and it also calculates and returns its cDNA sequence. - :param transcript: the modified transcript. - :param backup: The original transcript, before expansion. - :param start_transcript: the transcript used as template at the 5' end. - :param end_transcript: the transcript used as template at the 3' end. 
- :param fai: The pysam.libcfaidx.FastaFile object indexing the genome. - :param upstream: the amount of transcriptomic base-pairs added to the transcript at its 5' end. - :param downstream: the amount of transcriptomic base-pairs added to the transcript at its 3' end. - :param logger: the logger to use. - :returns: the cDNA of the modified transcript, as a standard Python string. - """ - - assert transcript.exons != backup.exons - assert transcript.end <= fai.get_reference_length(transcript.chrom), ( - transcript.end, fai.get_reference_length(transcript.chrom)) - genome_seq = fai.fetch(transcript.chrom, transcript.start - 1, transcript.end) - - if not (transcript.exons[-1][1] - transcript.start + 1 == len(genome_seq)): - error = "{} should have a sequence of length {} ({} start, {} end), but one of length {} has been given" - error = error.format(transcript.id, transcript.exons[-1][1] - transcript.start + 1, - transcript.start, transcript.end, len(genome_seq)) - logger.error(error) - raise InvalidTranscript(error) - seq = TranscriptChecker(transcript, genome_seq, is_reference=True).cdna - assert len(seq) == transcript.cdna_length, (len(seq), transcript.cdna_length, transcript.exons) - if not len(seq) == backup.cdna_length + upstream + downstream: - error = [len(seq), backup.cdna_length + upstream + downstream, - backup.cdna_length, upstream, downstream, - (transcript.start, transcript.end), (backup.id, backup.start, backup.end), - (None if not start_transcript else (start_transcript.id, (start_transcript.start, - start_transcript.end))), - (None if not end_transcript else (end_transcript.id, (end_transcript.start, - end_transcript.end))), - (backup.id, backup.exons), - None if not start_transcript else (start_transcript.id, start_transcript.exons), - None if not end_transcript else (end_transcript.id, end_transcript.exons), - (transcript.id + "_expanded", transcript.exons), - set.difference(set(transcript.exons), set(backup.exons)), - set.difference(set(backup.exons), set(transcript.exons)) - ] - error = "\n".join([str(_) for _ in error]) - raise AssertionError(error) - return seq - - -def enlarge_orfs(transcript: Transcript, - backup: Transcript, - seq: str, - upstream: int, - downstream: int, - logger) -> Transcript: - - """ - This method will take an expanded transcript and recalculate its ORF(s). As a consequence of the expansion, - truncated transcripts might become whole. - :param transcript: the expanded transcript. - :param backup: the original transcript. Used to extract the original ORF(s). - :param seq: the new cDNA sequence of the expanded transcript. - :param upstream: the amount of expansion that happened at the 5'. - :param downstream: the amount of expansion that happened at the 3'. - :param logger: the logger. - :returns: the modified transcript with the ORF(s) recalculated. - """ - - if backup.combined_cds_length > 0: - try: - internal_orfs = list(backup.get_internal_orf_beds()) - except (ValueError, TypeError, AssertionError): - logger.error("Something went wrong with the CDS extraction for %s. 
Stripping it.", - backup.id) - internal_orfs = [] - else: - internal_orfs = [] - - if not internal_orfs: - return transcript - - new_orfs = [] - for orf in internal_orfs: - logger.debug("Old ORF: %s", str(orf)) - try: - logger.debug("Sequence for %s: %s[..]%s (upstream %s, downstream %s)", - transcript.id, seq[:10], seq[-10:], upstream, downstream) - orf.expand(seq, upstream, downstream, expand_orf=True, logger=logger) - except AssertionError as err: - logger.error(err) - logger.error("%s, %s, %s, %s", - upstream, - downstream, - transcript.exons, - transcript.cdna_length) - raise AssertionError(err) - logger.debug("New ORF: %s", str(orf)) - if orf.coding is False: - raise ValueError(orf) - elif orf.invalid: - raise InvalidTranscript(orf.invalid_reason) - - new_orfs.append(orf) - - transcript.load_orfs(new_orfs) - transcript.finalize() - if backup.is_coding and not transcript.is_coding: - raise InvalidTranscript(new_orfs) - return transcript diff --git a/Mikado/loci/superlocus.py b/Mikado/loci/superlocus.py index d9d489fc0..f1c5c2f65 100644 --- a/Mikado/loci/superlocus.py +++ b/Mikado/loci/superlocus.py @@ -33,7 +33,7 @@ from collections import OrderedDict as SortedDict from .locus import Locus from .excluded import Excluded -from typing import Union +from typing import Union, List, Dict from ..utilities import Interval, IntervalTree from itertools import combinations import random @@ -160,7 +160,6 @@ def __init__(self, self.engine = self.sessionmaker = self.session = None # Excluded object self.excluded = Excluded(configuration=self.configuration) - self.__retained_sources = set() self.__data_loaded = False self.__lost = dict() if transcript_instance is not None: @@ -236,10 +235,8 @@ def __create_sublocus_lines(self, superlocus_line: GffLine, new_id: str, print_c self.define_subloci() found = dict() for sublocus_instance in self.subloci: - try: - sublocus_instance.source = source - except AttributeError: - raise AttributeError(sublocus_instance) + assert hasattr(sublocus_instance, "source"), sublocus_instance + sublocus_instance.source = source sublocus_instance.parent = new_id if sublocus_instance.id in found: found[sublocus_instance.id] += 1 @@ -260,8 +257,7 @@ def format(self, print_cds=True, level=None): :param level: level which we wish to print for. 
Can be "loci", "subloci", "monosubloci" :return: formatted GFF strings """ - return self.__str__(print_cds=print_cds, - level=level) + return self.__str__(print_cds=print_cds, level=level) def __str__(self, level=None, print_cds=True): @@ -284,6 +280,8 @@ def __str__(self, level=None, print_cds=True): if abs(self.start) == float("inf") or abs(self.start) == maxsize: return '' + assert level in (None, "loci", "subloci", "monosubloci"), f"Unrecognized level: {level}" + superlocus_line = GffLine('') superlocus_line.chrom = self.chrom superlocus_line.feature = self.__name__ @@ -296,16 +294,10 @@ def __str__(self, level=None, print_cds=True): superlocus_line.id, superlocus_line.name = new_id, self.name if self.approximation_level > 0: superlocus_line.attributes["approximation_level"] = self.approximation_level - if len(self.__retained_sources) > 0: - superlocus_line.attributes["retained_sources"] = ",".join( - sorted(list(self.__retained_sources)) - ) lines = [] - if level not in (None, "loci", "subloci", "monosubloci"): - raise ValueError("Unrecognized level: {0}".format(level)) - elif level == "loci" or (level is None and self.loci_defined is True): + if level == "loci" or (level is None and self.loci_defined is True): lines = self.__create_locus_lines( superlocus_line, new_id, @@ -315,9 +307,6 @@ def __str__(self, level=None, print_cds=True): lines = self.__create_monolocus_holder_lines(superlocus_line, new_id, print_cds=print_cds) - # lines = self.__create_monolocus_lines(superlocus_line, - # new_id, - # print_cds=print_cds) elif level == "subloci" or (level is None and self.monosubloci_defined is False): lines = self.__create_sublocus_lines(superlocus_line, new_id, @@ -584,17 +573,15 @@ async def get_external(self, query_ids, qids): External.query_id.in_(qids))) for ext in self.session.execute(baked): source_id, query_id, score = ext.source_id, ext.query_id, ext.score - if source_id not in sources or query_id not in qids: - continue + assert source_id in sources and query_id in qids rtype = sources[source_id].rtype + assert rtype in ("int", "float", "bool"), f"Invalid rtype: {rtype}" if rtype == "int": score = int(score) elif rtype == "float": score = float(score) elif rtype == "bool": score = bool(int(score)) - else: - raise ValueError("Invalid rtype: {}".format(sources[ext.source_id].rtype)) external[query_ids[ext.query_id].query_name][ sources[ext.source_id].source] = (score, sources[ext.source_id].valid_raw) return external @@ -637,7 +624,7 @@ async def get_hits(self, query_ids, qids): ) return hits - async def get_orfs(self, qids): + async def get_orfs(self, qids) -> Dict[str, List]: orfs = collections.defaultdict(list) for orf in orfs_baked(self.session).params(queries=qids): orfs[orf.query].append(orf.as_bed12()) diff --git a/Mikado/parsers/__init__.py b/Mikado/parsers/__init__.py index a47166b6b..6c8f17847 100644 --- a/Mikado/parsers/__init__.py +++ b/Mikado/parsers/__init__.py @@ -74,6 +74,8 @@ def parser_factory(string, input_format=None): continue if found: break + else: + raised[test.__annot_type__] = "No valid line found." except InvalidParsingFormat as exc: raised[test.__annot_type__] = exc continue @@ -83,6 +85,7 @@ def parser_factory(string, input_format=None): elif found: return test(string) else: + raise InvalidParsingFormat( "Invalid file specified: {} should have been of format {}, but it could not be verified. 
Error:\n{}".format( fname if fname != "-" else "stream", input_format, raised[input_format] diff --git a/Mikado/parsers/bed12.py b/Mikado/parsers/bed12.py index 67cd80794..f7adf86b7 100644 --- a/Mikado/parsers/bed12.py +++ b/Mikado/parsers/bed12.py @@ -34,6 +34,7 @@ import numpy as np import random import pprint as pp +from math import modf from Bio.Data import IUPACData @@ -46,7 +47,15 @@ IUPACData.extended_protein_values) assert standard.start_codons == ["ATG"] assert CodonTable.ambiguous_dna_by_id[1].start_codons != ["ATG"] -CodonTable.ambiguous_dna_by_id[0] = standard + +ambiguous_dna_by_id = dict() +ambiguous_dna_by_name = dict() +for key, table in CodonTable.ambiguous_dna_by_name.items(): + ambiguous_dna_by_name[key] = table + +for key, table in CodonTable.ambiguous_dna_by_id.items(): + ambiguous_dna_by_id[key] = table +ambiguous_dna_by_id[0] = standard @functools.lru_cache(typed=True, maxsize=2**10) @@ -421,26 +430,41 @@ def table(self): @table.setter def table(self, table): - # We are going to receive a string, so we need first to convert to integer - try: + if isinstance(table, bool): # Boolean can be considered as int so this requires special handling + raise ValueError(f"Invalid table specified: {table} (type {type(table)})") + elif table is not None and not isinstance(table, (int, float, bytes, str)): + raise ValueError(f"Invalid table specified: {table} (type {type(table)})") + elif isinstance(table, (str, bytes)): + table = table.decode() if isinstance(table, bytes) else table + if table.isdigit() is True: + table = int(table) + elif re.search(r"^[0-9]*\.[0-9]$", table): + table = float(table) + if modf(table) != 0: + raise ValueError(f"Invalid table specified: {table}") + table = int(table) + elif isinstance(table, float): + if modf(table) != 0: + raise ValueError(f"Invalid table specified: {table}") table = int(table) - except (ValueError, TypeError): - pass + if table is None: self.__table = standard self.__table_index = 0 elif isinstance(table, int): - self.__table = CodonTable.ambiguous_dna_by_id[table] - self.__table_index = 0 + if table not in ambiguous_dna_by_id.keys(): + raise ValueError(f"Invalid table code specified: {table}. Available codes: " + f"{', '.join([str(_) for _ in ambiguous_dna_by_id.keys()])}") + self.__table = ambiguous_dna_by_id[table] + assert self.__table.start_codons == ["ATG"] if table == 0 else True, f"Invalid codons for table 0: " \ + f"{self.__table.start_codons}" + self.__table_index = table elif isinstance(table, str): - self.__table = CodonTable.ambiguous_dna_by_name[table] - self.__table_index = self.__table._codon_table.id - elif isinstance(table, bytes): - self.__table = CodonTable.ambiguous_dna_by_name[table.decode()] - self.__table_index = self.__table._codon_table.id - else: - raise ValueError("Invalid table: {} (type: {})".format( - table, type(table))) + if table not in ambiguous_dna_by_name.keys(): + raise ValueError(f"Invalid table name specified: {table}. Available table: " + f"{', '.join([str(_) for _ in ambiguous_dna_by_name.keys()])}") + self.__table = ambiguous_dna_by_name[table] + self.__table_index = ambiguous_dna_by_name[table].id return @parent.setter @@ -1399,19 +1423,11 @@ def to_transcriptomic(self, sequence=None, fasta_index=None, start_adjustment=Fa seen += block[1] - block[0] + 1 # Check thick start and end are defined - error = "" - if tStart is None: - error += """The thick start of {self.id} ({self.chrom}:{self.start}-{self.end}) is invalid as it is outside of the defined exons. 
-Thick start: {self.thick_start} -Exons: {self.blocks}\n""".format(self=self) - - if tStart is None or tEnd is None: - error += """The thick end of {self.id} ({self.chrom}:{self.start}-{self.end}) is invalid as it is outside of the defined exons. -Thick end: {self.thick_end} -Exons: {self.blocks}\n""".format(self=self) - if error: - raise ValueError(error) + assert tStart is not None and tEnd is not None, f"The thick start, thick end of {self.id} are invalid " \ + f"as they are outside of the defined exons.\nThick start: " \ + f"{self.thick_start}\nThick end: {self.thick_end}\n" \ + f"Exons: {self.blocks}" if self.strand == "+": bsizes = self.block_sizes[:] @@ -1462,9 +1478,7 @@ def to_transcriptomic(self, sequence=None, fasta_index=None, start_adjustment=Fa transcriptomic=True, lenient=lenient, start_adjustment=start_adjustment) - if not isinstance(new, type(self)): - raise TypeError("The new object is of type {tnew} instead of {tself}!".format(tnew=type(new), - tself=type(self))) + assert isinstance(new, type(self)), f"The new object is of type {type(new)} instead of {type(self)}!" return new @property @@ -1570,7 +1584,7 @@ def __next__(self, seq=None): else: return self.gff_next() except (ValueError, KeyError, TypeError, UnicodeError, AttributeError, AssertionError, InvalidParsingFormat) as exc: - raise InvalidParsingFormat("This is not a valid BED12 file! Exception: {}".format(exc)) + raise InvalidParsingFormat(f"This is not a valid BED12 file! Exception: {exc}") def __getstate__(self): state = super().__getstate__() diff --git a/Mikado/preparation/prepare.py b/Mikado/preparation/prepare.py index d87fc2329..a9b4ebeed 100644 --- a/Mikado/preparation/prepare.py +++ b/Mikado/preparation/prepare.py @@ -360,6 +360,11 @@ def perform_check(keys, shelve_names, mikado_config: MikadoConfiguration, logger row_columns = ["chrom", "start", "end", "strand", "tid", "write_start", "write_length", "shelf"] +def _get_strand_specific_assemblies_boolean_vector(mikado_config): + return [(member in mikado_config.prepare.files.strand_specific_assemblies) + for member in mikado_config.prepare.files.gff] + + def _load_exon_lines_single_thread(mikado_config, shelve_names, logger, min_length, strip_cds, max_intron): logger.info("Starting to load lines from %d files (single-threaded)", @@ -373,7 +378,7 @@ def _load_exon_lines_single_thread(mikado_config, shelve_names, logger, min_leng to_do = list(zip( shelve_names, mikado_config.prepare.files.labels, - mikado_config.prepare.files.strand_specific_assemblies, + _get_strand_specific_assemblies_boolean_vector(mikado_config), mikado_config.prepare.files.reference, mikado_config.prepare.files.exclude_redundant, mikado_config.prepare.files.strip_cds, @@ -384,7 +389,7 @@ def _load_exon_lines_single_thread(mikado_config, shelve_names, logger, min_leng ( shelve_names, mikado_config.prepare.files.labels, - mikado_config.prepare.files.strand_specific_assemblies, + _get_strand_specific_assemblies_boolean_vector(mikado_config), mikado_config.prepare.files.reference, mikado_config.prepare.files.exclude_redundant, mikado_config.prepare.files.strip_cds, @@ -459,7 +464,7 @@ def _load_exon_lines_multi(mikado_config, shelve_names, logger, min_length, stri exclude_redundant, file_strip_cds, gff_name) in enumerate(zip( shelve_names, mikado_config.prepare.files.labels, - mikado_config.prepare.files.strand_specific_assemblies, + _get_strand_specific_assemblies_boolean_vector(mikado_config), mikado_config.prepare.files.reference, mikado_config.prepare.files.exclude_redundant, 
mikado_config.prepare.files.strip_cds, @@ -581,12 +586,7 @@ def prepare(mikado_config: MikadoConfiguration, logger): ) if mikado_config.prepare.strand_specific is True: - mikado_config.prepare.files.strand_specific_assemblies = [True] * len( - mikado_config.prepare.files.gff) - else: - mikado_config.prepare.files.strand_specific_assemblies = [ - (member in mikado_config.prepare.files.strand_specific_assemblies) - for member in mikado_config.prepare.files.gff] + mikado_config.prepare.files.strand_specific_assemblies = mikado_config.prepare.files.gff[:] ref_len = len(mikado_config.prepare.files.reference) file_len = len(mikado_config.prepare.files.gff) diff --git a/Mikado/subprograms/configure.py b/Mikado/subprograms/configure.py index ef3bba0c8..748f0ea54 100644 --- a/Mikado/subprograms/configure.py +++ b/Mikado/subprograms/configure.py @@ -101,7 +101,10 @@ def create_config(args): args.gff = [] config = parse_prepare_options(args, config) - config.seed = args.seed if args.seed is not None else config.seed + if args.random_seed is True: + config.seed = None + else: + config.seed = args.seed config.serialise.files.junctions = args.junctions if args.junctions is not None else \ config.serialise.files.junctions @@ -128,7 +131,6 @@ def create_config(args): config.pick.output_format.report_all_external_metrics = True if args.report_all_external_metrics else \ config.pick.output_format.report_all_external_metrics - if args.scoring is not None: if args.copy_scoring is not False: with open(args.copy_scoring, "wt") as out: @@ -181,6 +183,8 @@ def create_config(args): config.serialise.files.output_dir = args.out_dir config.pick.files.output_dir = args.out_dir + config.check() + # Check that the configuration file is correct with tempfile.NamedTemporaryFile("wt", suffix=".json", delete=True) as tempcheck: print_config(config, tempcheck, full=args.full, output_format="json") @@ -217,8 +221,10 @@ def configure_parser(): parser = argparse.ArgumentParser(description="Configuration utility for Mikado") parser.add_argument("--full", action="store_true", default=False) - parser.add_argument("--seed", type=int, default=0, - help="Random seed number.") + seed_group = parser.add_mutually_exclusive_group() + seed_group.add_argument("--seed", type=int, default=0, help="Random seed number. 
Default: 0.") + seed_group.add_argument("--random-seed", action="store_true", default=False, + help="Generate a new random seed number (instead of the default of 0)") preparer = parser.add_argument_group("Options related to the prepare stage.") preparer.add_argument("--minimum-cdna-length", default=None, type=int, dest="minimum_cdna_length", help="Minimum cDNA length for transcripts.") diff --git a/Mikado/subprograms/pick.py b/Mikado/subprograms/pick.py index 5fb434c1b..4b21a165a 100644 --- a/Mikado/subprograms/pick.py +++ b/Mikado/subprograms/pick.py @@ -133,7 +133,13 @@ def _set_conf_values_from_args(conf: Union[DaijinConfiguration, MikadoConfigurat conf.multiprocessing_method = args.start_method if args.start_method else conf.multiprocessing_method conf.threads = args.procs if args.procs is not None else conf.threads - conf.seed = args.seed if args.seed is not None else conf.seed + if args.random_seed is True: + conf.seed = None + elif args.seed is not None: + conf.seed = args.seed + else: + pass + conf.pick.scoring_file = args.scoring_file if args.scoring_file is not None else conf.pick.scoring_file conf.prepare.max_intron_length = args.max_intron_length if args.max_intron_length is not None else \ @@ -402,8 +408,10 @@ def pick_parser(): either of the ORFs lacks a BLAST hit (but not both). - permissive: like lenient, but also split when both ORFs lack BLAST hits - split: split multi-orf transcripts regardless of what BLAST data is available.""") - parser.add_argument("--seed", type=int, default=None, - help="Random seed number.") + seed_group = parser.add_mutually_exclusive_group() + seed_group.add_argument("--seed", type=int, default=None, help="Random seed number. Default: 0.") + seed_group.add_argument("--random-seed", action="store_true", default=False, + help="Generate a new random seed number (instead of the default of 0)") parser.add_argument("gff", nargs="?", default=None) parser.set_defaults(func=pick) return parser diff --git a/Mikado/subprograms/prepare.py b/Mikado/subprograms/prepare.py index 24a68491c..1c0b0a04f 100644 --- a/Mikado/subprograms/prepare.py +++ b/Mikado/subprograms/prepare.py @@ -120,8 +120,14 @@ def parse_prepare_options(args, mikado_config) -> Union[DaijinConfiguration, Mik mikado_config.serialise.codon_table = str(args.codon_table) if ( getattr(args, "codon_table", None) not in (None, False, True)) else mikado_config.serialise.codon_table - mikado_config.seed = args.seed if args.seed is not None else mikado_config.seed + if args.random_seed is True: + mikado_config.seed = None + elif args.seed is not None: + mikado_config.seed = args.seed + else: + pass + mikado_config.check() assert isinstance(mikado_config.reference.genome, str) return mikado_config @@ -284,8 +290,10 @@ def positive(string): cds_stripping.add_argument("--strip-faulty-cds", default=None, action="store_true", help="Flag. If set, transcripts with an incorrect CDS will be retained but \ with their CDS stripped. Default behaviour: the whole transcript will be considered invalid and discarded.") - parser.add_argument("--seed", type=int, default=None, - help="Random seed number.") + seed_group = parser.add_mutually_exclusive_group() + seed_group.add_argument("--seed", type=int, default=None, help="Random seed number. 
Default: 0.") + seed_group.add_argument("--random-seed", action="store_true", default=False, + help="Generate a new random seed number (instead of the default of 0)") parser.add_argument("gff", help="Input GFF/GTF file(s).", nargs="*") parser.set_defaults(func=prepare_launcher) return parser diff --git a/Mikado/subprograms/serialise.py b/Mikado/subprograms/serialise.py index b8b7db15e..2d3c30947 100644 --- a/Mikado/subprograms/serialise.py +++ b/Mikado/subprograms/serialise.py @@ -289,7 +289,14 @@ def setup(args): logger.setLevel("INFO") logger.info("Command line: %s", " ".join(sys.argv)) - mikado_configuration.seed = args.seed if args.seed is not None else mikado_configuration.seed + if args.random_seed is True: + mikado_configuration.seed = None + elif args.seed is not None: + mikado_configuration.seed = args.seed + else: + pass + + mikado_configuration.check() random.seed(mikado_configuration.seed) logger.info("Random seed: %s", mikado_configuration.seed) logger.setLevel(mikado_configuration.log_settings.log_level) @@ -449,7 +456,9 @@ def serialise_parser(): generic.add_argument("db", type=str, default=None, nargs='?', help="Optional output database. Default: derived from configuration") - generic.add_argument("--seed", type=int, default=None, - help="Random seed number.") + seed_group = parser.add_mutually_exclusive_group() + seed_group.add_argument("--seed", type=int, default=None, help="Random seed number. Default: 0.") + seed_group.add_argument("--random-seed", action="store_true", default=False, + help="Generate a new random seed number (instead of the default of 0)") parser.set_defaults(func=serialise) return parser diff --git a/Mikado/tests/locus_test.py b/Mikado/tests/locus_test.py index c005f9fe1..f1db3f490 100644 --- a/Mikado/tests/locus_test.py +++ b/Mikado/tests/locus_test.py @@ -516,6 +516,7 @@ def setUp(self): self.assertIsNotNone(self.configuration.scoring, self.configuration) self.transcript1.configuration = self.configuration self.transcript2.configuration = self.configuration + self.assertEqual(self.transcript1.configuration.seed, self.transcript2.configuration.seed) def test_create_metrics_row(self): @@ -999,6 +1000,7 @@ def test_serialisation(self): def test_slocus_dicts(self): + self.assertEqual(self.transcript1.configuration.seed, self.transcript2.configuration.seed) locus = Superlocus(self.transcript1) locus.add_transcript_to_locus(self.transcript2, check_in_locus=False) locus.subloci = [Sublocus(self.transcript1)] @@ -1006,7 +1008,7 @@ def test_slocus_dicts(self): locus.loci = {l.id: l} ml = MonosublocusHolder(Monosublocus(self.transcript1)) locus.monoholders = [ml] - locus.excluded = Excluded(self.transcript2) + locus.excluded = Excluded(self.transcript2, configuration=locus.configuration) conf = locus.configuration.copy() _without = locus.as_dict(with_subloci=False, with_monoholders=False) self.assertEqual(_without["subloci"], []) @@ -1014,9 +1016,15 @@ def test_slocus_dicts(self): self.assertEqual(_without["excluded"], locus.excluded.as_dict()) self.assertEqual(_without["loci"], {l.id: l.as_dict()}) _with = locus.as_dict(with_subloci=True, with_monoholders=True) + self.assertIsNotNone(_with["json_conf"]["seed"]) + self.assertEqual(_with["json_conf"]["seed"], conf.seed) self.assertEqual(_with["subloci"], [locus.subloci[0].as_dict()]) self.assertEqual(_with["monoholders"], [ml.as_dict()]) - self.assertEqual(_with["excluded"], Excluded(self.transcript2).as_dict()) + self.assertEqual(conf.seed, locus.configuration.seed) + self.assertEqual(conf.seed, 
self.transcript2.configuration.seed) + excl = Excluded(self.transcript2, configuration=conf) + self.assertEqual(excl.configuration.seed, locus.configuration.seed) + self.assertEqual(_with["excluded"], Excluded(self.transcript2, configuration=conf).as_dict()) self.assertEqual(_with["loci"], {l.id: l.as_dict()}) self.assertIsInstance(_with["json_conf"], dict) # Now test the reloading @@ -4234,11 +4242,11 @@ def test_complete_padding(self): locus.logger = logger locus.configuration.pick.alternative_splicing.ts_distance = pad_distance locus.configuration.pick.alternative_splicing.ts_max_splices = max_splice - # locus.logger.setLevel("DEBUG") + locus.logger.setLevel("DEBUG") locus.pad_transcripts() locus.logger.setLevel("WARNING") - - self.assertEqual(locus[best].start, transcripts["AT5G01030.2"].start) + self.assertEqual(transcripts["AT5G01030.2"].start, 9869) + self.assertEqual(locus[best].start, 9869) self.assertIn(best, locus) if max_splice < 2 or pad_distance <= 250: with self.assertLogs(logger, "DEBUG") as cm: diff --git a/Mikado/tests/test_bed12.py b/Mikado/tests/test_bed12.py index abb23936b..e86160d51 100644 --- a/Mikado/tests/test_bed12.py +++ b/Mikado/tests/test_bed12.py @@ -97,6 +97,27 @@ def test_ambiguous(self): if ambigouous is None: return # Nothing to test + def test_set_table(self): + b = BED12() + for invalid in (True, list(), "Inexistent", b"Standard2"): + with self.assertRaises(ValueError): + b.table = invalid + self.assertNotIn(0, CodonTable.ambiguous_dna_by_id.keys()) + for num in range(0, max(CodonTable.ambiguous_dna_by_id.keys()) + 10): + if num in CodonTable.ambiguous_dna_by_id.keys(): + b.table = num + self.assertEqual(b.table, CodonTable.ambiguous_dna_by_id[num]) + elif num == 0: + b.table = num + self.assertEqual(b.table, standard) + else: + with self.assertRaises(ValueError): + b.table = num + + for valid in list(CodonTable.ambiguous_dna_by_name.keys()): + b.table = valid + self.assertEqual(b.table, CodonTable.ambiguous_dna_by_name[valid]) + class Bed12GenToTrans(unittest.TestCase): diff --git a/Mikado/tests/test_external_async.py b/Mikado/tests/test_external_async.py index f7479a8e7..6a7c3828d 100644 --- a/Mikado/tests/test_external_async.py +++ b/Mikado/tests/test_external_async.py @@ -3,6 +3,8 @@ from Mikado._transcripts.scoring_configuration import MinMaxScore, SizeFilter from Mikado.configuration.configurator import load_and_validate_config from Mikado.loci import Superlocus +from Mikado.parsers.bed12 import BED12 +from Mikado.serializers.blast_serializer import Target, Hit, Hsp from Mikado.serializers.external import External, ExternalSource from Mikado.serializers.blast_serializer.query import Query from Mikado.serializers.orf import Orf @@ -195,4 +197,43 @@ def test_retrieval(self): class AsyncOrfLoading(unittest.TestCase): def test_load_orfs(self): - """""" + + transcript_line = 'Chr1\t100\t2000\tID=foo;coding=True;phase=0'\ + '\t0\t+\t300\t1850\t0\t4\t400,400,400,200\t0,500,1100,1700' + transcript = Transcript(transcript_line) + orf = transcript.orfs[0].to_transcriptomic() + transcript2 = transcript.copy() + transcript2.unfinalize() + transcript2.chrom = "Chr2" + transcript2.id = "foo.2" + transcript2.finalize() + other_orf = transcript2.orfs[0].to_transcriptomic() + engine = create_engine("sqlite:///:memory:") + db.metadata.create_all(engine) + SessionMaker = sessionmaker(bind=engine) + session = SessionMaker() + query = Query(transcript.id, transcript.cdna_length) + query2 = Query(transcript2.id, transcript2.cdna_length) + session.add_all([query, 
query2]) + session.commit() + serialized_orf = Orf(orf, query.query_id) + self.assertEqual(serialized_orf.thick_end, orf.thick_end) + self.assertEqual(serialized_orf.cds_len, orf.cds_len) + serialized_other_orf = Orf(other_orf, query2.query_id) + session.add_all([serialized_orf, serialized_other_orf]) + session.commit() + sup = Superlocus(transcript) + sup.session = session + sup_orfs = asyncio.run(sup.get_orfs([query.query_id])) + self.assertEqual(len(sup_orfs), 1) + self.assertIn(transcript.id, sup_orfs) + self.assertEqual(len(sup_orfs[transcript.id]), 1) + self.assertIsInstance(sup_orfs[transcript.id][0], BED12, type(sup_orfs[transcript.id][0])) + self.assertTrue(sup_orfs[transcript.id][0] == orf, "\n" + "\n".join( + [str(orf), str(sup_orfs[transcript.id][0])])) + + +# TODO: Create a test for the BLAST hits/hsps + +class AsyncBlastTest(unittest.TestCase): + """Test for the functionality of loading a BLAST hit from a Superlocus object.""" diff --git a/Mikado/tests/test_system_calls.py b/Mikado/tests/test_system_calls.py index 9ceef1151..04e2dbafb 100644 --- a/Mikado/tests/test_system_calls.py +++ b/Mikado/tests/test_system_calls.py @@ -527,7 +527,7 @@ def test_cdna_redundant_cds_not(self): args.procs = 1 args.list = None args.gffs = None - args.strand_specific_assemblies = None + args.strand_specific_assemblies = [] args.labels = None args.configuration = self.conf args.exclude_redundant = b @@ -538,6 +538,8 @@ def test_cdna_redundant_cds_not(self): args.log = "prepare.log" self.logger.setLevel("DEBUG") assert os.path.exists(folder) + self.assertEqual(args.strand_specific_assemblies, []) + self.assertEqual(args.configuration.prepare.files.strand_specific_assemblies, []) args, mikado_configuration, _logger = prepare_setup(args) self.assertIsNotNone(mikado_configuration) # self.assertEqual(args.output_dir, folder) @@ -2427,6 +2429,7 @@ def test_xml_vs_tsv(self): args.log = "{}_{}.log".format(name, proc) args.xml = blast args.procs = proc + args.start_adjustment = True serialise(args) dbs[name][proc] = os.path.join(test_xml_vs_tsv_folder, args.db) logged = [_.rstrip() for _ in open(os.path.join(test_xml_vs_tsv_folder, args.log))] diff --git a/Mikado/transcripts/pad.py b/Mikado/transcripts/pad.py new file mode 100644 index 000000000..9fe7b97ac --- /dev/null +++ b/Mikado/transcripts/pad.py @@ -0,0 +1,388 @@ +from .transcript import Transcript +import pysam +from ..exceptions import InvalidTranscript +from .transcriptchecker import TranscriptChecker + + +def expand_transcript(transcript: Transcript, + backup: Transcript, + start_transcript: [Transcript, bool], + end_transcript: [Transcript, bool], + fai: pysam.libcfaidx.FastaFile, + logger): + + """This method will enlarge the coordinates and exon structure of a transcript, given: + :param transcript: the transcript to modify. + :type transcript: Transcript + :param backup: a copy of the transcript to be modified. + :type backup: Transcript + :param start_transcript: the template transcript for the 5' end. + :param end_transcript: the template transcript for the 3' end. + :param fai: the indexed genomic sequence. + :param logger: the logger to be used in the function. 
+ """ + + # If there is nothing to do, just get out + assert transcript == backup + transcript.finalize() + if start_transcript not in (False, None): + start_transcript.finalize() + if end_transcript not in (False, None): + end_transcript.finalize() + + if start_transcript in (False, None) and end_transcript in (False, None): + logger.debug("%s does not need to be expanded, exiting", transcript.id) + return transcript + + if transcript.strand == "-": + start_transcript, end_transcript = end_transcript, start_transcript + + # Make a backup copy of the transcript + # First get the ORFs + # Remove the CDS and unfinalize + logger.debug("Starting expansion of %s", transcript.id) + strand = transcript.strand + transcript.strip_cds() + transcript.unfinalize() + assert strand == transcript.strand + + upstream, up_exons, new_first_exon, up_remove = _enlarge_start(transcript, backup, start_transcript) + downstream, up_exons, down_exons, down_remove = _enlarge_end(transcript, + backup, end_transcript, up_exons, new_first_exon) + + first_exon, last_exon = transcript.exons[0], transcript.exons[-1] + + assert upstream >= 0 and downstream >= 0 + + if up_remove is True: + # Remove the first exon + transcript.remove_exon(first_exon) + if down_remove is True: + if not (up_remove is True and first_exon == last_exon): + transcript.remove_exon(last_exon) + + new_exons = up_exons + down_exons + if not new_exons: + logger.debug("%s does not need to be expanded, exiting", transcript.id) + return backup + + transcript.add_exons(new_exons) + transcript.start, transcript.end = None, None + transcript.finalize() + + if transcript.strand == "-": + downstream, upstream = upstream, downstream + + if backup.is_coding: + seq = check_expanded(transcript, backup, start_transcript, end_transcript, + fai, upstream, downstream, logger) + transcript = enlarge_orfs(transcript, backup, seq, upstream, downstream, logger) + transcript.finalize() + + logger.debug("%s: start (before %s, now %s, %s), end (before %s, now %s, %s)", + transcript.id, + backup.start, transcript.start, transcript.start < backup.start, + backup.end, transcript.end, transcript.end > backup.end) + if transcript.start < backup.start or transcript.end > backup.end: + transcript.attributes["padded"] = True + + # Now check that we have a valid expansion + if backup.is_coding and not transcript.is_coding: + # Something has gone wrong. Just return the original transcript. + assert new_exons + logger.info("Padding %s would lead to an invalid CDS (up exons: %s). Aborting.", + transcript.id, up_exons) + return backup + elif backup.is_coding: + abort = False + if backup.strand == "-" and backup.combined_cds_end < transcript.combined_cds_end: + abort = True + elif backup.strand != "-" and backup.combined_cds_end > transcript.combined_cds_end: + abort = True + if abort is True: + msg = "Padding {} (strand: {}) would lead to an in-frame stop codon ({} to {}, \ +vs original {} to {}. Aborting.".format( + transcript.id, backup.strand, transcript.combined_cds_start, transcript.combined_cds_end, + backup.combined_cds_start, backup.combined_cds_end) + logger.info(msg) + return backup + + return transcript + + +def _enlarge_start(transcript: Transcript, + backup: Transcript, + start_transcript: Transcript) -> (int, list, [None, tuple], bool): + + """This method will enlarge the transcript at the 5' end, using another transcript as the template. + :param transcript: the original transcript to modify. + :param backup: a copy of the transcript. 
As we are modifying the original one, we do need a hard copy.
+    :param start_transcript: the template transcript.
+
+    The function returns the following:
+    :returns: the upstream modification, the list of upstream exons to add, the new first exon (if any),
+    a boolean flag indicating whether the first exon of the transcript should be removed.
+    """
+
+    upstream = 0
+    up_exons = []
+    new_first_exon = None
+    to_remove = False
+    if start_transcript:
+        transcript.start = start_transcript.start
+        upstream_exons = sorted(
+            [_ for _ in start_transcript.find_upstream(transcript.exons[0][0], transcript.exons[0][1])
+             if _.value == "exon"])
+        intersecting_upstream = sorted(start_transcript.search(
+            transcript.exons[0][0], transcript.exons[0][1]))
+
+        if not intersecting_upstream:
+            raise KeyError("No exon or intron found to be intersecting with {} vs {}, this is a mistake".format(
+                transcript.id, start_transcript.id))
+
+        if intersecting_upstream[0].value == "exon":
+            new_first_exon = (min(intersecting_upstream[0][0], backup.start),
+                              transcript.exons[0][1])
+            if new_first_exon != transcript.exons[0]:
+                upstream += backup.start - new_first_exon[0]
+                up_exons.append(new_first_exon)
+                to_remove = True
+            else:
+                new_first_exon = None
+            if intersecting_upstream[0] in upstream_exons:
+                upstream_exons.remove(intersecting_upstream[0])
+            upstream += sum(_[1] - _[0] + 1 for _ in upstream_exons)
+            up_exons.extend([(_[0], _[1]) for _ in upstream_exons])
+        elif intersecting_upstream[0].value == "intron":
+            # Check whether the first exon of the model *starts* within an *intron* of the template.
+            # If the exon starts right at the intron/exon boundary, we keep the first exon in place;
+            # otherwise, we extend it up to the closest upstream exon of the template.
+            if intersecting_upstream[0][1] == transcript.exons[0][0] - 1:
+                assert upstream_exons
+                to_remove = False
+            elif upstream_exons:
+                to_remove = True
+                upstream_exon = upstream_exons[-1]
+                new_first_exon = (upstream_exon[0], transcript.exons[0][1])
+                upstream_exons.remove(upstream_exon)
+                upstream += backup.start - new_first_exon[0]
+                up_exons.append(new_first_exon)
+            else:
+                # Something fishy going on here. Let us double check everything.
+                if start_transcript.exons[0][0] == transcript.start:
+                    raise ValueError(
+                        "Something has gone wrong. The template transcript should have returned upstream exons."
+                    )
+                elif start_transcript.exons[0][0] < transcript.start:
+                    raise ValueError(
+                        "Something has gone wrong. We should have found the correct exons."
+                    )
+                else:
+                    pass
+
+            upstream += sum(_[1] - _[0] + 1 for _ in upstream_exons)
+            up_exons.extend([(_[0], _[1]) for _ in upstream_exons])
+
+    return upstream, up_exons, new_first_exon, to_remove
+
+
+def _enlarge_end(transcript: Transcript,
+                 backup: Transcript,
+                 end_transcript: Transcript,
+                 up_exons: list,
+                 new_first_exon: [None, tuple]) -> [int, list, list, bool]:
+
+    """
+    This method will enlarge the transcript at the 3' end, using another transcript as the template.
+    :param transcript: the original transcript to modify.
+    :param backup: a copy of the transcript. As we are modifying the original one, we do need a hard copy.
+    :param end_transcript: the template transcript.
+    :param up_exons: the list of exons added at the 5' end.
+    :param new_first_exon: the new coordinates of what used to be the first exon of the transcript.
+    This is necessary because if the transcript is monoexonic, we might need to re-modify it. 
+
+
+    The function returns the following:
+    :returns: the downstream modification, the (potentially modified) list of upstream exons to add,
+    the list of downstream exons to add, a boolean flag indicating whether the last exon of the transcript
+    should be removed.
+    """
+
+    downstream = 0
+    down_exons = []
+    to_remove = False
+
+    if end_transcript:
+        transcript.end = end_transcript.end
+        downstream_exons = sorted(
+            [_ for _ in end_transcript.find_downstream(transcript.exons[-1][0], transcript.exons[-1][1])
+             if _.value == "exon"])
+        intersecting_downstream = sorted(end_transcript.search(
+            transcript.exons[-1][0], transcript.exons[-1][1]))
+        if not intersecting_downstream:
+            raise KeyError("No exon or intron found to be intersecting with {} vs {}, this is a mistake".format(
+                transcript.id, end_transcript.id))
+        # We are taking the right-most intersecting element.
+        if intersecting_downstream[-1].value == "exon":
+            if transcript.monoexonic and new_first_exon is not None:
+                new_exon = (new_first_exon[0], max(intersecting_downstream[-1][1], new_first_exon[1]))
+                if new_exon != new_first_exon:
+                    up_exons.remove(new_first_exon)
+                    downstream += new_exon[1] - backup.end
+                    down_exons.append(new_exon)
+                    to_remove = True
+            else:
+                new_exon = (transcript.exons[-1][0],
+                            max(intersecting_downstream[-1][1], transcript.exons[-1][1]))
+                if new_exon != transcript.exons[-1]:
+                    downstream += new_exon[1] - backup.end
+                    down_exons.append(new_exon)
+                    to_remove = True
+
+            if intersecting_downstream[-1] in downstream_exons:
+                downstream_exons.remove(intersecting_downstream[-1])
+            downstream += sum(_[1] - _[0] + 1 for _ in downstream_exons)
+            down_exons.extend([(_[0], _[1]) for _ in downstream_exons])
+        elif intersecting_downstream[-1].value == "intron":
+            # The last exon of the model ends within an intron of the template.
+            # If it ends right at the intron/exon boundary, we keep it in place;
+            # otherwise, we extend it up to the closest downstream exon of the template.
+            if intersecting_downstream[-1][0] == transcript.exons[-1][1] + 1:
+                assert downstream_exons
+                to_remove = False
+            elif downstream_exons:
+                downstream_exon = downstream_exons[0]
+                assert downstream_exon[1] > backup.end
+                assert downstream_exon[0] > backup.end
+                if transcript.monoexonic and new_first_exon is not None:
+                    new_exon = (new_first_exon[0], downstream_exon[1])
+                    up_exons.remove(new_first_exon)
+                    to_remove = True
+                else:
+                    new_exon = (transcript.exons[-1][0], downstream_exon[1])
+                    to_remove = True
+                downstream_exons.remove(downstream_exon)
+                downstream += new_exon[1] - backup.end
+                down_exons.append(new_exon)
+            else:
+                # Something fishy going on here. Let us double check everything.
+                if end_transcript.exons[-1][1] == transcript.end:
+                    raise ValueError(
+                        "Something has gone wrong. The template transcript should have returned downstream exons."
+                    )
+                elif end_transcript.exons[-1][1] > transcript.end:
+                    raise ValueError(
+                        "Something has gone wrong. We should have found the correct exons."
+                    )
+            downstream += sum(_[1] - _[0] + 1 for _ in downstream_exons)
+            down_exons.extend([(_[0], _[1]) for _ in downstream_exons])
+
+    return downstream, up_exons, down_exons, to_remove
+
+
+def check_expanded(transcript, backup, start_transcript, end_transcript, fai, upstream, downstream, logger) -> str:
+
+    """
+    This function checks that the expanded transcript is valid, and it also calculates and returns its cDNA sequence.
+    :param transcript: the modified transcript.
+    :param backup: The original transcript, before expansion.
+    :param start_transcript: the transcript used as template at the 5' end.
+    :param end_transcript: the transcript used as template at the 3' end. 
+ :param fai: The pysam.libcfaidx.FastaFile object indexing the genome. + :param upstream: the amount of transcriptomic base-pairs added to the transcript at its 5' end. + :param downstream: the amount of transcriptomic base-pairs added to the transcript at its 3' end. + :param logger: the logger to use. + :returns: the cDNA of the modified transcript, as a standard Python string. + """ + + assert transcript.exons != backup.exons + assert transcript.end <= fai.get_reference_length(transcript.chrom), ( + transcript.end, fai.get_reference_length(transcript.chrom)) + genome_seq = fai.fetch(transcript.chrom, transcript.start - 1, transcript.end) + + if not (transcript.exons[-1][1] - transcript.start + 1 == len(genome_seq)): + error = "{} should have a sequence of length {} ({} start, {} end), but one of length {} has been given" + error = error.format(transcript.id, transcript.exons[-1][1] - transcript.start + 1, + transcript.start, transcript.end, len(genome_seq)) + logger.error(error) + raise InvalidTranscript(error) + seq = TranscriptChecker(transcript, genome_seq, is_reference=True).cdna + assert len(seq) == transcript.cdna_length, (len(seq), transcript.cdna_length, transcript.exons) + if not len(seq) == backup.cdna_length + upstream + downstream: + error = [len(seq), backup.cdna_length + upstream + downstream, + backup.cdna_length, upstream, downstream, + (transcript.start, transcript.end), (backup.id, backup.start, backup.end), + (None if not start_transcript else (start_transcript.id, (start_transcript.start, + start_transcript.end))), + (None if not end_transcript else (end_transcript.id, (end_transcript.start, + end_transcript.end))), + (backup.id, backup.exons), + None if not start_transcript else (start_transcript.id, start_transcript.exons), + None if not end_transcript else (end_transcript.id, end_transcript.exons), + (transcript.id + "_expanded", transcript.exons), + set.difference(set(transcript.exons), set(backup.exons)), + set.difference(set(backup.exons), set(transcript.exons)) + ] + error = "\n".join([str(_) for _ in error]) + raise AssertionError(error) + return seq + + +def enlarge_orfs(transcript: Transcript, + backup: Transcript, + seq: str, + upstream: int, + downstream: int, + logger) -> Transcript: + + """ + This method will take an expanded transcript and recalculate its ORF(s). As a consequence of the expansion, + truncated transcripts might become whole. + :param transcript: the expanded transcript. + :param backup: the original transcript. Used to extract the original ORF(s). + :param seq: the new cDNA sequence of the expanded transcript. + :param upstream: the amount of expansion that happened at the 5'. + :param downstream: the amount of expansion that happened at the 3'. + :param logger: the logger. + :returns: the modified transcript with the ORF(s) recalculated. + """ + + if backup.combined_cds_length > 0: + try: + internal_orfs = list(backup.get_internal_orf_beds()) + except (ValueError, TypeError, AssertionError): + logger.error("Something went wrong with the CDS extraction for %s. 
Stripping it.", + backup.id) + internal_orfs = [] + else: + internal_orfs = [] + + if not internal_orfs: + return transcript + + new_orfs = [] + for orf in internal_orfs: + logger.debug("Old ORF: %s", str(orf)) + try: + logger.debug("Sequence for %s: %s[..]%s (upstream %s, downstream %s)", + transcript.id, seq[:10], seq[-10:], upstream, downstream) + orf.expand(seq, upstream, downstream, expand_orf=True, logger=logger) + except AssertionError as err: + logger.error(err) + logger.error("%s, %s, %s, %s", + upstream, + downstream, + transcript.exons, + transcript.cdna_length) + raise AssertionError(err) + logger.debug("New ORF: %s", str(orf)) + if orf.coding is False: + raise ValueError(orf) + elif orf.invalid: + raise InvalidTranscript(orf.invalid_reason) + + new_orfs.append(orf) + + transcript.load_orfs(new_orfs) + transcript.finalize() + if backup.is_coding and not transcript.is_coding: + raise InvalidTranscript(new_orfs) + return transcript diff --git a/sample_data/Snakefile b/sample_data/Snakefile index f10e9be4d..dc7821b54 100644 --- a/sample_data/Snakefile +++ b/sample_data/Snakefile @@ -72,8 +72,8 @@ rule daijin_assemble: output: conf="daijin_test/mikado.yaml" threads: 4 - message: "daijin assemble --nolock --threads 2 --cores 4 --jobs 2 daijin.toml" - shell: "daijin assemble --nolock --threads 2 --cores 4 --jobs 2 {input.conf}" + message: "daijin assemble -nd --nolock --threads 2 --cores 4 --jobs 2 daijin.toml" + shell: "daijin assemble -nd --nolock --threads 2 --cores 4 --jobs 2 {input.conf}" rule test_json: input: db=swissprot, config=configname