Skip to content

Commit

Permalink
Fix #387. Add tests (with relative patches for minor, untriggered bug…
Browse files Browse the repository at this point in the history
…s) for BED12 and ORF loading. CHANGELOG updated.
  • Loading branch information
lucventurini committed Mar 12, 2021
1 parent 501704b commit 67d4672
Show file tree
Hide file tree
Showing 22 changed files with 626 additions and 507 deletions.
13 changes: 10 additions & 3 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,17 @@ Other changes:
provided as a stream is *disabled* though.
- Fix [#382](https://github.com/EI-CoreBioinformatics/mikado/issues/382): now Mikado can accept generic BED12 files
as input junctions, not just Portcullis junctions. This allows e.g. a user to provide a ***set of gene models***
in BED12 format as sources of valid junctions.
- Slightly increased the unit-test coverage for the locus classes, e.g. properly covering the `as_dict` and `load_dict`
methods. Minor bugfixes related to the introduction of these unit-tests.
in BED12 format as sources of valid junctions.
- Fix [#387](https://github.com/EI-CoreBioinformatics/mikado/issues/387): Mikado now uses a static seed by default,
rather than generating a new one on every call. The old behaviour can still be
replicated by either setting the `seed` parameter to `null` (i.e. `None`) in the configuration file, or by
specifying `--random-seed` on the command line.
- General increase in code unit-test coverage; in particular:
- Slightly increased the unit-test coverage for the locus classes, e.g. properly covering the `as_dict` and `load_dict`
methods. Minor bugfixes related to the introduction of these unit-tests.
- `Mikado.parsers.to_gff` has been renamed to `Mikado.parsers.parser_factory`.
- The code related to the transcript padding has been moved to the submodule `Mikado.transcripts.pad`, rather than
being part of the `Mikado.loci.locus` submodule.
- Mikado now raises an informative error if the scoring configuration file is malformed.

# Version 2.1.1
Expand Down
8 changes: 6 additions & 2 deletions Mikado/_transcripts/transcript_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -827,8 +827,8 @@ def get_internal_orf_beds(self) -> List[BED12]:
new_row.thick_start = utr + 1
new_row.thick_end = new_row.thick_start + cds_len - 1
new_row.name = "{}_orf{}".format(self.tid, index)
new_row.block_starts = [row.thick_start]
new_row.block_sizes = [cds_len]
new_row.block_starts = [0]
new_row.block_sizes = [self.cdna_length]
new_row.phase = phase
# self.logger.debug(new_row)
new_row = BED12(new_row,
Expand All @@ -849,6 +849,10 @@ def get_internal_orf_beds(self) -> List[BED12]:

yield new_row

@property
def orfs(self) -> List[BED12]:
return list(self.get_internal_orf_beds())

@Metric
def is_reference(self):
"""Checks whether the transcript has been marked as reference by Mikado prepare"""
Expand Down
20 changes: 15 additions & 5 deletions Mikado/configuration/configuration.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import copy
import dataclasses
from dataclasses import field
import random
from marshmallow import validate, ValidationError
from marshmallow_dataclass import dataclass, Optional
from .picking_config import PickConfiguration
Expand Down Expand Up @@ -41,8 +42,10 @@ class MikadoConfiguration:
"required": True
})
seed: int = field(default=0, metadata={
"metadata": {"description": "Random number generator seed, to ensure reproducibility across runs"},
"validate": validate.Range(min=0, max=2 ** 32 - 1)
"metadata": {"description": "Random number generator seed, to ensure reproducibility across runs. Set to None"
"('null' in YAML/JSON/TOML files) to let Mikado select a random seed every time."},
"validate": validate.Range(min=0, max=2 ** 32 - 1),
"allow_none": True, "required": True
})
multiprocessing_method: Optional[str] = field(default="spawn", metadata={
"metadata": {"description": "Which method (fork, spawn, forkserver) Mikado should use for multiprocessing"},
Expand Down Expand Up @@ -75,11 +78,18 @@ def __post_init__(self):
def copy(self):
return copy.copy(self)

def check(self):
def check(self, logger=create_null_logger()):
if self.seed is None:
self.seed = random.randint(0, 2 ** 32 - 1)
logger.info(f"Random seed: {self.seed}")
if self.scoring is None or not hasattr(self.scoring.requirements, "parameters"):
self.load_scoring()
self.load_scoring(logger=logger)
self.scoring.check(minimal_orf_length=self.pick.orf_loading.minimal_orf_length)
self.Schema().validate(dataclasses.asdict(self))
errors = self.Schema().validate(dataclasses.asdict(self))
if len(errors) > 0:
exc = InvalidConfiguration(f"The configuration is invalid, please double check. Errors:\n{errors}")
logger.critical(exc)
raise exc

def load_scoring(self, logger=None):
"""
Expand Down
20 changes: 4 additions & 16 deletions Mikado/configuration/configurator.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,24 +118,16 @@ def check_and_load_scoring(configuration: Union[DaijinConfiguration, MikadoConfi

try:
configuration.load_scoring(logger=logger)
configuration.check()
configuration.check(logger=logger)
configuration = check_db(configuration)
if not configuration.multiprocessing_method:
configuration.multiprocessing_method = get_start_method()

except Exception as exc:
except InvalidConfiguration as exc:
logger.exception(exc)
raise

seed = configuration.seed

if seed != 0:
# numpy.random.seed(seed % (2 ** 32 - 1))
random.seed(seed % (2 ** 32 - 1))
else:
# numpy.random.seed(None)
random.seed(None)

assert configuration.seed is not None
random.seed(configuration.seed % (2 ** 32 - 1))
return configuration


Expand Down Expand Up @@ -212,10 +204,6 @@ def load_and_validate_config(raw_configuration: Union[None, MikadoConfiguration,
logger.exception("Loading the configuration file failed with error:\n%s\n\n\n", exc)
raise InvalidConfiguration("The configuration file passed is invalid. Please double check.")

if config.seed == 0 or config.seed is None:
config.seed = random.randint(1, 2 ** 32 - 1)
logger.info("Random seed: {}", config.seed)

random.seed(config.seed % (2 ** 32 - 1))

return config
4 changes: 3 additions & 1 deletion Mikado/configuration/daijin_configurator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import toml
import yaml
from pkg_resources import resource_stream
from .configurator import create_cluster_config
from .configurator import create_cluster_config, load_and_validate_config
from . import print_config
from .daijin_configuration import DaijinConfiguration
from .._transcripts.scoring_configuration import ScoringFile
Expand Down Expand Up @@ -254,6 +254,8 @@ def create_daijin_config(args: Namespace, config=None, level="ERROR", piped=Fals

final_config = config.copy()

final_config = load_and_validate_config(final_config)

if args.exe:
with open(args.exe, "wt") as out:
for key, val in dataclasses.asdict(final_config.load).items():
Expand Down
1 change: 1 addition & 0 deletions Mikado/loci/abstractlocus.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,7 @@ def as_dict(self) -> dict:
state["transcripts"] = dict((tid, state["transcripts"][tid].as_dict()) for tid in state["transcripts"])
assert "metrics_calculated" in state
state["json_conf"] = dataclasses.asdict(state["json_conf"])
assert state["json_conf"]["seed"] is not None
return state

def load_dict(self, state: dict, load_transcripts=True, load_configuration=True):
Expand Down
1 change: 0 additions & 1 deletion Mikado/loci/excluded.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ def __init__(self, monosublocus_instance=None, configuration=None, logger=None):
Abstractlocus.__init__(self, configuration=configuration)
self.splitted = False
self.metrics_calculated = False
# self.configuration = configuration
self.logger = logger
if isinstance(monosublocus_instance, Transcript):
Abstractlocus.__init__(self, transcript_instance=monosublocus_instance)
Expand Down
Loading

0 comments on commit 67d4672

Please sign in to comment.