Skip to content

Commit

Permalink
Switched to JSON schema for the data configuration. So much bloated c…
Browse files Browse the repository at this point in the history
…ode that is going the way of the dodo :-)
  • Loading branch information
Luca Venturini authored and Luca Venturini committed Oct 2, 2015
1 parent a340898 commit 0c2c04b
Show file tree
Hide file tree
Showing 25 changed files with 417 additions and 412 deletions.
4 changes: 2 additions & 2 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
include DESCRIPTION.rst
include *py
recursive-inlcude mikado_lib *py
include *py *.json *.yaml
recursive-include mikado_lib *.py *.json *.yaml
recursive-include util *py
recursive-include test *py
2 changes: 1 addition & 1 deletion mikado_lib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@
import mikado_lib.loci_objects
import mikado_lib.serializers
import mikado_lib.exceptions
import mikado_lib.json_utils
import mikado_lib.configuration.json_utils
import mikado_lib.scales
1 change: 1 addition & 0 deletions mikado_lib/configuration/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__author__ = 'venturil'
233 changes: 233 additions & 0 deletions mikado_lib/configuration/configuration_blueprint.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
{
"type": "object",
"comment": [""],
"properties": {
"chimera_split":{
"comment": ["Parameters related to the splitting of transcripts in the presence of",
"two or more ORFs. Parameters:",
"- execute: whether to split multi-ORF transcripts at all. Boolean.",
"- blast_check: whether to use BLAST information to take a decision. See blast_params for details.",
"- blast_params: Parameters related to which BLAST data we want to analyse."],
"type": "object",
"properties":{
"blast_check": {"type": "boolean", "default": false},
"execute": {"type": "boolean", "default": false},
"blast_params": {
"type": "object",
"comment": ["Parameters for the BLAST check prior to splitting.",
"- evalue: Minimum evalue for the whole hit. Default: 1e-6",
"- hsp_evalue: Minimum evalue for any HSP hit (some might be discarded even if the whole hit is valid). Default: 1e-6",
"- leniency: One of 'STRINGENT', 'LENIENT', 'PERMISSIVE'. Default: LENIENT",
"- max_target_seqs: maximum number of hits to consider. Default: 3",
"- minimal_hsp_overlap: minimum overlap of the ORF with the HSP (*not* reciprocal). Default: 0.9, i.e. 90%",
"- min_overlap_duplication: minimum overlap (as a fraction between 0 and 1) for two ORFs to consider them as target duplications.",
" This means that if two ORFs have no HSPs in common, but the coverage of their disjoint HSPs covers more",
" than this fraction of the length of the *target*, they most probably represent a duplicated gene. Default: 0.9"
],
"properties": {
"evalue": {"type": "number", "minimum": 0, "default": 0.000001},
"hsp_evalue": {"type": "number", "minimum": 0, "default": 0.000001},
"leniency": {"type": "string", "enum": ["STRINGENT", "LENIENT","PERMISSIVE"],
"default": "LENIENT"},
"max_target_seqs": {"type": "integer", "minimum": 1, "default": 3},
"minimal_hsp_overlap": {"type": "number", "minimum": 0,
"maximum": 1, "default": 0.9},
"min_overlap_duplication": {"type": "number", "minimum": 0, "maximum": 1, "default": 0.9}
}
}
},
"required": ["blast_check", "execute"]
},
"log_settings":{
"comment": ["Settings related to the logs. Keys:",
"log: the name of the log file. Default: mikado.log",
"log_level: verbosity. In decreasing order: 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'. Default: WARNING"],
"type": "object",
"properties":{
"log": {
"type": "string",
"default": "mikado.log"
},
"log_level": {"type": "string", "enum": ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
"default": "WARNING" }
}
},
"db_settings": {
"comment": ["Settings related to DB connection. Parameters:",
"db: the DB to connect to. Required. Default: mikado.db",
"dbtype: Type of DB to use. Choices: sqlite, postgresql, mysql. Default: sqlite.",
"dbhost: Host of the database. Unused if dbtype is sqlite. Default: localhost",
"dbuser: DB user. Default: ''", "dbpasswd: DB password for the user. Default: ''",
"dbport: Integer. It indicates the default port for the DB."],
"type": "object",
"properties": {
"db": {"default": "mikado.db", "type": "string"},
"dbtype": {"default": "sqlite", "enum": ["mysql", "postgresql", "sqlite"], "type": "string" },
"dbhost": { "format": "hostname", "default": "localhost"},
"dbuser": {"type": "string", "default": ""},
"dbpasswd": {"type": "string", "default": ""},
"dbport": {"type": "integer", "default": 0}
},
"required": ["dbtype", "db"]
},
"source": { "type": "string", "default": "Mikado"},
"alternative_splicing": {
"comment": ["Parameters related to alternative splicing reporting.",
"- report: whether to report at all or not the AS events.",
"- min_cds_overlap: minimum overlap between the CDS of the primary transcript and any AS event. Default: 60%.",
"- keep_retained_introns: Whether to consider as valid AS events where one intron",
"is retained compared to the primary or any other valid AS. Default: false.",
"- max_isoforms: Maximum number of isoforms per locus. 1 implies no AS reported. Default: 3",
"- valid_ccodes: Valid class codes for AS events. See documentation for details. Choices:",
" j, n, O, e, K, o, h. Default: j, O, n, h",
"- max_utr_length: Maximum length of the UTR for AS events. Default: 1e6 (i.e. no limit)",
"- max_fiveutr_length: Maximum length of the 5'UTR for AS events. Default: 1e6 (i.e. no limit)",
"- max_threeutr_length: Maximum length of the 3'UTR for AS events. Default: 1e6 (i.e. no limit)"],
"type": "object",
"properties": {
"report": {
"type": "boolean",
"default": true
},
"min_cds_overlap": {
"type": "number",
"minimum": 0,
"maximum": 1,
"default": 0.6
},
"keep_retained_introns": {
"type": "boolean",
"default": false
},
"max_isoforms": {
"type": "integer",
"minimum": 1,
"default": 3
},
"max_utr_length":{
"type": "integer",
"default": 1000000,
"minimum": 1
},
"max_fiveutr_length":{
"type": "integer",
"default": 1000000,
"minimum": 1
},
"max_threeutr_length":{
"type": "integer",
"default": 1000000,
"minimum": 1
},
"valid_ccodes": {
"type": "array",
"items": {
"type": "string",
"enum": [
"j",
"n",
"O",
"e",
"K",
"o",
"h"
]
},
"default": [
"j",
"n",
"O",
"h"
]
}
}
},
"orf_loading": {
"type": "object",
"comment": ["Parameters related to ORF loading.",
"- minimal_secondary_orf_length: Minimum length of a *secondary* ORF",
" to be loaded after the first, in bp. Default: 200 bps",
"- minimal_orf_length: Minimum length in bps of an ORF to be loaded,",
" as the primary ORF, onto a transcript. Default: 50 bps",
"- strand_specific: Boolean flag. If set to true, monoexonic transcripts",
" will not have their ORF reversed even if they would have an ORF on the opposite strand."
],
"properties": {
"minimal_secondary_orf_length": {
"type": "integer",
"default": 200,
"minimum": 0
},
"minimal_orf_length": {
"type": "integer",
"default": 50,
"minimum": 0
},
"strand_specific": {
"type": "boolean",
"default": true
}
}
},
"run_options": {
"comment": ["Generic run options.",
"- shm: boolean flag. If set and the DB is sqlite, it will be copied onto the /dev/shm faux partition",
" for faster access. Default: false",
"- shm_db: String. It indicates a DB that has to be copied onto SHM and left there for",
" concurrent Mikado runs.",
"- shm_shared: boolean flag. If set, the database loaded onto SHM will be shared and should not be",
" deleted at the end of the run (see shm_db).",
"- exclude_cds: boolean flag. If set, the CDS information will not be printed in Mikado output. Default: false",
"- purge: boolean flag. If set, all loci where all transcripts have a score of 0 will be excluded",
" from the output. Default: false",
"- remove_overlapping_fragments: boolean flag. If set, fragments (defined as monoexonic loci",
" classified as P,x,i or p compared to another locus) will be removed from the output. Default: true",
"- fragments_maximal_cds: a monoexonic superlocus will never be considered a fragment if its longest CDS is over",
" this length. Default: 100 bps.",
"- threads: number of threads to use. Default: 1",
"- preload: boolean flag. If set, the whole database will be preloaded into memory for faster access. Useful when",
" using SQLite databases."],
"type": "object",
"properties": {
"shm": {
"type": "boolean",
"default": false
},
"shm_db":{"type": "string", "default": ""},
"exclude_cds": {
"type": "boolean",
"default": false
},
"purge": {
"type": "boolean",
"default": false
},
"remove_overlapping_fragments": {
"type": "boolean",
"default": true
},
"fragments_maximal_cds": {
"type": "integer",
"default": 100
},
"subloci_from_cds_only": {
"type": "boolean",
"default": false
},
"threads": {
"type": "integer",
"default": 1,
"minimum": 1
},
"preload": {
"type": "boolean",
"default": false
}
}
},
"scoring_file": {
"type": "string",
"default": "scoring.yaml"
}
}
}
Loading

0 comments on commit 0c2c04b

Please sign in to comment.