-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathconfig-schema.json
1 lines (1 loc) · 54.6 KB
/
config-schema.json
1
{"properties":{"chain-local-alignments":{"type":"object","properties":{"chained-las":{"type":"string","description":"write alignment chains to <chained-las>."},"max-relative-overlap":{"type":"number","description":"two local alignments may only be chained if the overlap between them is at most <fraction> times the size of the shorter local alignment. This must hold for the reference and query. (default: 0.30)"},"min-score":{"type":"integer","description":"output chains with a score of at least <int> (default: trace point spacing of alignment)"},"min-relative-score":{"type":"number","description":"output chains with a score of at least <fraction> of the best chains score. A value of 1.0 means that only chains with the best chains score will be accepted; a value of 0.0 means that all chains will be accepted (default: 1.0)"},"q":{"type":"boolean","description":"reduce output as much as possible reporting only fatal errors. If given this option overrides --verbose."},"v":{"type":"integer","description":"increase output to help identify problems; use up to three times. Warning: performance may be drastically reduced if using three times."},"progress-every":{"type":"integer","description":"Print status reports every <msecs>. (default: 500)"},"max-chain-gap":{"type":"integer","description":"two local alignments may only be chained if at most <bps> of sequence in the A-read and B-read are unaligned. (default: 10000)"},"progress":{"type":"boolean","description":"Print regular status reports on the progress."},"max-indel":{"type":"integer","description":"two local alignments may only be chained if the resulting insertion or deletion is at most <bps> (default: 1000)"},"reference":{"type":"string","description":"reference assembly in .dam format"},"revert":{"type":{"$ref":"#/$defs/option-list"},"description":"revert named option to default value. 
This is useful to revert specific options of a config file."},"quiet":{"type":"boolean","description":"reduce output as much as possible reporting only fatal errors. If given this option overrides --verbose."},"reads":{"type":"string","description":"set of PacBio reads in .db/.dam format"},"alignment":{"type":"string","description":"self-alignment of the reference assembly or reads vs. reference alignment"},"progress-format":{"description":"Use <format> for status report lines where <format> is either `human` or `json`. The former prints a status line that updates regularly while the latter prints a full JSON record per line with every update (default: human)","enum":["human","json"]},"verbose":{"type":"integer","description":"increase output to help identify problems; use up to three times. Warning: performance may be drastically reduced if using three times."}}},"filter-mask":{"type":"object","properties":{"min-gap-size":{"type":"integer","description":"minimum size for gaps between mask intervals (default: 0)"},"q":{"type":"boolean","description":"reduce output as much as possible reporting only fatal errors. If given this option overrides --verbose."},"min-interval-size":{"type":"integer","description":"minimum size for mask intervals (default: 0)"},"v":{"type":"integer","description":"increase output to help identify problems; use up to three times. Warning: performance may be drastically reduced if using three times."},"filtered-mask":{"type":"string","description":"write filtered Dazzler mask to <filtered-mask>"},"input-mask":{"type":"string","description":"filter Dazzler mask <input-mask>"},"reference":{"type":"string","description":"reference assembly in .dam format"},"revert":{"type":{"$ref":"#/$defs/option-list"},"description":"revert named option to default value. This is useful to revert specific options of a config file."},"quiet":{"type":"boolean","description":"reduce output as much as possible reporting only fatal errors. 
If given this option overrides --verbose."},"verbose":{"type":"integer","description":"increase output to help identify problems; use up to three times. Warning: performance may be drastically reduced if using three times."}}},"show-mask":{"type":"object","properties":{"repeat-mask":{"type":"array","description":"read Dazzler mask <repeat-mask>","items":{"type":"string"}},"q":{"type":"boolean","description":"reduce output as much as possible reporting only fatal errors. If given this option overrides --verbose."},"j":{"type":"boolean","description":"if given write the information in JSON format"},"v":{"type":"integer","description":"increase output to help identify problems; use up to three times. Warning: performance may be drastically reduced if using three times."},"json":{"type":"boolean","description":"if given write the information in JSON format"},"reference":{"type":"string","description":"reference assembly in .dam format"},"revert":{"type":{"$ref":"#/$defs/option-list"},"description":"revert named option to default value. This is useful to revert specific options of a config file."},"quiet":{"type":"boolean","description":"reduce output as much as possible reporting only fatal errors. If given this option overrides --verbose."},"verbose":{"type":"integer","description":"increase output to help identify problems; use up to three times. Warning: performance may be drastically reduced if using three times."}}},"show-pile-ups":{"type":"object","properties":{"q":{"type":"boolean","description":"reduce output as much as possible reporting only fatal errors. If given this option overrides --verbose."},"pile-ups":{"type":"string","description":"read pile ups from <pile-ups>"},"j":{"type":"boolean","description":"if given write the information in JSON format"},"v":{"type":"integer","description":"increase output to help identify problems; use up to three times. 
Warning: performance may be drastically reduced if using three times."},"json":{"type":"boolean","description":"if given write the information in JSON format"},"revert":{"type":{"$ref":"#/$defs/option-list"},"description":"revert named option to default value. This is useful to revert specific options of a config file."},"quiet":{"type":"boolean","description":"reduce output as much as possible reporting only fatal errors. If given this option overrides --verbose."},"verbose":{"type":"integer","description":"increase output to help identify problems; use up to three times. Warning: performance may be drastically reduced if using three times."}}},"output":{"type":"object","properties":{"agp":{"type":"string","description":"write AGP v2.1 file that describes the output assembly"},"scaffolding":{"type":"string","description":"write the assembly scaffold to <insertions-db>; use `show-insertions` to inspect the result"},"verbose":{"type":"integer","description":"increase output to help identify problems; use up to three times. Warning: performance may be drastically reduced if using three times."},"result":{"type":"string","description":"write gap-closed assembly to <result> (default: stdout)"},"fasta-line-width":{"type":"integer","description":"line width for output FASTA (default: 50)"},"q":{"type":"boolean","description":"reduce output as much as possible reporting only fatal errors. If given this option overrides --verbose."},"skip-gaps":{"type":{"$ref":"#/$defs/option-list"},"description":"Do not close the specified gaps. Each <gap-spec> is a pair of contig IDs <contigA>-<contigB> meaning that the gap between the specified contigs should not be closed. They will still be joined by a preexisting gap."},"v":{"type":"integer","description":"increase output to help identify problems; use up to three times. 
Warning: performance may be drastically reduced if using three times."},"closed-gaps-bed":{"type":"string","description":"write BED file with coordinates of closed gaps"},"max-insertion-error":{"type":"number","description":"insertion and existing contigs must match with less error than <double> (default: 0.10)"},"insertions":{"type":"string","description":"read insertion information from <insertions> generated by the `merge-insertions` command"},"agp-dazzler":{"type":"boolean","description":"use Dazzler DB contig IDs to identify AGP components"},"min-extension-length":{"type":"integer","description":"extensions must have at least <ulong> bps of consensus to be inserted (default: 100)"},"no-highlight-insertions":{"type":"boolean","description":"turn off highlighting (upper case) of inserted sequences in the FASTA output"},"reference":{"type":"string","description":"reference assembly in .dam format"},"w":{"type":"integer","description":"line width for output FASTA (default: 50)"},"agp-skip-read-ids":{"type":"boolean","description":"do not enumerate all reads used for gap closing in AGP file to reduce the resulting file size"},"H":{"type":"boolean","description":"turn off highlighting (upper case) of inserted sequences in the FASTA output"},"revert":{"type":{"$ref":"#/$defs/option-list"},"description":"revert named option to default value. This is useful to revert specific options of a config file."},"reads":{"type":"string","description":"set of PacBio reads in .db/.dam format"},"skip-gaps-file":{"type":"string","description":"Same as --skip-gaps but <file> contains one <gap-spec> per line. If both options are given the union of all <gap-spec>s will be used. Empty lines and lines starting with `#` will be ignored."},"only":{"description":"only process/output insertions of the given type. Allowed values are `spanning` (i.e. 
the sequence just extends the contig but does not reach across two contigs) and `both`. Note, extending insertions are experimental and may produce invalid results. (default: spanning)","enum":["spanning","extending","both"]},"quiet":{"type":"boolean","description":"reduce output as much as possible reporting only fatal errors. If given this option overrides --verbose."},"join-policy":{"description":"allow only joins (gap filling) in the given mode: `scaffoldGaps` (only join gaps inside of scaffolds – marked by `n`s in FASTA), `scaffolds` (join gaps inside of scaffolds and try to join scaffolds), `contigs` (break input into contigs and re-scaffold everything; maintains scaffold gaps where new scaffolds are consistent) (default: `scaffoldGaps`)","enum":["scaffoldGaps","scaffolds","contigs"]}}},"mask-repetitive-regions":{"type":"object","properties":{"verbose":{"type":"integer","description":"increase output to help identify problems; use up to three times. Warning: performance may be drastically reduced if using three times."},"repeat-mask":{"type":"string","description":"write inferred repeat mask into a Dazzler mask."},"proper-alignment-allowance":{"type":"integer","description":"An alignment is called proper if it is end-to-end with at most <num> bp allowance. (default: trace point spacing of alignment)"},"q":{"type":"boolean","description":"reduce output as much as possible reporting only fatal errors. If given this option overrides --verbose."},"debug-repeat-masks":{"type":"boolean","description":"(only for reads-mask) write mask components into additional masks `<repeat-mask>-<component-type>`"},"max-coverage-reads":{"type":"integer","description":"this is used to derive a repeat mask from the ref vs. 
reads alignment; if the alignment coverage is larger than <uint> it will be considered repetitive; a default value is derived from --read-coverage; both options are mutually exclusive"},"max-improper-coverage-reads":{"type":"integer","description":"this is used to derive a repeat mask from the ref vs. reads alignment; if the coverage of improper alignments is larger than <uint> it will be considered repetitive; a default value is derived from --read-coverage; both options are mutually exclusive"},"C":{"type":"number","description":"This is used to provide good default values for --max-coverage-reads (mask-repetitive-regions) or --min-coverage-reads (validate-regions); --read-coverage and --*-coverage-reads are mutually exclusive. Ideally, the user provides the haploid read coverage which, for example, may be inferred using a histogram of the alignment coverage across the genome. Alternatively, the average raw read coverage can be used which is the number of base pairs in the reads divided by the number of base pairs in the assembly."},"v":{"type":"integer","description":"increase output to help identify problems; use up to three times. Warning: performance may be drastically reduced if using three times."},"max-coverage-self":{"type":"integer","description":"this is used to derive a repeat mask from the self alignment; if the alignment coverage is larger than <uint> it will be considered repetitive (default: 4)"},"reference":{"type":"string","description":"reference assembly in .dam format"},"revert":{"type":{"$ref":"#/$defs/option-list"},"description":"revert named option to default value. This is useful to revert specific options of a config file."},"quiet":{"type":"boolean","description":"reduce output as much as possible reporting only fatal errors. 
If given this option overrides --verbose."},"reads":{"type":"string","description":"set of PacBio reads in .db/.dam format"},"alignment":{"type":"string","description":"self-alignment of the reference assembly or reads vs. reference alignment"},"read-coverage":{"type":"number","description":"This is used to provide good default values for --max-coverage-reads (mask-repetitive-regions) or --min-coverage-reads (validate-regions); --read-coverage and --*-coverage-reads are mutually exclusive. Ideally, the user provides the haploid read coverage which, for example, may be inferred using a histogram of the alignment coverage across the genome. Alternatively, the average raw read coverage can be used which is the number of base pairs in the reads divided by the number of base pairs in the assembly."}}},"bed2mask":{"type":"object","properties":{"q":{"type":"boolean","description":"reduce output as much as possible reporting only fatal errors. If given this option overrides --verbose."},"v":{"type":"integer","description":"increase output to help identify problems; use up to three times. Warning: performance may be drastically reduced if using three times."},"mask":{"type":"string","description":"name of output Dazzler mask"},"bed":{"type":"string","description":"input BED file; fields must be TAB-delimited (default: standard input)"},"reference":{"type":"string","description":"reference assembly in .dam format"},"revert":{"type":{"$ref":"#/$defs/option-list"},"description":"revert named option to default value. This is useful to revert specific options of a config file."},"quiet":{"type":"boolean","description":"reduce output as much as possible reporting only fatal errors. If given this option overrides --verbose."},"data-comments":{"type":"boolean","description":"parse BED comments (column 4) as generated by `output`. 
This will cause a crash if formatting errors are encountered."},"verbose":{"type":"integer","description":"increase output to help identify problems; use up to three times. Warning: performance may be drastically reduced if using three times."}}},"propagate-mask":{"type":"object","properties":{"verbose":{"type":"integer","description":"increase output to help identify problems; use up to three times. Warning: performance may be drastically reduced if using three times."},"repeat-mask":{"type":"string","description":"write inferred repeat mask into a Dazzler mask."},"q":{"type":"boolean","description":"reduce output as much as possible reporting only fatal errors. If given this option overrides --verbose."},"v":{"type":"integer","description":"increase output to help identify problems; use up to three times. Warning: performance may be drastically reduced if using three times."},"mask":{"type":{"$ref":"#/$defs/option-list"},"description":"Dazzler masks for repetitive regions (at least one required; generate with `mask-repetitive-regions` command)"},"reference":{"type":"string","description":"reference assembly in .dam format"},"revert":{"type":{"$ref":"#/$defs/option-list"},"description":"revert named option to default value. This is useful to revert specific options of a config file."},"quiet":{"type":"boolean","description":"reduce output as much as possible reporting only fatal errors. If given this option overrides --verbose."},"reads":{"type":"string","description":"set of PacBio reads in .db/.dam format"},"alignment":{"type":"string","description":"self-alignment of the reference assembly or reads vs. 
reference alignment"},"m":{"type":{"$ref":"#/$defs/option-list"},"description":"Dazzler masks for repetitive regions (at least one required; generate with `mask-repetitive-regions` command)"}}},"merge-masks":{"type":"object","properties":{"input-masks":{"type":"array","description":"merge these Dazzler masks","items":{"type":"string"}},"q":{"type":"boolean","description":"reduce output as much as possible reporting only fatal errors. If given this option overrides --verbose."},"v":{"type":"integer","description":"increase output to help identify problems; use up to three times. Warning: performance may be drastically reduced if using three times."},"reference":{"type":"string","description":"reference assembly in .dam format"},"revert":{"type":{"$ref":"#/$defs/option-list"},"description":"revert named option to default value. This is useful to revert specific options of a config file."},"quiet":{"type":"boolean","description":"reduce output as much as possible reporting only fatal errors. If given this option overrides --verbose."},"merged-mask":{"type":"string","description":"name of merged mask"},"verbose":{"type":"integer","description":"increase output to help identify problems; use up to three times. Warning: performance may be drastically reduced if using three times."}}},"__default__":{"type":"object","properties":{"chained-las":{"type":"string","description":"write alignment chains to <chained-las>."},"weak-coverage-window":{"type":"integer","description":"consider sliding window of <bps> base pairs to identify weak coverage (default: 500)"},"datander-ref":{"type":{"$ref":"#/$defs/option-list"},"description":"Provide additional options to `datander`"},"config":{"type":"string","description":"provide configuration values in a YAML or JSON file. See README.md for usage and examples."},"batch":{"type":{"$ref":"#/$defs/option-list"},"description":"process only a subset of the pile ups. <pile-up-ids> is a comma-separated list of <idx-spec>. 
Each <idx-spec> is either a single integer <idx> or a range <from>..<to>. <idx>, <from> and <to> are zero-based indices into the pile up DB. The range is right-open, i.e. index <to> is excluded. <to> may be a dollar-sign (`$`) to indicate the end of the pile up DB."},"filtered-mask":{"type":"string","description":"write filtered Dazzler mask to <filtered-mask>"},"min-anchor-length":{"type":"integer","description":"alignments need to have at least this length of unique anchoring sequence (default: 500)"},"daligner-consensus":{"type":{"$ref":"#/$defs/option-list"},"description":"Provide additional options to `daligner`"},"min-spanning-reads":{"type":"integer","description":"require at least <ulong> spanning reads to close a gap (default: 3)"},"max-insertion-error":{"type":"number","description":"insertion and existing contigs must match with less error than <double> (default: 0.10)"},"progress":{"type":"boolean","description":"Print regular status reports on the progress."},"min-reads-per-pile-up":{"type":"integer","description":"pile ups must have at least <ulong> reads to be processed (default: 3)"},"data-comments":{"type":"boolean","description":"parse BED comments (column 4) as generated by `output`. This will cause a crash if formatting errors are encountered."},"no-merge-extension":{"type":"boolean","description":"Do not merge extension reads into spanning pile ups."},"revert":{"type":{"$ref":"#/$defs/option-list"},"description":"revert named option to default value. This is useful to revert specific options of a config file."},"bad-fraction":{"type":"number","description":"Intrinsic QVs are categorized as \"bad\" if they are greater or equal to the best QV of the worst <frac> trace point intervals. (default: 0.8)"},"skip-gaps-file":{"type":"string","description":"Same as --skip-gaps but <file> contains one <gap-spec> per line. If both options are given the union of all <gap-spec>s will be used. 
Empty lines and lines starting with `#` will be ignored."},"T":{"type":"integer","description":"use <uint> threads (default: number of cores)"},"no-highlight-insertions":{"type":"boolean","description":"turn off highlighting (upper case) of inserted sequences in the FASTA output"},"m":{"type":{"$ref":"#/$defs/option-list"},"description":"Dazzler masks for repetitive regions (at least one required; generate with `mask-repetitive-regions` command)"},"threads":{"type":"integer","description":"use <uint> threads (default: number of cores)"},"proper-alignment-allowance":{"type":"integer","description":"An alignment is called proper if it is end-to-end with at most <num> bp allowance. (default: trace point spacing of alignment)"},"v":{"type":"integer","description":"increase output to help identify problems; use up to three times. Warning: performance may be drastically reduced if using three times."},"max-coverage-reads":{"type":"integer","description":"this is used to derive a repeat mask from the ref vs. reads alignment; if the alignment coverage is larger than <uint> it will be considered repetitive; a default value is derived from --read-coverage; both options are mutually exclusive"},"A":{"type":"integer","description":"use <num-threads> threads for auxiliary tools like `daligner`, `damapper` and `daccord` (default: floor(totalCpus / <threads>) )"},"s":{"type":"integer","description":"require at least <ulong> spanning reads to close a gap (default: 3)"},"min-coverage-reads":{"type":"integer","description":"validly closed gaps must have a continuous coverage of at least <num> properly aligned reads; see --weak-coverage-mask for more details"},"debug-pile-ups":{"type":"string","description":"write pile ups of intermediate steps to `<db-stem>.<state>.db`"},"progress-every":{"type":"integer","description":"Print status reports every <msecs>. 
(default: 500)"},"max-chain-gap":{"type":"integer","description":"two local alignments may only be chained if at most <bps> of sequence in the A-read and B-read are unaligned. (default: 10000)"},"dust-reads":{"type":{"$ref":"#/$defs/option-list"},"description":"Provide additional options to `dust`"},"max-coverage-self":{"type":"integer","description":"this is used to derive a repeat mask from the self alignment; if the alignment coverage is larger than <uint> it will be considered repetitive (default: 4)"},"json":{"type":"boolean","description":"if given write the information in JSON format"},"bed":{"type":"string","description":"input BED file; fields must be TAB-delimited (default: standard input)"},"report-all":{"type":"boolean","description":"report all validation results instead of only failed gaps"},"max-indel":{"type":"integer","description":"two local alignments may only be chained if the resulting insertion or deletion is at most <bps> (default: 1000)"},"w":{"type":"integer","description":"line width for output FASTA (default: 50)"},"coord-string":{"type":"array","description":"translate coordinate(s) given by <coord-string> of the result into coordinates on the reference. Coordinates are always 1-based. A <coord-string> has the format `scaffold/<uint:scaffold-id>/<uint:coord>` which describes a coordinate on `>scaffold-<scaffold-id>` starting at the first base pair of the scaffold","items":{"type":"string"}},"daccord":{"type":{"$ref":"#/$defs/option-list"},"description":"Provide additional options to `daccord`"},"cache-contig-alignments":{"type":"string","description":"if given the contig location will be cached as JSON faking the effect of the same option in `check-results`. NOTE: the result has to be amended manually to be fully valid. (default: )"},"alignment":{"type":"string","description":"self-alignment of the reference assembly or reads vs. 
reference alignment"},"merged-mask":{"type":"string","description":"name of merged mask"},"read-coverage":{"type":"number","description":"This is used to provide good default values for --max-coverage-reads (mask-repetitive-regions) or --min-coverage-reads (validate-regions); --read-coverage and --*-coverage-reads are mutually exclusive. Ideally, the user provides the haploid read coverage which, for example, may be inferred using a histogram of the alignment coverage across the genome. Alternatively, the average raw read coverage can be used which is the number of base pairs in the reads divided by the number of base pairs in the assembly."},"existing-gap-bonus":{"type":"number","description":"if a candidate would close an existing gap its size is multiplied by <double> before conflict resolution (see --best-pile-up-margin). (default: 6.0)"},"min-gap-size":{"type":"integer","description":"minimum size for gaps between mask intervals (default: 0)"},"partitioned-insertions":{"type":"array","description":"merge insertion information from <partitioned-insertions>... generated by the `process-pile-ups` command","items":{"type":"string"}},"result":{"type":"string","description":"write gap-closed assembly to <result> (default: stdout)"},"q":{"type":"boolean","description":"reduce output as much as possible reporting only fatal errors. If given this option overrides --verbose."},"min-score":{"type":"integer","description":"output chains with a score of at least <int> (default: trace point spacing of alignment)"},"min-interval-size":{"type":"integer","description":"minimum size for mask intervals (default: 0)"},"min-relative-score":{"type":"number","description":"output chains with a score of at least <fraction> of the best chains score. 
A value of 1.0 means that only chains with the best chains score will be accepted; a value of 0.0 means that all chains will be accepted (default: 1.0)"},"N":{"type":"integer","description":"this is used to derive a lower bound for the read coverage"},"max-alignment-error":{"type":"number","description":"local alignments may have an error rate of no more than <double> (default: 0.30)"},"max-improper-coverage-reads":{"type":"integer","description":"this is used to derive a repeat mask from the ref vs. reads alignment; if the coverage of improper alignments is larger than <uint> it will be considered repetitive; a default value is derived from --read-coverage; both options are mutually exclusive"},"C":{"type":"number","description":"This is used to provide good default values for --max-coverage-reads (mask-repetitive-regions) or --min-coverage-reads (validate-regions); --read-coverage and --*-coverage-reads are mutually exclusive. Ideally, the user provides the haploid read coverage which, for example, may be inferred using a histogram of the alignment coverage across the genome. Alternatively, the average raw read coverage can be used which is the number of base pairs in the reads divided by the number of base pairs in the assembly."},"agp":{"type":"string","description":"write AGP v2.1 file that describes the output assembly"},"min-extension-length":{"type":"integer","description":"extensions must have at least <ulong> bps of consensus to be inserted (default: 100)"},"best-pile-up-margin":{"type":"number","description":"given a set of conflicting gap closing candidates, if the largest has <double> times more reads than the second largest it is considered unique. If a candidate would close a gap in the reference assembly marked by `n`s the number of reads is multiplied by --existing-gap-bonus. 
(default: 3.0)"},"keep-temp":{"type":"boolean","description":"keep the temporary files; outputs the exact location"},"k":{"type":"boolean","description":"keep the temporary files; outputs the exact location"},"gap-closed-vs-reads-alignment":{"type":"string","description":"local alignments of the reads against the gap-closed reference in form of a .las file as produced by `damapper` or `daligner`. Chains are disregarded, i.e. chained alignments are split into local alignments."},"regions":{"type":"string","description":"Dazzler mask marking the regions to be validated"},"region-context":{"type":"integer","description":"consider <bps> base pairs of context for each region to detect splicing errors (default: 1000)"},"ref-vs-reads-alignment":{"type":"string","description":"alignment chains of the reads against the reference in form of a .las file, e.g. produced by `damapper`."},"tmpdir":{"type":"string","description":"use <string> as a working directory"},"reads":{"type":"string","description":"set of PacBio reads in .db/.dam format"},"e":{"type":"number","description":"local alignments may have an error rate of no more than <double> (default: 0.30)"},"daligner-self":{"type":{"$ref":"#/$defs/option-list"},"description":"Provide additional options to `daligner`"},"progress-format":{"description":"Use <format> for status report lines where <format> is either `human` or `json`. The former prints a status line that updates regularly while the latter prints a full JSON record per line with every update (default: human)","enum":["human","json"]},"dust-ref":{"type":{"$ref":"#/$defs/option-list"},"description":"Provide additional options to `dust`"},"verbose":{"type":"integer","description":"increase output to help identify problems; use up to three times. 
Warning: performance may be drastically reduced if using three times."},"input-masks":{"type":"array","description":"merge these Dazzler masks","items":{"type":"string"}},"b":{"type":{"$ref":"#/$defs/option-list"},"description":"process only a subset of the pile ups. <pile-up-ids> is a comma-separated list of <idx-spec>. Each <idx-spec> is either a single integer <idx> or a range <from>..<to>. <idx>, <from> and <to> are zero-based indices into the pile up DB. The range is right-open, i.e. index <to> is excluded. <to> may be a dollar-sign (`$`) to indicate the end of the pile up DB."},"ploidy":{"type":"integer","description":"this is used to derive a lower bound for the read coverage"},"max-relative-overlap":{"type":"number","description":"two local alignments may only be chained if the overlap between them is at most <fraction> times the size of the shorter local alignment. This must hold for the reference and query. (default: 0.30)"},"fasta-line-width":{"type":"integer","description":"line width for output FASTA (default: 50)"},"j":{"type":"boolean","description":"if given write the information in JSON format"},"debug-repeat-masks":{"type":"boolean","description":"(only for reads-mask) write mask components into additional masks `<repeat-mask>-<component-type>`"},"weak-coverage-mask":{"type":"string","description":"write a Dazzler mask <mask> of weakly covered regions, i.e. sliding windows of --weak-coverage-window base pairs are spanned by less than --min-coverage-reads local alignments"},"skip-gaps":{"type":{"$ref":"#/$defs/option-list"},"description":"Do not close the specified gaps. Each <gap-spec> is a pair of contig IDs <contigA>-<contigB> meaning that the gap between the specified contigs should not be closed. They will still be joined by a preexisting gap."},"closed-gaps-bed":{"type":"string","description":"write BED file with coordinates of closed gaps"},"only":{"description":"only process/output insertions of the given type. Allowed values are `spanning` (i.e. 
if two contigs would be properly joined), `extending` (i.e. the sequence just extends the contig but does not reach across two contigs) and `both`. Note, extending insertions are experimental and may produce invalid results. (default: spanning)","enum":["spanning","extending","both"]},"auxiliary-threads":{"type":"integer","description":"use <num-threads> threads for auxiliary tools like `daligner`, `damapper` and `daccord` (default: floor(totalCpus / <threads>) )"},"P":{"type":"string","description":"use <string> as a working directory"},"agp-dazzler":{"type":"boolean","description":"use Dazzler DB contig IDs to identify AGP components"},"input-mask":{"type":"string","description":"filter Dazzler mask <input-mask>"},"aux-threads":{"type":"integer","description":"use <num-threads> threads for auxiliary tools like `daligner`, `damapper` and `daccord` (default: floor(totalCpus / <threads>) )"},"allow-single-reads":{"type":"boolean","description":"allow using single reads instead of consensus sequence for gap closing"},"reference":{"type":"string","description":"reference assembly in .dam format"},"daligner-reads-vs-reads":{"type":{"$ref":"#/$defs/option-list"},"description":"Provide additional options to `daligner`"},"quiet":{"type":"boolean","description":"reduce output as much as possible reporting only fatal errors. 
If given this option overrides --verbose."},"damapper-ref-vs-reads":{"type":{"$ref":"#/$defs/option-list"},"description":"Provide additional options to `damapper`"},"H":{"type":"boolean","description":"turn off highlighting (upper case) of inserted sequences in the FASTA output"},"agp-skip-read-ids":{"type":"boolean","description":"do not enumerate all reads used for gap closing in AGP file to reduce the resulting file size"},"join-policy":{"description":"allow only joins (gap filling) in the given mode: `scaffoldGaps` (only join gaps inside of scaffolds – marked by `n`s in FASTA), `scaffolds` (join gaps inside of scaffolds and try to join scaffolds), `contigs` (break input into contigs and re-scaffold everything; maintains scaffold gaps where new scaffolds are consistent) (default: `scaffoldGaps`)","enum":["scaffoldGaps","scaffolds","contigs"]}}},"collect-pile-ups":{"type":"object","properties":{"min-anchor-length":{"type":"integer","description":"alignments need to have at least this length of unique anchoring sequence (default: 500)"},"min-spanning-reads":{"type":"integer","description":"require at least <ulong> spanning reads to close a gap (default: 3)"},"no-merge-extension":{"type":"boolean","description":"Do not merge extension reads into spanning pile ups."},"revert":{"type":{"$ref":"#/$defs/option-list"},"description":"revert named option to default value. 
This is useful to revert specific options of a config file."},"T":{"type":"integer","description":"use <uint> threads (default: number of cores)"},"m":{"type":{"$ref":"#/$defs/option-list"},"description":"Dazzler masks for repetitive regions (at least one required; generate with `mask-repetitive-regions` command)"},"threads":{"type":"integer","description":"use <uint> threads (default: number of cores)"},"A":{"type":"integer","description":"use <num-threads> threads for auxiliary tools like `daligner`, `damapper` and `daccord` (default: floor(totalCpus / <threads>) )"},"pile-ups":{"type":"string","description":"write inferred pile ups into <pile-ups>"},"proper-alignment-allowance":{"type":"integer","description":"An alignment is called proper if it is end-to-end with at most <num> bp allowance. (default: trace point spacing of alignment)"},"debug-pile-ups":{"type":"string","description":"write pile ups of intermediate steps to `<db-stem>.<state>.db`"},"v":{"type":"integer","description":"increase output to help identify problems; use up to three times. Warning: performance may be drastically reduced if using three times."},"s":{"type":"integer","description":"require at least <ulong> spanning reads to close a gap (default: 3)"},"existing-gap-bonus":{"type":"number","description":"if a candidate would close an existing gap its size is multiplied by <double> before conflict resolution (see --best-pile-up-margin). (default: 6.0)"},"q":{"type":"boolean","description":"reduce output as much as possible reporting only fatal errors. If given this option overrides --verbose."},"best-pile-up-margin":{"type":"number","description":"given a set of conflicting gap closing candidates, if the largest has <double> times more reads than the second largest it is considered unique. If a candidate would close a gap in the reference assembly marked by `n`s the number of reads is multiplied by --existing-gap-bonus. 
(default: 3.0)"},"max-alignment-error":{"type":"number","description":"local alignments may have an error rate of no more than <double> (default: 0.30)"},"keep-temp":{"type":"boolean","description":"keep the temporary files; outputs the exact location"},"k":{"type":"boolean","description":"keep the temporary files; outputs the exact location"},"ref-vs-reads-alignment":{"type":"string","description":"alignment chains of the reads against the reference in form of a .las file, e.g. produced by `damapper`."},"tmpdir":{"type":"string","description":"use <string> as a working directory"},"reads":{"type":"string","description":"set of PacBio reads in .db/.dam format"},"e":{"type":"number","description":"local alignments may have an error rate of no more than <double> (default: 0.30)"},"verbose":{"type":"integer","description":"increase output to help identify problems; use up to three times. Warning: performance may be drastically reduced if using three times."},"auxiliary-threads":{"type":"integer","description":"use <num-threads> threads for auxiliary tools like `daligner`, `damapper` and `daccord` (default: floor(totalCpus / <threads>) )"},"P":{"type":"string","description":"use <string> as a working directory"},"mask":{"type":{"$ref":"#/$defs/option-list"},"description":"Dazzler masks for repetitive regions (at least one required; generate with `mask-repetitive-regions` command)"},"aux-threads":{"type":"integer","description":"use <num-threads> threads for auxiliary tools like `daligner`, `damapper` and `daccord` (default: floor(totalCpus / <threads>) )"},"reference":{"type":"string","description":"reference assembly in .dam format"},"quiet":{"type":"boolean","description":"reduce output as much as possible reporting only fatal errors. 
If given this option overrides --verbose."},"damapper-ref-vs-reads":{"type":{"$ref":"#/$defs/option-list"},"description":"Provide additional options to `damapper`"}}},"translate-coords":{"type":"object","properties":{"scaffolding":{"type":"string","description":"read the assembly graph from <scaffolding> generated (see `--scaffolding` of the `output` command)"},"coord-string":{"type":"array","description":"translate coordinate(s) given by <coord-string> of the result into coordinates on the reference. Coordinates are always 1-based. A <coord-string> has the format `scaffold/<uint:scaffold-id>/<uint:coord>` which describes a coordinate on `>scaffold-<scaffold-id>` starting at the first base pair of the scaffold","items":{"type":"string"}},"q":{"type":"boolean","description":"reduce output as much as possible reporting only fatal errors. If given this option overrides --verbose."},"j":{"type":"boolean","description":"if given write the information in JSON format"},"v":{"type":"integer","description":"increase output to help identify problems; use up to three times. Warning: performance may be drastically reduced if using three times."},"json":{"type":"boolean","description":"if given write the information in JSON format"},"revert":{"type":{"$ref":"#/$defs/option-list"},"description":"revert named option to default value. This is useful to revert specific options of a config file."},"quiet":{"type":"boolean","description":"reduce output as much as possible reporting only fatal errors. If given this option overrides --verbose."},"verbose":{"type":"integer","description":"increase output to help identify problems; use up to three times. Warning: performance may be drastically reduced if using three times."}}},"validate-regions":{"type":"object","properties":{"verbose":{"type":"integer","description":"increase output to help identify problems; use up to three times. 
Warning: performance may be drastically reduced if using three times."},"regions":{"type":"string","description":"Dazzler mask marking the regions to be validated"},"threads":{"type":"integer","description":"use <uint> threads (default: number of cores)"},"proper-alignment-allowance":{"type":"integer","description":"An alignment is called proper if it is end-to-end with at most <num> bp allowance. (default: trace point spacing of alignment)"},"q":{"type":"boolean","description":"reduce output as much as possible reporting only fatal errors. If given this option overrides --verbose."},"N":{"type":"integer","description":"this is used to derive a lower bound for the read coverage"},"weak-coverage-window":{"type":"integer","description":"consider sliding window of <bps> base pairs to identify weak coverage (default: 500)"},"v":{"type":"integer","description":"increase output to help identify problems; use up to three times. Warning: performance may be drastically reduced if using three times."},"weak-coverage-mask":{"type":"string","description":"write a Dazzler mask <mask> of weakly covered regions, i.e. sliding windows of --weak-coverage-window base pairs are spanned by less than --min-coverage-reads local alignments"},"s":{"type":"integer","description":"require at least <ulong> spanning reads to close a gap (default: 3)"},"min-coverage-reads":{"type":"integer","description":"validly closed gaps must have a continuous coverage of at least <num> properly aligned reads; see --weak-coverage-mask for more details"},"C":{"type":"number","description":"This is used to provide good default values for --max-coverage-reads (mask-repetitive-regions) or --min-coverage-reads (validate-regions); --read-coverage and --*-coverage-reads are mutually exclusive. Ideally, the user provides the haploid read coverage which, for example, may be inferred using a histogram of the alignment coverage across the genome. 
Alternatively, the average raw read coverage can be used which is the number of base pairs in the reads divided by the number of base pairs in the assembly."},"min-spanning-reads":{"type":"integer","description":"require at least <ulong> spanning reads to close a gap (default: 3)"},"report-all":{"type":"boolean","description":"report all validation results instead of only failed gaps"},"region-context":{"type":"integer","description":"consider <bps> base pairs of context for each region to detect splicing errors (default: 1000)"},"gap-closed-vs-reads-alignment":{"type":"string","description":"local alignments of the reads against the gap-closed reference in form of a .las file as produced by `damapper` or `daligner`. Chains are disregarded, i.e. chained alignments are split into local alignments."},"reference":{"type":"string","description":"reference assembly in .dam format"},"revert":{"type":{"$ref":"#/$defs/option-list"},"description":"revert named option to default value. This is useful to revert specific options of a config file."},"quiet":{"type":"boolean","description":"reduce output as much as possible reporting only fatal errors. If given this option overrides --verbose."},"reads":{"type":"string","description":"set of PacBio reads in .db/.dam format"},"T":{"type":"integer","description":"use <uint> threads (default: number of cores)"},"ploidy":{"type":"integer","description":"this is used to derive a lower bound for the read coverage"},"read-coverage":{"type":"number","description":"This is used to provide good default values for --max-coverage-reads (mask-repetitive-regions) or --min-coverage-reads (validate-regions); --read-coverage and --*-coverage-reads are mutually exclusive. Ideally, the user provides the haploid read coverage which, for example, may be inferred using a histogram of the alignment coverage across the genome. 
Alternatively, the average raw read coverage can be used which is the number of base pairs in the reads divided by the number of base pairs in the assembly."}}},"generate-dazzler-options":{"type":"object","properties":{"q":{"type":"boolean","description":"reduce output as much as possible reporting only fatal errors. If given this option overrides --verbose."},"datander-ref":{"type":{"$ref":"#/$defs/option-list"},"description":"Provide additional options to `datander`"},"v":{"type":"integer","description":"increase output to help identify problems; use up to three times. Warning: performance may be drastically reduced if using three times."},"max-alignment-error":{"type":"number","description":"local alignments may have an error rate of no more than <double> (default: 0.30)"},"min-anchor-length":{"type":"integer","description":"alignments need to have at least this length of unique anchoring sequence (default: 500)"},"revert":{"type":{"$ref":"#/$defs/option-list"},"description":"revert named option to default value. This is useful to revert specific options of a config file."},"quiet":{"type":"boolean","description":"reduce output as much as possible reporting only fatal errors. If given this option overrides --verbose."},"damapper-ref-vs-reads":{"type":{"$ref":"#/$defs/option-list"},"description":"Provide additional options to `damapper`"},"daligner-self":{"type":{"$ref":"#/$defs/option-list"},"description":"Provide additional options to `daligner`"},"e":{"type":"number","description":"local alignments may have an error rate of no more than <double> (default: 0.30)"},"dust-ref":{"type":{"$ref":"#/$defs/option-list"},"description":"Provide additional options to `dust`"},"verbose":{"type":"integer","description":"increase output to help identify problems; use up to three times. 
Warning: performance may be drastically reduced if using three times."}}},"validate-config":{"type":"object","properties":{"q":{"type":"boolean","description":"reduce output as much as possible reporting only fatal errors. If given this option overrides --verbose."},"v":{"type":"integer","description":"increase output to help identify problems; use up to three times. Warning: performance may be drastically reduced if using three times."},"config":{"type":"string","description":"provide configuration values in a YAML or JSON file. See README.md for usage and examples."},"revert":{"type":{"$ref":"#/$defs/option-list"},"description":"revert named option to default value. This is useful to revert specific options of a config file."},"quiet":{"type":"boolean","description":"reduce output as much as possible reporting only fatal errors. If given this option overrides --verbose."},"cache-contig-alignments":{"type":"string","description":"if given the contig location will be cached as JSON faking the effect of the same option in `check-results`. NOTE: the result has to be amended manually to be fully valid. (default: )"},"verbose":{"type":"integer","description":"increase output to help identify problems; use up to three times. Warning: performance may be drastically reduced if using three times."}}},"process-pile-ups":{"type":"object","properties":{"datander-ref":{"type":{"$ref":"#/$defs/option-list"},"description":"Provide additional options to `datander`"},"batch":{"type":{"$ref":"#/$defs/option-list"},"description":"process only a subset of the pile ups. <pile-up-ids> is a comma-separated list of <idx-spec>. Each <idx-spec> is either a single integer <idx> or a range <from>..<to>. <idx>, <from> and <to> are zero-based indices into the pile up DB. The range is right-open, i.e. index <to> is excluded. 
<to> may be a dollar-sign (`$`) to indicate the end of the pile up DB."},"daligner-consensus":{"type":{"$ref":"#/$defs/option-list"},"description":"Provide additional options to `daligner`"},"min-anchor-length":{"type":"integer","description":"alignments need to have at least this length of unique anchoring sequence (default: 500)"},"min-reads-per-pile-up":{"type":"integer","description":"pile ups must have at least <ulong> reads to be processed (default: 3)"},"bad-fraction":{"type":"number","description":"Intrinsic QVs are categorized as \"bad\" if they are greater or equal to the best QV of the worst <frac> trace point intervals. (default: 0.8)"},"revert":{"type":{"$ref":"#/$defs/option-list"},"description":"revert named option to default value. This is useful to revert specific options of a config file."},"T":{"type":"integer","description":"use <uint> threads (default: number of cores)"},"m":{"type":{"$ref":"#/$defs/option-list"},"description":"Dazzler masks for repetitive regions (at least one required; generate with `mask-repetitive-regions` command)"},"threads":{"type":"integer","description":"use <uint> threads (default: number of cores)"},"A":{"type":"integer","description":"use <num-threads> threads for auxiliary tools like `daligner`, `damapper` and `daccord` (default: floor(totalCpus / <threads>) )"},"pile-ups":{"type":"string","description":"read pile ups from <pile-ups>"},"proper-alignment-allowance":{"type":"integer","description":"An alignment is called proper if it is end-to-end with at most <num> bp allowance. (default: trace point spacing of alignment)"},"v":{"type":"integer","description":"increase output to help identify problems; use up to three times. 
Warning: performance may be drastically reduced if using three times."},"insertions":{"type":"string","description":"write insertion information into <insertions>"},"max-chain-gap":{"type":"integer","description":"two local alignments may only be chained if at most <bps> of sequence in the A-read and B-read are unaligned. (default: 10000)"},"dust-reads":{"type":{"$ref":"#/$defs/option-list"},"description":"Provide additional options to `dust`"},"max-indel":{"type":"integer","description":"two local alignments may only be chained if the resulting insertion or deletion is at most <bps> (default: 1000)"},"daccord":{"type":{"$ref":"#/$defs/option-list"},"description":"Provide additional options to `daccord`"},"min-score":{"type":"integer","description":"output chains with a score of at least <int> (default: trace point spacing of alignment)"},"q":{"type":"boolean","description":"reduce output as much as possible reporting only fatal errors. If given this option overrides --verbose."},"min-relative-score":{"type":"number","description":"output chains with a score of at least <fraction> of the best chains score. A value of 1.0 means that only chains with the best chains score will be accepted; a value of 0.0 means that all chains will be accepted (default: 1.0)"},"max-alignment-error":{"type":"number","description":"local alignments may have an error rate of no more than <double> (default: 0.30)"},"k":{"type":"boolean","description":"keep the temporary files; outputs the exact location"},"keep-temp":{"type":"boolean","description":"keep the temporary files; outputs the exact location"},"tmpdir":{"type":"string","description":"use <string> as a working directory"},"reads":{"type":"string","description":"set of PacBio reads in .db/.dam format"},"e":{"type":"number","description":"local alignments may have an error rate of no more than <double> (default: 0.30)"},"b":{"type":{"$ref":"#/$defs/option-list"},"description":"process only a subset of the pile ups. 
<pile-up-ids> is a comma-separated list of <idx-spec>. Each <idx-spec> is either a single integer <idx> or a range <from>..<to>. <idx>, <from> and <to> are zero-based indices into the pile up DB. The range is right-open, i.e. index <to> is excluded. <to> may be a dollar-sign (`$`) to indicate the end of the pile up DB."},"only":{"description":"only process/output insertions of the given type. Allowed values are `spanning` (i.e. if two contigs would be properly joined), `extending` (i.e. the sequence just extends the contig but does not reach across two contigs) and `both`. Note, extending insertions are experimental and may produce invalid results. (default: spanning)","enum":["spanning","extending","both"]},"daligner-self":{"type":{"$ref":"#/$defs/option-list"},"description":"Provide additional options to `daligner`"},"verbose":{"type":"integer","description":"increase output to help identify problems; use up to three times. Warning: performance may be drastically reduced if using three times."},"max-relative-overlap":{"type":"number","description":"two local alignments may only be chained if the overlap between them is at most <fraction> times the size of the shorter local alignment. This must hold for the reference and query. 
(default: 0.30)"},"auxiliary-threads":{"type":"integer","description":"use <num-threads> threads for auxiliary tools like `daligner`, `damapper` and `daccord` (default: floor(totalCpus / <threads>) )"},"P":{"type":"string","description":"use <string> as a working directory"},"mask":{"type":{"$ref":"#/$defs/option-list"},"description":"Dazzler masks for repetitive regions (at least one required; generate with `mask-repetitive-regions` command)"},"aux-threads":{"type":"integer","description":"use <num-threads> threads for auxiliary tools like `daligner`, `damapper` and `daccord` (default: floor(totalCpus / <threads>) )"},"allow-single-reads":{"type":"boolean","description":"allow using single reads instead of consensus sequence for gap closing"},"reference":{"type":"string","description":"reference assembly in .dam format"},"daligner-reads-vs-reads":{"type":{"$ref":"#/$defs/option-list"},"description":"Provide additional options to `daligner`"},"quiet":{"type":"boolean","description":"reduce output as much as possible reporting only fatal errors. If given this option overrides --verbose."}}},"show-insertions":{"type":"object","properties":{"q":{"type":"boolean","description":"reduce output as much as possible reporting only fatal errors. If given this option overrides --verbose."},"j":{"type":"boolean","description":"if given write the information in JSON format"},"v":{"type":"integer","description":"increase output to help identify problems; use up to three times. Warning: performance may be drastically reduced if using three times."},"insertions":{"type":"string","description":"read insertion information from <insertions> generated by the `merge-insertions` command"},"json":{"type":"boolean","description":"if given write the information in JSON format"},"revert":{"type":{"$ref":"#/$defs/option-list"},"description":"revert named option to default value. 
This is useful to revert specific options of a config file."},"quiet":{"type":"boolean","description":"reduce output as much as possible reporting only fatal errors. If given this option overrides --verbose."},"verbose":{"type":"integer","description":"increase output to help identify problems; use up to three times. Warning: performance may be drastically reduced if using three times."}}},"merge-insertions":{"type":"object","properties":{"partitioned-insertions":{"type":"array","description":"merge insertion information from <partitioned-insertions>... generated by the `process-pile-ups` command","items":{"type":"string"}},"q":{"type":"boolean","description":"reduce output as much as possible reporting only fatal errors. If given this option overrides --verbose."},"v":{"type":"integer","description":"increase output to help identify problems; use up to three times. Warning: performance may be drastically reduced if using three times."},"insertions":{"type":"string","description":"write merged insertion information to <insertions>"},"revert":{"type":{"$ref":"#/$defs/option-list"},"description":"revert named option to default value. This is useful to revert specific options of a config file."},"quiet":{"type":"boolean","description":"reduce output as much as possible reporting only fatal errors. If given this option overrides --verbose."},"verbose":{"type":"integer","description":"increase output to help identify problems; use up to three times. Warning: performance may be drastically reduced if using three times."}}}},"description":"Configuration file content for DENTIST.","$defs":{"option-list":{"description":"A single option string or a list of option strings.","anyOf":[{"type":"string"},{"type":"array","items":{"type":"string"}}]}},"$schema":"https://json-schema.org/draft/2020-12/schema","type":"object","$id":"uri://a-ludi/dentist/v3/config.schema.json","title":"DENTIST config"}