-
Notifications
You must be signed in to change notification settings - Fork 743
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Separate map file in second channel for glimpse2 chunk * Change samples infos files of place Add input, output region and map as optional Add output region as prefix * Change samples infos files in glimpse_phase * Change samples_infos file way to combine * Remove view() * Update vcf_impute_glimpse to respect previous change in glimpse process * Add new sbwf for glimpse2 * Update file name * Change input1 to input * Remove md5 sum of bin file * Correct test glimpse2_phase * Small changes * Add keyword to glimpse_chunk * Update tests/modules/nf-core/glimpse/concordance/main.nf Co-authored-by: Adam Talbot <12817534+adamrtalbot@users.noreply.github.com> --------- Co-authored-by: Adam Talbot <12817534+adamrtalbot@users.noreply.github.com>
- Loading branch information
1 parent
d68b2e6
commit c7ddd48
Showing
30 changed files
with
610 additions
and
222 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
include { GLIMPSE2_CHUNK } from '../../../modules/nf-core/glimpse2/chunk/main' | ||
include { GLIMPSE2_SPLITREFERENCE } from '../../../modules/nf-core/glimpse2/splitreference/main' | ||
include { GLIMPSE2_PHASE } from '../../../modules/nf-core/glimpse2/phase/main' | ||
include { GLIMPSE2_LIGATE } from '../../../modules/nf-core/glimpse2/ligate/main' | ||
include { BCFTOOLS_INDEX as INDEX_PHASE } from '../../../modules/nf-core/bcftools/index/main.nf' | ||
include { BCFTOOLS_INDEX as INDEX_LIGATE } from '../../../modules/nf-core/bcftools/index/main.nf' | ||
|
||
//
// Impute several target files against one reference panel with GLIMPSE2:
//   1. chunk the reference panel,
//   2. split the panel into one binary reference file per chunk,
//   3. phase every input file against every binary reference file,
//   4. ligate the phased chunks of each input into a single file and index it.
//
workflow MULTIPLE_IMPUTE_GLIMPSE2 {

    take:
    ch_input    // channel (mandatory): [ meta, vcf, csi, infos ]
    ch_ref      // channel (mandatory): [ meta, vcf, csi, region ]
    ch_map      // channel  (optional): [ meta, map ]
    ch_fasta    // channel  (optional): [ meta, fasta, index ]
    chunk_model // string: model used to chunk the reference panel

    main:

    ch_versions = Channel.empty()

    // Chunk reference panel
    GLIMPSE2_CHUNK ( ch_ref, ch_map, chunk_model )
    ch_versions = ch_versions.mix( GLIMPSE2_CHUNK.out.versions.first() )

    // Read the chunk file (tab separated, no header line) and keep only the
    // buffered region and the imputation region of every chunk.
    chunk_output = GLIMPSE2_CHUNK.out.chunk_chr
        .splitCsv(
            header: ['ID', 'Chr', 'RegionBuf', 'RegionCnk', 'WindowCm',
                     'WindowMb', 'NbTotVariants', 'NbComVariants'],
            sep: "\t",
            skip: 0
        )
        .map { meta, it -> [meta, it["RegionBuf"], it["RegionCnk"]] }

    // Split reference panel in bin files: one entry per (panel, chunk) pair,
    // matched on the panel meta map. The panel region is dropped here because
    // the chunk regions replace it.
    split_input = ch_ref
        .map { meta, ref, index, region -> [meta, ref, index] }
        .combine( chunk_output, by: 0 )

    GLIMPSE2_SPLITREFERENCE ( split_input, ch_map )
    ch_versions = ch_versions.mix( GLIMPSE2_SPLITREFERENCE.out.versions.first() )

    // Pair every input with every reference bin file.
    // The panel meta map is dropped; the input/output regions, the reference
    // index and the map are left empty because a bin file is used.
    phase_input = ch_input
        .combine( GLIMPSE2_SPLITREFERENCE.out.bin_ref )
        .map { input_meta, input_file, input_index, input_infos,
               panel_meta, panel_bin ->
            [ input_meta, input_file, input_index, input_infos,
              [], [], panel_bin, [], [] ]
        }

    // Phase input files for each reference bin files + indexing
    // phase_input: [ meta, vcf, index, sample_infos, region_in, region_out, ref, ref_index, map ]
    // ch_fasta   : [ meta, fasta, index ]
    GLIMPSE2_PHASE ( phase_input, ch_fasta )
    ch_versions = ch_versions.mix( GLIMPSE2_PHASE.out.versions.first() )

    INDEX_PHASE ( GLIMPSE2_PHASE.out.phased_variant )
    ch_versions = ch_versions.mix( INDEX_PHASE.out.versions.first() )

    // Ligate all phased files in one and index it.
    // Phased files and their indexes are grouped per input meta and joined on
    // that key, so each input only receives its own indexes. The grouped index
    // channel must not be collect()-ed: that would fold every sample into one
    // list and only the first meta would be used as the matching key.
    ligate_input = GLIMPSE2_PHASE.out.phased_variant
        .groupTuple()
        .join( INDEX_PHASE.out.csi.groupTuple() )

    GLIMPSE2_LIGATE ( ligate_input )
    ch_versions = ch_versions.mix( GLIMPSE2_LIGATE.out.versions.first() )

    INDEX_LIGATE ( GLIMPSE2_LIGATE.out.merged_variants )
    ch_versions = ch_versions.mix( INDEX_LIGATE.out.versions.first() )

    emit:
    chunk_chr             = GLIMPSE2_CHUNK.out.chunk_chr           // channel: [ val(meta), txt ]
    merged_variants       = GLIMPSE2_LIGATE.out.merged_variants    // channel: [ val(meta), bcf ]
    merged_variants_index = INDEX_LIGATE.out.csi                   // channel: [ val(meta), csi ]

    versions              = ch_versions                            // channel: [ versions.yml ]
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
# Metadata of the MULTIPLE_IMPUTE_GLIMPSE2 subworkflow.
# `name` matches the workflow name and `modules` lists the modules exactly as
# they are included by the subworkflow.
name: "multiple_impute_glimpse2"
description: Impute VCF/BCF files, but also CRAM and BAM files with Glimpse2
keywords:
  - glimpse
  - chunk
  - phase
  - ligate
  - split_reference

modules:
  - glimpse2/chunk
  - glimpse2/phase
  - glimpse2/ligate
  - glimpse2/splitreference
  - bcftools/index

input:
  - ch_input:
      type: file
      description: |
        Target dataset in CRAM, BAM or VCF/BCF format.
        Index file of the input file.
        File with sample names and ploidy information.
        Structure: [ meta, file, index, txt ]
  - ch_ref:
      type: file
      description: |
        Reference panel of haplotypes in VCF/BCF format.
        Index file of the Reference panel file.
        Target region, usually a full chromosome (e.g. chr20:1000000-2000000 or chr20).
        The file could possibly be without GT field (for efficiency reasons a file containing only the positions is recommended).
        Structure: [ meta, vcf, csi, region ]
  - ch_map:
      type: file
      description: |
        File containing the genetic map.
        Structure: [ meta, gmap ]
  - ch_fasta:
      type: file
      description: |
        Reference genome in fasta format.
        Reference genome index in fai format
        Structure: [ meta, fasta, fai ]
  - chunk_model:
      type: string
      description: |
        Model used to chunk the reference panel.
output:
  - chunk_chr:
      type: file
      description: |
        Tab delimited output txt file containing buffer and imputation regions.
        Structure: [ val(meta), txt ]
  - merged_variants:
      type: file
      description: |
        Output VCF/BCF file for the merged regions.
        Phased information (HS field) is updated accordingly for the full region.
        Structure: [ val(meta), bcf ]
  - merged_variants_index:
      type: file
      description: |
        Index file of the ligated phased variants files.
        Structure: [ val(meta), csi ]

  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"

authors:
  - "@LouisLeNezet"
Oops, something went wrong.