Skip to content

Commit 2362242

Browse files
author
Vicente
committed
Added the `which` param to ScanVcfParam when reading the VCF in DNA-RNA matching
1 parent 2c2d79e commit 2362242

File tree

1 file changed

+15
-10
lines changed

1 file changed

+15
-10
lines changed

drop/modules/mae-pipeline/QC/create_matrix_dna_rna_cor.R

+15-10
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
#' output:
1313
#' - mat_qc: '`sm cfg.getProcessedResultsDir() +
1414
#' "/mae/{dataset}/dna_rna_qc_matrix.Rds"`'
15-
#' threads: 20
15+
#' threads: 30
1616
#' type: script
1717
#'---
1818

@@ -51,12 +51,20 @@ N <- length(vcf_files)
5151

5252
lp <- bplapply(1:N, function(i){
5353

54+
gr_res <- copy(gr_test)
55+
5456
# Read sample vcf file
55-
5657
sample <- dna_samples[i] %>% as.character()
57-
58-
param <- ScanVcfParam(fixed=NA, info='NT', geno='GT', samples=sample, trimEmpty=TRUE)
59-
vcf_sample <- readVcf(vcf_files[i], param = param, row.names = FALSE)
58+
vcf_file <- vcf_files[i]
59+
60+
## Read from the DNA vcf file only the positions needed for the matching
61+
## First, determine the chromosome naming style used by the vcf file
62+
chrs <- row.names(scanVcfHeader(vcf_file)@header$contig)
63+
seqlevelsStyle(gr_res) <- seqlevelsStyle(chrs)[1]
64+
gr_res <- keepSeqlevels(gr_res, chrs, pruning.mode = 'coarse')
65+
param <- ScanVcfParam(samples=sample, fixed=NA, info='NT', geno='GT',
66+
trimEmpty=TRUE, which = gr_res)
67+
vcf_sample <- readVcf(vcf_file, param = param, row.names = FALSE)
6068
# Get GRanges and add Genotype
6169
gr_sample <- granges(vcf_sample)
6270

@@ -71,13 +79,11 @@ lp <- bplapply(1:N, function(i){
7179
gt <- gsub('ref', '0/0', gt)
7280
gt <- gsub('het', '0/1', gt)
7381
gt <- gsub('hom', '1/1', gt)
74-
}
82+
}
7583

7684
mcols(gr_sample)$GT <- gt
7785

7886
# Find overlaps between test and sample
79-
gr_res <- copy(gr_test)
80-
seqlevelsStyle(gr_res) <- seqlevelsStyle(seqlevelsInUse(gr_sample))[1]
8187
ov <- findOverlaps(gr_res, gr_sample, type = 'equal')
8288
mcols(gr_res)[from(ov),]$GT <- mcols(gr_sample)[to(ov),]$GT
8389

@@ -101,5 +107,4 @@ mat <- mat[unique(sa[rows_in_group, DNA_ID]),
101107
unique(sa[rows_in_group, RNA_ID]),
102108
drop=FALSE]
103109

104-
saveRDS(mat, snakemake@output$mat_qc)
105-
110+
saveRDS(mat, snakemake@output$mat_qc)

0 commit comments

Comments
 (0)