-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #39 from CCBR/cons-peak-norm
feat: collapse peak p-values in bedtools/merge, reformat with new process
- Loading branch information
Showing
34 changed files
with
561 additions
and
197 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,3 +22,4 @@ work/ | |
/.quarto/ | ||
.Rproj.user | ||
*.Rproj | ||
.Rbuildignore |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
// Reformat a merged peak BED file into a consensus BED file by running the
// `format_merged_bed.R` template.
//
// Inputs : meta map and the merged BED file.
// Outputs: every `*.bed` file produced by the task (emitted as `bed`) and
//          `versions.yml` (emitted as `versions`).
process CUSTOM_FORMATMERGEDBED {
    tag { meta.id }
    label 'process_medium'

    container 'nciccbr/consensus_peaks:v1.1'

    input:
    tuple val(meta), path(merged_bed)

    output:
    tuple val(meta), path("*.bed"), emit: bed
    path "versions.yml"           , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // `outfile` is consumed by the R template as its output path.
    outfile = "${merged_bed.baseName}.consensus.bed"
    template 'format_merged_bed.R'

    stub:
    // Creates an empty output with the same name the real script would use
    // and records the R version under the `R` tool key.
    """
    touch ${merged_bed.baseName}.consensus.bed
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        R: \$(echo \$(R --version | grep 'R version' | sed 's/R version //; s/ (.*//'))
    END_VERSIONS
    """
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
--- | ||
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json | ||
name: "custom_formatmergedbed" | ||
description: | | ||
Reformat consensus peaks from bedtools merge. | ||
Used in the consensus_peaks subworkflow. | ||
keywords: | ||
- chipseq | ||
- peaks | ||
- consensus | ||
- bedtools | ||
tools: | ||
- "R": | ||
description: "R is a free software environment for statistical computing and graphics" | ||
homepage: "https://www.r-project.org/" | ||
licence: ["GPL-3"] | ||
|
||
input: | ||
- meta: | ||
type: map | ||
description: | | ||
Groovy Map containing sample information | ||
e.g. `[ id:'test', single_end:false ]` | ||
- merged_bed: | ||
type: file | ||
description: | | ||
Merged output file from calling | ||
`bedtools merge -c 1,5,6,7,8,9 -o count,collapse,collapse,collapse,collapse,collapse` | ||
on a concatenated & sorted peak file | ||
pattern: "*.bed" | ||
|
||
output: | ||
- meta: | ||
type: map | ||
description: | | ||
Groovy Map containing sample information | ||
e.g. `[ id:'test', single_end:false ]` | ||
- bed: | ||
type: file | ||
description: | | ||
A narrow peak bed file with the best p-value for each consensus peak | ||
- versions: | ||
type: file | ||
description: File containing software versions | ||
pattern: "versions.yml" | ||
|
||
authors: | ||
- "@kelly-sovacool" | ||
maintainers: | ||
- "@kelly-sovacool" |
95 changes: 95 additions & 0 deletions
95
modules/CCBR/custom/formatmergedbed/templates/format_merged_bed.R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
#!/usr/bin/env Rscript | ||
library(dplyr) | ||
library(glue) | ||
library(purrr) | ||
library(stringr) | ||
library(readr) | ||
library(tidyr) | ||
|
||
# Entry point: reformat a merged peak file into a headerless BED-like table
# and record the R version.
#
# Args:
#   version_file: path the R version is written to.
#   merged_file:  headerless tab-separated file read as nine columns
#                 (one character, three integer, five character).
#   out_file:     path of the headerless tab-separated output.
#   n_cores:      number of workers for the multicore future plan.
#   min_count:    rows whose `counts` is below this value are dropped.
#
# Stops with an error when the merged file has no rows.
main <- function(version_file = "versions.yml",
                 merged_file = "${merged_bed}",
                 out_file = "${outfile}",
                 n_cores = as.integer("${task.cpus}"),
                 min_count = 1) {
  # Parallel backend used by the row-wise future_apply call below.
  doFuture::registerDoFuture()
  future::plan(future::multicore, workers = n_cores)

  write_version(version_file)

  peaks <- read_tsv(merged_file, col_names = FALSE, col_types = "ciiiccccc")
  if (nrow(peaks) == 0) {
    stop("The merged bed file is empty")
  }
  colnames(peaks) <- c(
    "chrom", "start", "end",
    "counts", "score_cat", "strand_cat",
    "signal_cat", "pvalue_cat", "qvalue_cat"
  )

  kept <- filter(peaks, counts >= min_count)

  # Each row is passed to select_best_peak; the per-row results are stacked
  # into a single table.
  best <- bind_rows(future.apply::future_apply(kept, 1, select_best_peak))

  # Fix the column order of the output file.
  ordered <- select(
    best,
    "chrom", "start", "end",
    "peakID", "score", "strand",
    "signal", "pvalue", "qvalue"
  )
  write_tsv(ordered, out_file, col_names = FALSE)
}
|
||
# Choose one representative peak for a merged (consensus) region.
#
# Args:
#   dat_row: one row of the merged table, as handed over by the row-wise
#            apply call in main.
#
# Returns:
#   A one-row tibble: the constituent peak with the largest `pvalue`.
#   NOTE(review): assumes larger `pvalue` means more significant (for
#   example a -log10 scale) -- confirm against the peak caller output.
select_best_peak <- function(dat_row) {
  dat_row %>%
    vec_to_df() %>%
    pivot_collapsed_columns() %>%
    # slice_max keeps every tied row by default, which would emit the same
    # consensus region more than once; keep exactly one row instead.
    slice_max(pvalue, n = 1, with_ties = FALSE)
}
|
||
# Convert a vector to a tibble by way of a list: each element of `vec`
# becomes a column (named after the element when `vec` is named).
vec_to_df <- function(vec) {
  as_tibble(as.list(vec))
}
|
||
# Expand the comma-collapsed `*_cat` columns of one merged peak into one row
# per comma-separated entry.
#
# Args:
#   dat_row: a one-row table holding chrom, start, end, counts and the
#            collapsed `*_cat` columns.
#
# Returns:
#   A table with the non-collapsed columns repeated on every row, plus
#   score, strand, signal, pvalue, qvalue and a `peakID` of the form
#   chrom:start-end. start, end and counts are integer; score, signal,
#   pvalue and qvalue are numeric.
pivot_collapsed_columns <- function(dat_row) {
  # Transpose so that every collapsed column becomes one row of a
  # single-column (V1) data frame, keeping the column name alongside.
  collapsed <- as.data.frame(t(select(dat_row, ends_with("_cat"))))
  collapsed <- mutate(collapsed, names = rownames(collapsed))

  # Split V1 on commas, then reshape so each split position is a row and
  # each original column (minus its `_cat` suffix) is a column again.
  per_peak <- collapsed %>%
    separate_wider_delim(V1, delim = ",", names_sep = "_") %>%
    mutate(names = str_replace(names, "_cat", "")) %>%
    pivot_longer(starts_with("V1")) %>%
    pivot_wider(names_from = names, values_from = value) %>%
    select(-name)

  dat_row %>%
    select(-ends_with("cat")) %>%
    bind_cols(per_peak) %>%
    mutate(
      across(c("start", "end", "counts"), as.integer),
      # Built after the integer conversion above so the coordinates are
      # rendered from the converted values.
      peakID = glue("{chrom}:{start}-{end}"),
      across(c("score", "signal", "pvalue", "qvalue"), as.numeric)
    )
}
|
||
# Write the R version to `version_file` as a two-level YAML mapping
# (process name -> tool -> version), the same layout the process stub
# writes, instead of a bare version string.
#
# Args:
#   version_file: path of the YAML file to write.
#   process_name: top-level key of the mapping; defaults to the name of the
#                 running process as filled in by the workflow template.
write_version <- function(version_file, process_name = "${task.process}") {
  write_lines(
    c(
      paste0('"', process_name, '":'),
      paste0("    R: ", get_version())
    ),
    version_file
  )
}
|
||
# Return the running R version as a "major.minor" string (minor includes
# the patch level, e.g. "4.3.1").
get_version <- function() {
  as.character(getRversion())
}
|
||
|
||
# Run the reformatting with the paths and CPU count supplied by the workflow.
main(
  version_file = "versions.yml",
  merged_file = "${merged_bed}",
  out_file = "${outfile}",
  n_cores = as.integer("${task.cpus}")
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.