Skip to content

Commit

Permalink
Merge pull request #39 from CCBR/cons-peak-norm
Browse files Browse the repository at this point in the history
feat: collapse peak p-values in bedtools/merge, reformat with new process
  • Loading branch information
kelly-sovacool authored Dec 1, 2023
2 parents 0dd8b5e + d0ddb29 commit d1cb7d8
Show file tree
Hide file tree
Showing 34 changed files with 561 additions and 197 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,4 @@ work/
/.quarto/
.Rproj.user
*.Rproj
.Rbuildignore
8 changes: 5 additions & 3 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,19 @@

- bedops/bedmap (#37)
- bedtools/map (#37)
- bedtools/merge (#37)
- bedtools/merge (#37,#39)
- bedtools/sort (#37)
- cat/cat (#37)
- cat/fastq (#37)
- custom/combinepeakcounts (#37)
- custom/consensuspeaks (#37)
- custom/normalizepeaks (#37)
- custom/formatmergedbed (#39)
- custom/normalizepeaks (#37,#39)
- sort/bed (#39)

### New subworkflows

- consensus_peaks (#37)
- consensus_peaks (#37,#39)

## nf-modules 0.1.0

Expand Down
7 changes: 3 additions & 4 deletions modules/CCBR/bedtools/merge/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ process BEDTOOLS_MERGE {

input:
tuple val(meta), path(bed)
val(args)

output:
tuple val(meta), path('*.bed'), emit: bed
Expand All @@ -15,9 +16,7 @@ process BEDTOOLS_MERGE {
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}.merged"
if ("$bed" == "${prefix}.bed") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
def prefix = "${bed.baseName}.merged"
"""
bedtools \\
merge \\
Expand All @@ -32,7 +31,7 @@ process BEDTOOLS_MERGE {
"""

stub:
def prefix = task.ext.prefix ?: "${meta.id}.merged"
def prefix = "${bed.baseName}.merged"
"""
touch ${prefix}.bed
Expand Down
3 changes: 3 additions & 0 deletions modules/CCBR/bedtools/merge/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ input:
type: file
description: Input BED file
pattern: "*.{bed}"
- args:
type: string
description: optional arguments for bedtools merge
output:
- meta:
type: map
Expand Down
2 changes: 1 addition & 1 deletion modules/CCBR/bedtools/sort/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ process BEDTOOLS_SORT {

script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}.sorted"
def prefix = task.ext.prefix ?: "${intervals.baseName}.sorted"
def genome_cmd = genome_file ? "-g $genome_file" : ""
extension = task.ext.suffix ?: intervals.extension
if ("$intervals" == "${prefix}.${extension}") {
Expand Down
2 changes: 1 addition & 1 deletion modules/CCBR/cat/cat/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ process CAT_CAT {
script:
def args = task.ext.args ?: ''
def args2 = task.ext.args2 ?: ''
def file_list = files_in.collect { it.toString() }
def file_list = files_in.sort({ a, b -> a.baseName <=> b.baseName }).collect{ it.toString() }

// | input | output | command1 | command2 |
// |-----------|------------|----------|----------|
Expand Down
5 changes: 3 additions & 2 deletions modules/CCBR/custom/combinepeakcounts/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,10 @@ process CUSTOM_COMBINEPEAKCOUNTS {
template 'combine_peaks.R'

stub:
def prefix = task.ext.prefix ?: "${meta.id}"
prefix = task.ext.prefix ?: "${meta.id}"
outfile = "${prefix}.consensus.bed"
"""
touch ${prefix}.consensus.bed
touch ${outfile}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
34 changes: 23 additions & 11 deletions modules/CCBR/custom/combinepeakcounts/templates/combine_peaks.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,19 @@ library(stringr)
library(readr)
library(tidyr)

main <- function() {
write_lines(get_version(), "versions.yml")
dat <- combine_peaks(unlist(str_split("${count_files}", ",")))
write_tsv(dat, "${outfile}", col_names = FALSE)
#' Script entry point: record the R version, combine the count files, and
#' write the combined table as a tab-separated file without a header row.
#'
#' The quoted placeholders in the defaults are presumably substituted by the
#' workflow engine before R runs (this file is used as a process template).
#'
#' @param version_file Path the R version string is written to.
#' @param count_files Character vector of count file paths.
#' @param out_file Path of the combined output table.
main <- function(version_file = "versions.yml",
                 count_files = unlist(str_split("${count_files}", ",")),
                 out_file = "${outfile}") {
  write_lines(get_version(), version_file)
  count_files %>%
    combine_peak_counts() %>%
    write_tsv(out_file, col_names = FALSE)
}

#' Version of the running R interpreter as "major.minor".
#'
#' @return A length-one character vector built from `R.version`.
get_version <- function() {
  sprintf("%s.%s", R.version[["major"]], R.version[["minor"]])
}

combine_peaks <- function(count_files) {
combine_peak_counts <- function(count_files) {
count_dat <- count_files %>%
map(function(file) {
dat <- read_tsv(file, col_names = FALSE)
Expand All @@ -35,12 +37,22 @@ combine_peaks <- function(count_files) {
return(count_dat)
}

join_peaks <- function(peakfiles) {
return()
}

normalize_scores <- function(dat) {
return()
#' Read a headerless, tab-separated peak file and name its columns.
#'
#' Columns are named positionally from a fixed list of ten field names; only
#' as many names as the file has columns are applied.
#'
#' @param peak_file Path to the peak file.
#' @return The table returned by `read_tsv()` with its column names replaced.
read_peaks <- function(peak_file) {
  field_names <- c(
    "chrom", "start", "end", "peakID", "score",
    "strand", "signal", "pvalue", "qvalue", "peak"
  )
  peak_table <- read_tsv(peak_file, col_names = FALSE)
  n_fields <- ncol(peak_table)
  colnames(peak_table) <- field_names[seq_len(n_fields)]
  peak_table
}

main()
30 changes: 30 additions & 0 deletions modules/CCBR/custom/formatmergedbed/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// Reformat the output of `bedtools merge` into a consensus peak BED file by
// running the `format_merged_bed.R` template on the merged file.
process CUSTOM_FORMATMERGEDBED {
    tag { meta.id }
    label 'process_medium'

    container 'nciccbr/consensus_peaks:v1.1'

    input:
    tuple val(meta), path(merged_bed)

    output:
    tuple val(meta), path("*.bed"), emit: bed
    path "versions.yml"           , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // The template reads `outfile` for its output path.
    // NOTE(review): presumably left without `def` so the template can resolve it; confirm.
    outfile = "${merged_bed.baseName}.consensus.bed"
    template 'format_merged_bed.R'

    stub:
    // Creates an empty output with the same name the real script would use.
    // The version entry is keyed by the tool name `R`; it previously had an empty key.
    """
    touch ${merged_bed.baseName}.consensus.bed
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        R: \$(echo \$(R --version | grep 'R version' | sed 's/R version //; s/ (.*//'))
    END_VERSIONS
    """
}
50 changes: 50 additions & 0 deletions modules/CCBR/custom/formatmergedbed/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json
name: "custom_formatmergedbed"
description: |
  Reformat consensus peaks from bedtools merge.
  Used in the consensus_peaks subworkflow.
keywords:
  - chipseq
  - peaks
  - consensus
  - bedtools
tools:
  - "R":
      description: "R is a free software environment for statistical computing and graphics"
      homepage: "https://www.r-project.org/"
      licence: ["GPL-3"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'test', single_end:false ]`
  - merged_bed:
      type: file
      description: |
        Merged output file from calling
        `bedtools merge -c 1,5,6,7,8,9 -o count,collapse,collapse,collapse,collapse,collapse`
        on a concatenated & sorted peak file
      pattern: "*.bed"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'test', single_end:false ]`
  - bed:
      type: file
      description: |
        A narrow peak bed file with the best p-value for each consensus peak
      # The process names its output `<merged_bed basename>.consensus.bed`.
      pattern: "*.consensus.bed"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"

authors:
  - "@kelly-sovacool"
maintainers:
  - "@kelly-sovacool"
95 changes: 95 additions & 0 deletions modules/CCBR/custom/formatmergedbed/templates/format_merged_bed.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
#!/usr/bin/env Rscript
library(dplyr)
library(glue)
library(purrr)
library(stringr)
library(readr)
library(tidyr)

#' Reformat a merged peak table into one best peak per consensus region
#'
#' Reads the nine-column merged table, keeps regions supported by at least
#' `min_count` peaks, selects the best peak of each region row by row (in
#' parallel), and writes the result as a headerless tab-separated file.
#'
#' The quoted placeholders in the defaults are presumably substituted by the
#' workflow engine before R runs (this file is used as a process template).
#'
#' @param version_file Path the R version string is written to.
#' @param merged_file Path to the merged table: chrom, start, end, a count,
#'   and five comma-collapsed columns (score, strand, signal, pvalue, qvalue).
#' @param out_file Path of the reformatted output file.
#' @param n_cores Number of parallel workers.
#' @param min_count Minimum value of the count column for a region to be kept.
#' @return Invisibly, the table that was written (the value of `write_tsv()`).
main <- function(version_file = "versions.yml",
                 merged_file = "${merged_bed}",
                 out_file = "${outfile}",
                 n_cores = as.integer("${task.cpus}"),
                 min_count = 1) {
  doFuture::registerDoFuture()
  # plan() hands back the previously active plan; restore it on exit so this
  # function does not leave the session's parallel backend changed.
  previous_plan <- future::plan(future::multicore, workers = n_cores)
  on.exit(future::plan(previous_plan), add = TRUE)
  write_version(version_file)
  merged_dat <- read_tsv(merged_file,
    col_names = FALSE,
    col_types = "ciiiccccc"
  )
  if (nrow(merged_dat) == 0) {
    stop("The merged bed file is empty")
  }
  colnames(merged_dat) <- c(
    "chrom", "start", "end",
    "counts", "score_cat", "strand_cat",
    "signal_cat", "pvalue_cat", "qvalue_cat"
  )
  kept_dat <- merged_dat %>%
    filter(counts >= min_count)
  # A row-wise apply over zero rows fails with an unhelpful error, so report
  # the real cause instead.
  if (nrow(kept_dat) == 0) {
    stop("No merged peaks have counts >= ", min_count)
  }
  kept_dat %>%
    future.apply::future_apply(1, select_best_peak) %>%
    bind_rows() %>%
    select(
      "chrom",
      "start",
      "end",
      "peakID",
      "score",
      "strand",
      "signal",
      "pvalue",
      "qvalue"
    ) %>%
    write_tsv(out_file, col_names = FALSE)
}

#' Pick the single best member peak of one merged region
#'
#' @param dat_row One row of the merged table as a named vector, as handed
#'   over by the row-wise apply in `main()`.
#' @return A one-row table: the member peak with the largest `pvalue`
#'   (presumably a -log10 p-value, so larger is better; confirm). Ties are
#'   broken by keeping the first tied entry so that every consensus region
#'   yields exactly one output line rather than duplicated intervals.
select_best_peak <- function(dat_row) {
  dat_row %>%
    vec_to_df() %>%
    pivot_collapsed_columns() %>%
    slice_max(pvalue, n = 1, with_ties = FALSE)
}

#' Turn a named vector into a one-row tibble, one column per element.
#'
#' @param vec A named vector.
#' @return A tibble built from the vector's elements.
vec_to_df <- function(vec) {
  as_tibble(as.list(vec))
}

# Expand the comma-collapsed `*_cat` columns of one merged region into one row
# per collapsed entry.
#
# dat_row: a data frame holding chrom, start, end, counts and the `*_cat`
#   columns (assumed to be the one-row result of vec_to_df(); TODO confirm).
# Returns the region columns alongside score, strand, signal, pvalue and
#   qvalue for each collapsed entry, plus a `peakID` of the form
#   chrom:start-end; coordinates and counts are integers, the four measurement
#   columns numeric.
pivot_collapsed_columns <- function(dat_row) {
# Transpose so every collapsed column becomes a row with its string in `V1`.
row_pivot <- dat_row %>%
select(ends_with("_cat")) %>%
t() %>%
as.data.frame()
# Split each string on commas (V1_1, V1_2, ...), then reshape so each split
# position is a row and each original column (minus its `_cat` suffix) is a
# column again.
long_row <- row_pivot %>%
mutate(names = rownames(row_pivot)) %>%
separate_wider_delim(V1, delim = ",", names_sep = "_") %>%
mutate(names = str_replace(names, "_cat", "")) %>%
pivot_longer(starts_with("V1")) %>%
pivot_wider(names_from = names, values_from = value) %>%
select(-name)
return(
dat_row %>%
select(-ends_with("cat")) %>%
bind_cols(long_row) %>%
# Coordinates are converted to integer before the ID is built from them.
mutate(across(c("start", "end", "counts"), as.integer)) %>%
mutate(peakID = glue("{chrom}:{start}-{end}")) %>%
mutate(across(c("score", "signal", "pvalue", "qvalue"), as.numeric))
)
}

#' Write the running R version string to a file.
#'
#' @param version_file Path of the file to write.
write_version <- function(version_file) {
  r_version <- get_version()
  write_lines(r_version, version_file)
}

#' Version of the running R interpreter as "major.minor".
#'
#' @return A length-one character vector built from `R.version`.
get_version <- function() {
  paste(R.version[["major"]], R.version[["minor"]], sep = ".")
}


# Run the script with the file names and CPU count supplied through the
# template placeholders.
main(
  version_file = "versions.yml",
  merged_file = "${merged_bed}",
  out_file = "${outfile}",
  n_cores = as.integer("${task.cpus}")
)
6 changes: 3 additions & 3 deletions modules/CCBR/custom/normalizepeaks/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ process CUSTOM_NORMALIZEPEAKS {
container 'nciccbr/spacesavers2:0.1.1'

input:
tuple val(meta), path(count), path(peaks)
tuple val(meta), path(peak)

output:
tuple val(meta), path("*norm.bed"), emit: bed
Expand All @@ -19,12 +19,12 @@ process CUSTOM_NORMALIZEPEAKS {

script:
prefix = task.ext.prefix ?: "${meta.id}"
outfile = "${count.baseName}.norm.bed"
outfile = "${peak}.norm.bed"
template 'normalize_peaks.R'

stub:
"""
touch ${count.baseName}.norm.bed
touch ${peak}.norm.bed
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
Loading

0 comments on commit d1cb7d8

Please sign in to comment.