From a2dc226c8934669cd17179adf393f6283d20619f Mon Sep 17 00:00:00 2001 From: "C. Navarro" Date: Wed, 7 Oct 2020 11:09:01 +0200 Subject: [PATCH 1/6] Added DESeq2 as dependency --- elsasserlib/DESCRIPTION | 5 +++-- elsasserlib/README.Md | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/elsasserlib/DESCRIPTION b/elsasserlib/DESCRIPTION index 288ce8c..b00320c 100644 --- a/elsasserlib/DESCRIPTION +++ b/elsasserlib/DESCRIPTION @@ -1,7 +1,7 @@ Package: elsasserlib Type: Package Title: General utilities used within Elsasser lab -Version: 1.0.9 +Version: 1.1.0 Authors@R: c( person("Carmen", "Navarro", email = "carmen.navarro@scilifelab.se", @@ -41,7 +41,8 @@ Imports: scales, RColorBrewer, RCurl, - pheatmap + pheatmap, + DESeq2 Encoding: UTF-8 LazyData: true RoxygenNote: 7.1.1 diff --git a/elsasserlib/README.Md b/elsasserlib/README.Md index 9381a47..8aeec9b 100644 --- a/elsasserlib/README.Md +++ b/elsasserlib/README.Md @@ -17,7 +17,8 @@ dependencies before running the installation: 'GenomicRanges', 'rtracklayer', 'BSgenome.Mmusculus.UCSC.mm9', - 'BSgenome.Hsapiens.UCSC.hg38')) + 'BSgenome.Hsapiens.UCSC.hg38', + 'DESeq2')) Then you can install directly from this GitHub repository: From f46c06de271149db9b1dfee39784e754cdf45a76 Mon Sep 17 00:00:00 2001 From: "C. Navarro" Date: Wed, 7 Oct 2020 11:53:32 +0200 Subject: [PATCH 2/6] bwstats with bins analysis --- elsasserlib/NAMESPACE | 7 +++ elsasserlib/R/bwstats.R | 77 ++++++++++++++++++++++++ elsasserlib/man/bw_bins_diff_analysis.Rd | 37 ++++++++++++ elsasserlib/man/get_nreads_columns.Rd | 20 ++++++ 4 files changed, 141 insertions(+) create mode 100644 elsasserlib/R/bwstats.R create mode 100644 elsasserlib/man/bw_bins_diff_analysis.Rd create mode 100644 elsasserlib/man/get_nreads_columns.Rd diff --git a/elsasserlib/NAMESPACE b/elsasserlib/NAMESPACE index 4db2e9d..5cd953e 100644 --- a/elsasserlib/NAMESPACE +++ b/elsasserlib/NAMESPACE @@ -3,6 +3,7 @@ export(build_bins) export(bw_bed) export(bw_bins) +export(bw_bins_diff_analysis) export(bw_profile) export(mean_ratio_norm) export(palette_categorical) @@ -19,6 +20,11 @@ export(trim_quantile) import(ggplot2) importFrom(BSgenome.Hsapiens.UCSC.hg38,BSgenome.Hsapiens.UCSC.hg38) importFrom(BSgenome.Mmusculus.UCSC.mm9,BSgenome.Mmusculus.UCSC.mm9) +importFrom(DESeq2,DESeqDataSetFromMatrix) +importFrom(DESeq2,`sizeFactors<-`) +importFrom(DESeq2,estimateDispersions) +importFrom(DESeq2,nbinomWaldTest) +importFrom(DESeq2,results) importFrom(GenomeInfoDb,seqinfo) importFrom(GenomeInfoDb,sortSeqlevels) importFrom(GenomicRanges,makeGRangesFromDataFrame) @@ -49,6 +55,7 @@ importFrom(rmarkdown,render) importFrom(rtracklayer,BigWigFile) importFrom(rtracklayer,import) importFrom(rtracklayer,mcols) +importFrom(stats,complete.cases) importFrom(stats,median) importFrom(stats,sd) importFrom(stringr,str_sort) diff --git a/elsasserlib/R/bwstats.R b/elsasserlib/R/bwstats.R new file mode 100644 index 0000000..ed112ed --- /dev/null +++ b/elsasserlib/R/bwstats.R @@ -0,0 +1,77 @@ +#' Run DESeq2 analysis on genome-wide bins +#' +#' Runs a DESeq2 analysis on genome-wide bins of a specified bin size. +#' The particularity of this analysis is that it skips the estimateSizeFactors +#' step by default, because this is accounted for in the scaling step of +#' MINUTE-ChIP samples. +#' +#' @param bwfiles_c1 Path or array of paths to the bigWig files for first condition. +#' @param bwfiles_c2 Path or array of paths to the bigWig files for second condition. +#' @param label_c1 Condition name for first condition +#' @param label_c2 Condition name for second condition +#' @param genome Genome. Available choices are mm9, hg38. +#' @param bin_size Bin size. +#' @importFrom DESeq2 DESeqDataSetFromMatrix estimateDispersions nbinomWaldTest `sizeFactors<-` results +#' @return a DESeqResults object as returned by DESeq2::results function +#' @export +bw_bins_diff_analysis <- function(bwfiles_c1, + bwfiles_c2, + label_c1, + label_c2, + bin_size = 10000, + genome = "mm9") { + + bins_c1 <- bw_bins(bwfiles_c1, genome = genome, bin_size = bin_size) + bins_c2 <- bw_bins(bwfiles_c2, genome = genome, bin_size = bin_size) + + cts_c1 <- get_nreads_columns(bins_c1) + cts_c2 <- get_nreads_columns(bins_c2) + + cts <- cbind(cts_c1, cts_c2) + + condition_labels <- c(rep(label_c1, ncol(cts_c1)), + rep(label_c2, ncol(cts_c2))) + + coldata <- data.frame(colnames(cts), condition = condition_labels) + + dds <- DESeqDataSetFromMatrix(countData = cts, + colData = coldata, + design = ~ condition) + + # Since files are scaled, we do not want to estimate size factors, so give it + # an array of ones + ncolumns <- length(bwfiles_c1) + length(bwfiles_c2) + sizeFactors(dds) <- c(rep(1, ncolumns)) + + dds <- estimateDispersions(dds) + dds <- nbinomWaldTest(dds) + + results(dds) +} + + +#' Get values in a granges object as round numeric values in a matrix. +#' +#' This is an auxiliary function for stats. It drops NAs or NaN values, only +#' complete cases are used. +#' +#' @param granges Target granges object +#' @param fraglen Estimated fragment length +#' +#' @return An integer matrix +#' @importFrom stats complete.cases +get_nreads_columns <- function(granges, fraglen = 150) { + # TODO: Consider whether to multiply by locus length. For bins analysis + # this should not affect results, but for genes or loci of different length + # it might. Since we skip the size factor step, we may bias the results? + # So right now it's only fragment length + length_factor <- fraglen + + bins.df <- data.frame(granges) + cts <- as.matrix(mcols(granges)) + cts <- as.matrix(cts[complete.cases(cts),]) + cts <- round(cts*length_factor) + cts +} + + diff --git a/elsasserlib/man/bw_bins_diff_analysis.Rd b/elsasserlib/man/bw_bins_diff_analysis.Rd new file mode 100644 index 0000000..f836fb1 --- /dev/null +++ b/elsasserlib/man/bw_bins_diff_analysis.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bwstats.R +\name{bw_bins_diff_analysis} +\alias{bw_bins_diff_analysis} +\title{Run DESeq2 analysis on genome-wide bins} +\usage{ +bw_bins_diff_analysis( + bwfiles_c1, + bwfiles_c2, + label_c1, + label_c2, + bin_size = 10000, + genome = "mm9" +) +} +\arguments{ +\item{bwfiles_c1}{Path or array of paths to the bigWig files for first condition.} + +\item{bwfiles_c2}{Path or array of paths to the bigWig files for second condition.} + +\item{label_c1}{Condition name for first condition} + +\item{label_c2}{Condition name for second condition} + +\item{bin_size}{Bin size.} + +\item{genome}{Genome. Available choices are mm9, hg38.} +} +\value{ +a DESeqResults object as returned by DESeq2::results function +} +\description{ +Runs a DESeq2 analysis on genome-wide bins of a specified bin size. +The particularity of this analysis is that it skips the estimateSizeFactors +step by default, because this is accounted for in the scaling step of +MINUTE-ChIP samples. +} diff --git a/elsasserlib/man/get_nreads_columns.Rd b/elsasserlib/man/get_nreads_columns.Rd new file mode 100644 index 0000000..b3fcc18 --- /dev/null +++ b/elsasserlib/man/get_nreads_columns.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bwstats.R +\name{get_nreads_columns} +\alias{get_nreads_columns} +\title{Get values in a granges object as round numeric values in a matrix.} +\usage{ +get_nreads_columns(granges, fraglen = 150) +} +\arguments{ +\item{granges}{Target granges object} + +\item{fraglen}{Estimated fragment length} +} +\value{ +An integer matrix +} +\description{ +This is an auxiliary function for stats. It drops NAs or NaN values, only +complete cases are used. +} From 7198c109852e896680b7149a3afc6ac455186381 Mon Sep 17 00:00:00 2001 From: "C. Navarro" Date: Wed, 7 Oct 2020 15:02:47 +0200 Subject: [PATCH 3/6] bw_bed_diff_analysis and refactor --- elsasserlib/NAMESPACE | 3 + elsasserlib/R/bwstats.R | 101 ++++++++++++++++---- elsasserlib/man/bw_bed_diff_analysis.Rd | 38 ++++++++ elsasserlib/man/bw_bins_diff_analysis.Rd | 10 +- elsasserlib/man/bw_granges_diff_analysis.Rd | 36 +++++++ elsasserlib/man/get_nreads_columns.Rd | 6 +- 6 files changed, 169 insertions(+), 25 deletions(-) create mode 100644 elsasserlib/man/bw_bed_diff_analysis.Rd create mode 100644 elsasserlib/man/bw_granges_diff_analysis.Rd diff --git a/elsasserlib/NAMESPACE b/elsasserlib/NAMESPACE index 5cd953e..02646a9 100644 --- a/elsasserlib/NAMESPACE +++ b/elsasserlib/NAMESPACE @@ -2,8 +2,10 @@ export(build_bins) export(bw_bed) +export(bw_bed_diff_analysis) export(bw_bins) export(bw_bins_diff_analysis) +export(bw_granges_diff_analysis) export(bw_profile) export(mean_ratio_norm) export(palette_categorical) @@ -23,6 +25,7 @@ importFrom(BSgenome.Mmusculus.UCSC.mm9,BSgenome.Mmusculus.UCSC.mm9) importFrom(DESeq2,DESeqDataSetFromMatrix) importFrom(DESeq2,`sizeFactors<-`) importFrom(DESeq2,estimateDispersions) +importFrom(DESeq2,estimateSizeFactors) importFrom(DESeq2,nbinomWaldTest) importFrom(DESeq2,results) importFrom(GenomeInfoDb,seqinfo) diff --git a/elsasserlib/R/bwstats.R b/elsasserlib/R/bwstats.R index ed112ed..f80597d 100644 --- a/elsasserlib/R/bwstats.R +++ b/elsasserlib/R/bwstats.R @@ -7,11 +7,9 @@ #' #' @param bwfiles_c1 Path or array of paths to the bigWig files for first condition. #' @param bwfiles_c2 Path or array of paths to the bigWig files for second condition. -#' @param label_c1 Condition name for first condition -#' @param label_c2 Condition name for second condition #' @param genome Genome. Available choices are mm9, hg38. #' @param bin_size Bin size. -#' @importFrom DESeq2 DESeqDataSetFromMatrix estimateDispersions nbinomWaldTest `sizeFactors<-` results +#' @inheritParams bw_granges_diff_analysis #' @return a DESeqResults object as returned by DESeq2::results function #' @export bw_bins_diff_analysis <- function(bwfiles_c1, @@ -19,18 +17,80 @@ bw_bins_diff_analysis <- function(bwfiles_c1, label_c1, label_c2, bin_size = 10000, - genome = "mm9") { + genome = "mm9", + estimate_size_factors = FALSE) { bins_c1 <- bw_bins(bwfiles_c1, genome = genome, bin_size = bin_size) bins_c2 <- bw_bins(bwfiles_c2, genome = genome, bin_size = bin_size) - cts_c1 <- get_nreads_columns(bins_c1) - cts_c2 <- get_nreads_columns(bins_c2) + bw_granges_diff_analysis(bins_c1, bins_c2, label_c1, label_c2, + estimate_size_factors = estimate_size_factors) +} + +#' Run DESeq2 analysis on bed file +#' +#' Runs a DESeq2 analysis on a set of loci specified in a BED file. +#' The particularity of this analysis is that it skips the estimateSizeFactors +#' step by default, because this is accounted for in the scaling step of +#' MINUTE-ChIP samples. +#' +#' @param bwfiles_c1 Path or array of paths to the bigWig files for first condition. +#' @param bwfiles_c2 Path or array of paths to the bigWig files for second condition. +#' @param bedfile BED file for locus specific analysis. +#' @inheritParams bw_granges_diff_analysis +#' @return a DESeqResults object as returned by DESeq2::results function +#' @export +bw_bed_diff_analysis <- function(bwfiles_c1, + bwfiles_c2, + bedfile, + label_c1, + label_c2, + estimate_size_factors = FALSE) { + + loci_c1 <- bw_bed(bwfiles_c1, bedfile = bedfile) + loci_c2 <- bw_bed(bwfiles_c2, bedfile = bedfile) + + bw_granges_diff_analysis(loci_c1, loci_c2, label_c1, label_c2, + estimate_size_factors = estimate_size_factors) +} - cts <- cbind(cts_c1, cts_c2) - condition_labels <- c(rep(label_c1, ncol(cts_c1)), - rep(label_c2, ncol(cts_c2))) +#' Compute DESeq2 differential analysis on GRanges objects +#' +#' Runs a DESeq2 analysis on loci specified on GRanges objects. +#' The particularity of this analysis is that it skips the estimateSizeFactors +#' step by default, because this is accounted for in the scaling step of +#' MINUTE-ChIP samples. +#' +#' @param granges_c1 GRanges object containing the values for condition 1. +#' @param granges_c2 GRanges object containing the values for condition 2. +#' Note that these objects must correspond to the same loci. +#' @param label_c1 Condition name for condition 1. +#' @param label_c2 Condition name for condition 2. +#' @param estimate_size_factors If TRUE, normal DESeq2 procedure is done. Set it +#' to true to analyze non-MINUTE data. +#' @importFrom DESeq2 DESeqDataSetFromMatrix estimateDispersions nbinomWaldTest `sizeFactors<-` results estimateSizeFactors +#' @return a DESeqResults object as returned by DESeq2::results function +#' @export +bw_granges_diff_analysis <- function(granges_c1, + granges_c2, + label_c1, + label_c2, + estimate_size_factors = FALSE) { + + # Bind first, get numbers after (drop complete cases separately could cause error) + granges_c1 <- sortSeqlevels(granges_c1) + granges_c1 <- sort(granges_c1) + + granges_c2 <- sortSeqlevels(granges_c2) + granges_c2 <- sort(granges_c2) + + cts_df <- cbind(data.frame(granges_c1), mcols(granges_c2)) + + cts <- get_nreads_columns(cts_df[, 6:ncol(cts_df)]) + + condition_labels <- c(rep(label_c1, length(mcols(granges_c1))), + rep(label_c2, length(mcols(granges_c2)))) coldata <- data.frame(colnames(cts), condition = condition_labels) @@ -38,10 +98,15 @@ bw_bins_diff_analysis <- function(bwfiles_c1, colData = coldata, design = ~ condition) - # Since files are scaled, we do not want to estimate size factors, so give it - # an array of ones - ncolumns <- length(bwfiles_c1) + length(bwfiles_c2) - sizeFactors(dds) <- c(rep(1, ncolumns)) + + if (estimate_size_factors == TRUE) { + dds <- estimateSizeFactors(dds) + } + else { + # Since files are scaled, we do not want to estimate size factors, so give it + # an array of ones + sizeFactors(dds) <- c(rep(1, ncol(cts))) + } dds <- estimateDispersions(dds) dds <- nbinomWaldTest(dds) @@ -50,25 +115,23 @@ bw_bins_diff_analysis <- function(bwfiles_c1, } -#' Get values in a granges object as round numeric values in a matrix. +#' Get values in a data frame object as round numeric values in a matrix. #' #' This is an auxiliary function for stats. It drops NAs or NaN values, only #' complete cases are used. #' -#' @param granges Target granges object +#' @param df Target data frame #' @param fraglen Estimated fragment length #' #' @return An integer matrix #' @importFrom stats complete.cases -get_nreads_columns <- function(granges, fraglen = 150) { +get_nreads_columns <- function(df, fraglen = 150) { # TODO: Consider whether to multiply by locus length. For bins analysis # this should not affect results, but for genes or loci of different length # it might. Since we skip the size factor step, we may bias the results? # So right now it's only fragment length length_factor <- fraglen - - bins.df <- data.frame(granges) - cts <- as.matrix(mcols(granges)) + cts <- as.matrix(df) cts <- as.matrix(cts[complete.cases(cts),]) cts <- round(cts*length_factor) cts diff --git a/elsasserlib/man/bw_bed_diff_analysis.Rd b/elsasserlib/man/bw_bed_diff_analysis.Rd new file mode 100644 index 0000000..abbe338 --- /dev/null +++ b/elsasserlib/man/bw_bed_diff_analysis.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bwstats.R +\name{bw_bed_diff_analysis} +\alias{bw_bed_diff_analysis} +\title{Run DESeq2 analysis on bed file} +\usage{ +bw_bed_diff_analysis( + bwfiles_c1, + bwfiles_c2, + bedfile, + label_c1, + label_c2, + estimate_size_factors = FALSE +) +} +\arguments{ +\item{bwfiles_c1}{Path or array of paths to the bigWig files for first condition.} + +\item{bwfiles_c2}{Path or array of paths to the bigWig files for second condition.} + +\item{bedfile}{BED file for locus specific analysis.} + +\item{label_c1}{Condition name for condition 1.} + +\item{label_c2}{Condition name for condition 2.} + +\item{estimate_size_factors}{If TRUE, normal DESeq2 procedure is done. Set it +to true to analyze non-MINUTE data.} +} +\value{ +a DESeqResults object as returned by DESeq2::results function +} +\description{ +Runs a DESeq2 analysis on a set of loci specified in a BED file. +The particularity of this analysis is that it skips the estimateSizeFactors +step by default, because this is accounted for in the scaling step of +MINUTE-ChIP samples. +} diff --git a/elsasserlib/man/bw_bins_diff_analysis.Rd b/elsasserlib/man/bw_bins_diff_analysis.Rd index f836fb1..7fab21d 100644 --- a/elsasserlib/man/bw_bins_diff_analysis.Rd +++ b/elsasserlib/man/bw_bins_diff_analysis.Rd @@ -10,7 +10,8 @@ bw_bins_diff_analysis( label_c1, label_c2, bin_size = 10000, - genome = "mm9" + genome = "mm9", + estimate_size_factors = FALSE ) } \arguments{ @@ -18,13 +19,16 @@ bw_bins_diff_analysis( \item{bwfiles_c2}{Path or array of paths to the bigWig files for second condition.} -\item{label_c1}{Condition name for first condition} +\item{label_c1}{Condition name for condition 1.} -\item{label_c2}{Condition name for second condition} +\item{label_c2}{Condition name for condition 2.} \item{bin_size}{Bin size.} \item{genome}{Genome. Available choices are mm9, hg38.} + +\item{estimate_size_factors}{If TRUE, normal DESeq2 procedure is done. Set it +to true to analyze non-MINUTE data.} } \value{ a DESeqResults object as returned by DESeq2::results function diff --git a/elsasserlib/man/bw_granges_diff_analysis.Rd b/elsasserlib/man/bw_granges_diff_analysis.Rd new file mode 100644 index 0000000..0134e52 --- /dev/null +++ b/elsasserlib/man/bw_granges_diff_analysis.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bwstats.R +\name{bw_granges_diff_analysis} +\alias{bw_granges_diff_analysis} +\title{Compute DESeq2 differential analysis on GRanges objects} +\usage{ +bw_granges_diff_analysis( + granges_c1, + granges_c2, + label_c1, + label_c2, + estimate_size_factors = FALSE +) +} +\arguments{ +\item{granges_c1}{GRanges object containing the values for condition 1.} + +\item{granges_c2}{GRanges object containing the values for condition 2. +Note that these objects must correspond to the same loci.} + +\item{label_c1}{Condition name for condition 1.} + +\item{label_c2}{Condition name for condition 2.} + +\item{estimate_size_factors}{If TRUE, normal DESeq2 procedure is done. Set it +to true to analyze non-MINUTE data.} +} +\value{ +a DESeqResults object as returned by DESeq2::results function +} +\description{ +Runs a DESeq2 analysis on loci specified on GRanges objects. +The particularity of this analysis is that it skips the estimateSizeFactors +step by default, because this is accounted for in the scaling step of +MINUTE-ChIP samples. +} diff --git a/elsasserlib/man/get_nreads_columns.Rd b/elsasserlib/man/get_nreads_columns.Rd index b3fcc18..41b785a 100644 --- a/elsasserlib/man/get_nreads_columns.Rd +++ b/elsasserlib/man/get_nreads_columns.Rd @@ -2,12 +2,12 @@ % Please edit documentation in R/bwstats.R \name{get_nreads_columns} \alias{get_nreads_columns} -\title{Get values in a granges object as round numeric values in a matrix.} +\title{Get values in a data frame object as round numeric values in a matrix.} \usage{ -get_nreads_columns(granges, fraglen = 150) +get_nreads_columns(df, fraglen = 150) } \arguments{ -\item{granges}{Target granges object} +\item{df}{Target data frame} \item{fraglen}{Estimated fragment length} } From f93316da1de244a5259a3bf32c2e212ffa11eddf Mon Sep 17 00:00:00 2001 From: "C. Navarro" Date: Wed, 7 Oct 2020 15:03:07 +0200 Subject: [PATCH 4/6] DESeq2 dependency on CI --- .github/workflows/R-CMD-check.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index c7ed99d..d6397c6 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -67,7 +67,7 @@ jobs: - name: Manually install BioConductor dependencies run: | install.packages("BiocManager") - BiocManager::install(c("rtracklayer", "GenomeInfoDb", "GenomicRanges", "BSgenome.Mmusculus.UCSC.mm9", "BSgenome.Hsapiens.UCSC.hg38")) + BiocManager::install(c("rtracklayer", "GenomeInfoDb", "GenomicRanges", "BSgenome.Mmusculus.UCSC.mm9", "BSgenome.Hsapiens.UCSC.hg38", "DESeq2")) shell: Rscript {0} working-directory: elsasserlib From 2c817e7f115a13f4a32faf066930b4375bdcfd42 Mon Sep 17 00:00:00 2001 From: "C. Navarro" Date: Wed, 7 Oct 2020 16:19:22 +0200 Subject: [PATCH 5/6] Length factor scaled to 1000 as default --- elsasserlib/R/bwstats.R | 5 ++--- elsasserlib/man/get_nreads_columns.Rd | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/elsasserlib/R/bwstats.R b/elsasserlib/R/bwstats.R index f80597d..b88adb7 100644 --- a/elsasserlib/R/bwstats.R +++ b/elsasserlib/R/bwstats.R @@ -121,16 +121,15 @@ bw_granges_diff_analysis <- function(granges_c1, #' complete cases are used. #' #' @param df Target data frame -#' @param fraglen Estimated fragment length +#' @param length_factor Scaling factor to multiply coverage values by. #' #' @return An integer matrix #' @importFrom stats complete.cases -get_nreads_columns <- function(df, fraglen = 150) { +get_nreads_columns <- function(df, length_factor = 1000) { # TODO: Consider whether to multiply by locus length. For bins analysis # this should not affect results, but for genes or loci of different length # it might. Since we skip the size factor step, we may bias the results? # So right now it's only fragment length - length_factor <- fraglen cts <- as.matrix(df) cts <- as.matrix(cts[complete.cases(cts),]) cts <- round(cts*length_factor) diff --git a/elsasserlib/man/get_nreads_columns.Rd b/elsasserlib/man/get_nreads_columns.Rd index 41b785a..b25de3e 100644 --- a/elsasserlib/man/get_nreads_columns.Rd +++ b/elsasserlib/man/get_nreads_columns.Rd @@ -4,12 +4,12 @@ \alias{get_nreads_columns} \title{Get values in a data frame object as round numeric values in a matrix.} \usage{ -get_nreads_columns(df, fraglen = 150) +get_nreads_columns(df, length_factor = 1000) } \arguments{ \item{df}{Target data frame} -\item{fraglen}{Estimated fragment length} +\item{length_factor}{Scaling factor to multiply coverage values by.} } \value{ An integer matrix From 888c23b9b7cd9518d0a576fd3ee88c44ea7e1033 Mon Sep 17 00:00:00 2001 From: "C. Navarro" Date: Wed, 7 Oct 2020 17:38:41 +0200 Subject: [PATCH 6/6] few parameter tests --- elsasserlib/tests/testthat/test_bwstats.R | 110 ++++++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100644 elsasserlib/tests/testthat/test_bwstats.R diff --git a/elsasserlib/tests/testthat/test_bwstats.R b/elsasserlib/tests/testthat/test_bwstats.R new file mode 100644 index 0000000..b13ac9e --- /dev/null +++ b/elsasserlib/tests/testthat/test_bwstats.R @@ -0,0 +1,110 @@ +context("Test functions for bigWig stats") +library(GenomicRanges) +library(testthat) +library(mockery) + +get_file_path <- function(filename) { + system.file("extdata", filename, package = "elsasserlib") +} + +bw1 <- get_file_path("ES_H33_00h_rep1_hoxc.bw") +bw2 <- get_file_path("ES_H33_03h_rep1_hoxc.bw") +bg_bw <- get_file_path("ES_H33_inp_rep1_hoxc.bw") +bed <- get_file_path("chromhmm_hoxc.bed") + +bw_limits <- GRanges(seqnames = c("chr15"), + ranges = IRanges(c(102723600, 102959000))) + +reduced_bins <- bw_bins(c(bw1, bw2), selection = bw_limits) +reduced_bg_bins <- bw_bins(c(bg_bw), selection = bw_limits) + +test_that("Setup files exist", { + expect_true(file_test("-f", bw1)) + expect_true(file_test("-f", bw2)) + expect_true(file_test("-f", bg_bw)) + expect_true(file_test("-f", bed)) +}) + +test_that("bw_bins_diff_analysis passes on parameters", { + m_func <- mock() + m_bins <- mock(reduced_bins, reduced_bg_bins) + with_mock( + bw_granges_diff_analysis = m_func, + bw_bins = m_bins, + bw_bins_diff_analysis(c(bw1, bw2), bg_bw, "treated", "untreated") + ) + expect_call(m_func, 1, + bw_granges_diff_analysis(bins_c1, + bins_c2, + label_c1, + label_c2, + estimate_size_factors = estimate_size_factors) + ) + + expect_call(m_bins, 1, + bw_bins(bwfiles_c1, genome = genome, bin_size = bin_size) + ) + + expect_call(m_bins, 2, + bw_bins(bwfiles_c2, genome = genome, bin_size = bin_size) + ) + + expect_args(m_func, 1, + granges_c1 = reduced_bins, + granges_c2 = reduced_bg_bins, + label_c1 = "treated", + label_c2 = "untreated", + estimate_size_factors = FALSE) + + expect_args(m_bins, 1, + c(bw1, bw2), + genome = "mm9", + bin_size = 10000) + + expect_args(m_bins, 2, + bg_bw, + genome = "mm9", + bin_size = 10000) +}) + + +test_that("bw_bed_diff_analysis passes on parameters", { + m_func <- mock() + m_bed <- mock(reduced_bins, reduced_bg_bins) + with_mock( + bw_granges_diff_analysis = m_func, + bw_bed = m_bed, + bw_bed_diff_analysis(c(bw1, bw2), bg_bw, bed, "treated", "untreated") + ) + expect_call(m_func, 1, + bw_granges_diff_analysis(loci_c1, + loci_c2, + label_c1, + label_c2, + estimate_size_factors = estimate_size_factors) + ) + + expect_call(m_bed, 1, + bw_bed(bwfiles_c1, bedfile = bedfile) + ) + + expect_call(m_bed, 2, + bw_bed(bwfiles_c2, bedfile = bedfile) + ) + + expect_args(m_func, 1, + granges_c1 = reduced_bins, + granges_c2 = reduced_bg_bins, + label_c1 = "treated", + label_c2 = "untreated", + estimate_size_factors = FALSE) + + expect_args(m_bed, 1, + c(bw1, bw2), + bedfile = bed) + + expect_args(m_bed, 2, + bg_bw, + bedfile = bed) +}) +