From a2dc226c8934669cd17179adf393f6283d20619f Mon Sep 17 00:00:00 2001
From: "C. Navarro" <cnluzon@gmail.com>
Date: Wed, 7 Oct 2020 11:09:01 +0200
Subject: [PATCH 1/6] Added DESeq2 as dependency

---
 elsasserlib/DESCRIPTION | 5 +++--
 elsasserlib/README.Md   | 3 ++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/elsasserlib/DESCRIPTION b/elsasserlib/DESCRIPTION
index 288ce8c..b00320c 100644
--- a/elsasserlib/DESCRIPTION
+++ b/elsasserlib/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: elsasserlib
 Type: Package
 Title: General utilities used within Elsasser lab
-Version: 1.0.9
+Version: 1.1.0
 Authors@R: c( 
   person("Carmen", "Navarro", 
          email = "carmen.navarro@scilifelab.se",
@@ -41,7 +41,8 @@ Imports:
   scales,
   RColorBrewer,
   RCurl,
-  pheatmap
+  pheatmap,
+  DESeq2
 Encoding: UTF-8
 LazyData: true
 RoxygenNote: 7.1.1
diff --git a/elsasserlib/README.Md b/elsasserlib/README.Md
index 9381a47..8aeec9b 100644
--- a/elsasserlib/README.Md
+++ b/elsasserlib/README.Md
@@ -17,7 +17,8 @@ dependencies before running the installation:
         'GenomicRanges',
         'rtracklayer',
         'BSgenome.Mmusculus.UCSC.mm9',
-        'BSgenome.Hsapiens.UCSC.hg38'))
+        'BSgenome.Hsapiens.UCSC.hg38',
+        'DESeq2'))
 
 Then you can install directly from this GitHub repository:
 

From f46c06de271149db9b1dfee39784e754cdf45a76 Mon Sep 17 00:00:00 2001
From: "C. Navarro" <cnluzon@gmail.com>
Date: Wed, 7 Oct 2020 11:53:32 +0200
Subject: [PATCH 2/6] bwstats with bins analysis

---
 elsasserlib/NAMESPACE                    |  7 +++
 elsasserlib/R/bwstats.R                  | 77 ++++++++++++++++++++++++
 elsasserlib/man/bw_bins_diff_analysis.Rd | 37 ++++++++++++
 elsasserlib/man/get_nreads_columns.Rd    | 20 ++++++
 4 files changed, 141 insertions(+)
 create mode 100644 elsasserlib/R/bwstats.R
 create mode 100644 elsasserlib/man/bw_bins_diff_analysis.Rd
 create mode 100644 elsasserlib/man/get_nreads_columns.Rd

diff --git a/elsasserlib/NAMESPACE b/elsasserlib/NAMESPACE
index 4db2e9d..5cd953e 100644
--- a/elsasserlib/NAMESPACE
+++ b/elsasserlib/NAMESPACE
@@ -3,6 +3,7 @@
 export(build_bins)
 export(bw_bed)
 export(bw_bins)
+export(bw_bins_diff_analysis)
 export(bw_profile)
 export(mean_ratio_norm)
 export(palette_categorical)
@@ -19,6 +20,11 @@ export(trim_quantile)
 import(ggplot2)
 importFrom(BSgenome.Hsapiens.UCSC.hg38,BSgenome.Hsapiens.UCSC.hg38)
 importFrom(BSgenome.Mmusculus.UCSC.mm9,BSgenome.Mmusculus.UCSC.mm9)
+importFrom(DESeq2,DESeqDataSetFromMatrix)
+importFrom(DESeq2,`sizeFactors<-`)
+importFrom(DESeq2,estimateDispersions)
+importFrom(DESeq2,nbinomWaldTest)
+importFrom(DESeq2,results)
 importFrom(GenomeInfoDb,seqinfo)
 importFrom(GenomeInfoDb,sortSeqlevels)
 importFrom(GenomicRanges,makeGRangesFromDataFrame)
@@ -49,6 +55,7 @@ importFrom(rmarkdown,render)
 importFrom(rtracklayer,BigWigFile)
 importFrom(rtracklayer,import)
 importFrom(rtracklayer,mcols)
+importFrom(stats,complete.cases)
 importFrom(stats,median)
 importFrom(stats,sd)
 importFrom(stringr,str_sort)
diff --git a/elsasserlib/R/bwstats.R b/elsasserlib/R/bwstats.R
new file mode 100644
index 0000000..ed112ed
--- /dev/null
+++ b/elsasserlib/R/bwstats.R
@@ -0,0 +1,77 @@
+#' Run DESeq2 analysis on genome-wide bins
+#'
+#' Runs a DESeq2 analysis on genome-wide bins of a specified bin size.
+#' The particularity of this analysis is that it skips the estimateSizeFactors
+#' step by default, because this is accounted for in the scaling step of
+#' MINUTE-ChIP samples.
+#'
+#' @param bwfiles_c1 Path or array of paths to the bigWig files for first condition.
+#' @param bwfiles_c2 Path or array of paths to the bigWig files for second condition.
+#' @param label_c1 Condition name for first condition
+#' @param label_c2 Condition name for second condition
+#' @param genome Genome. Available choices are mm9, hg38.
+#' @param bin_size Bin size.
+#' @importFrom DESeq2 DESeqDataSetFromMatrix estimateDispersions nbinomWaldTest `sizeFactors<-` results
+#' @return a DESeqResults object as returned by DESeq2::results function
+#' @export
+bw_bins_diff_analysis <- function(bwfiles_c1,
+                                  bwfiles_c2,
+                                  label_c1,
+                                  label_c2,
+                                  bin_size = 10000,
+                                  genome = "mm9") {
+
+  bins_c1 <- bw_bins(bwfiles_c1, genome = genome, bin_size = bin_size)
+  bins_c2 <- bw_bins(bwfiles_c2, genome = genome, bin_size = bin_size)
+
+  cts_c1 <- get_nreads_columns(bins_c1)
+  cts_c2 <- get_nreads_columns(bins_c2)
+
+  cts <- cbind(cts_c1, cts_c2)
+
+  condition_labels <- c(rep(label_c1, ncol(cts_c1)),
+                        rep(label_c2, ncol(cts_c2)))
+
+  coldata <- data.frame(colnames(cts), condition = condition_labels)
+
+  dds <- DESeqDataSetFromMatrix(countData = cts,
+                                colData = coldata,
+                                design = ~ condition)
+
+  # Since files are scaled, we do not want to estimate size factors, so give it
+  # an array of ones
+  ncolumns <- length(bwfiles_c1) + length(bwfiles_c2)
+  sizeFactors(dds) <- c(rep(1, ncolumns))
+
+  dds <- estimateDispersions(dds)
+  dds <- nbinomWaldTest(dds)
+
+  results(dds)
+}
+
+
+#' Get values in a granges object as round numeric values in a matrix.
+#'
+#' This is an auxiliary function for stats. It drops NAs or NaN values, only
+#' complete cases are used.
+#'
+#' @param granges Target granges object
+#' @param fraglen Estimated fragment length
+#'
+#' @return An integer matrix
+#' @importFrom stats complete.cases
+get_nreads_columns <- function(granges, fraglen = 150) {
+  # TODO: Consider whether to multiply by locus length. For bins analysis
+  # this should not affect results, but for genes or loci of different length
+  # it might. Since we skip the size factor step, we may bias the results?
+  # So right now it's only fragment length
+  length_factor <- fraglen
+
+  bins.df <- data.frame(granges)
+  cts <- as.matrix(mcols(granges))
+  cts <- as.matrix(cts[complete.cases(cts),])
+  cts <- round(cts*length_factor)
+  cts
+}
+
+
diff --git a/elsasserlib/man/bw_bins_diff_analysis.Rd b/elsasserlib/man/bw_bins_diff_analysis.Rd
new file mode 100644
index 0000000..f836fb1
--- /dev/null
+++ b/elsasserlib/man/bw_bins_diff_analysis.Rd
@@ -0,0 +1,37 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/bwstats.R
+\name{bw_bins_diff_analysis}
+\alias{bw_bins_diff_analysis}
+\title{Run DESeq2 analysis on genome-wide bins}
+\usage{
+bw_bins_diff_analysis(
+  bwfiles_c1,
+  bwfiles_c2,
+  label_c1,
+  label_c2,
+  bin_size = 10000,
+  genome = "mm9"
+)
+}
+\arguments{
+\item{bwfiles_c1}{Path or array of paths to the bigWig files for first condition.}
+
+\item{bwfiles_c2}{Path or array of paths to the bigWig files for second condition.}
+
+\item{label_c1}{Condition name for first condition}
+
+\item{label_c2}{Condition name for second condition}
+
+\item{bin_size}{Bin size.}
+
+\item{genome}{Genome. Available choices are mm9, hg38.}
+}
+\value{
+a DESeqResults object as returned by DESeq2::results function
+}
+\description{
+Runs a DESeq2 analysis on genome-wide bins of a specified bin size.
+The particularity of this analysis is that it skips the estimateSizeFactors
+step by default, because this is accounted for in the scaling step of
+MINUTE-ChIP samples.
+}
diff --git a/elsasserlib/man/get_nreads_columns.Rd b/elsasserlib/man/get_nreads_columns.Rd
new file mode 100644
index 0000000..b3fcc18
--- /dev/null
+++ b/elsasserlib/man/get_nreads_columns.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/bwstats.R
+\name{get_nreads_columns}
+\alias{get_nreads_columns}
+\title{Get values in a granges object as round numeric values in a matrix.}
+\usage{
+get_nreads_columns(granges, fraglen = 150)
+}
+\arguments{
+\item{granges}{Target granges object}
+
+\item{fraglen}{Estimated fragment length}
+}
+\value{
+An integer matrix
+}
+\description{
+This is an auxiliary function for stats. It drops NAs or NaN values, only
+complete cases are used.
+}

From 7198c109852e896680b7149a3afc6ac455186381 Mon Sep 17 00:00:00 2001
From: "C. Navarro" <cnluzon@gmail.com>
Date: Wed, 7 Oct 2020 15:02:47 +0200
Subject: [PATCH 3/6] bw_bed_diff_analysis and refactor

---
 elsasserlib/NAMESPACE                       |   3 +
 elsasserlib/R/bwstats.R                     | 101 ++++++++++++++++----
 elsasserlib/man/bw_bed_diff_analysis.Rd     |  38 ++++++++
 elsasserlib/man/bw_bins_diff_analysis.Rd    |  10 +-
 elsasserlib/man/bw_granges_diff_analysis.Rd |  36 +++++++
 elsasserlib/man/get_nreads_columns.Rd       |   6 +-
 6 files changed, 169 insertions(+), 25 deletions(-)
 create mode 100644 elsasserlib/man/bw_bed_diff_analysis.Rd
 create mode 100644 elsasserlib/man/bw_granges_diff_analysis.Rd

diff --git a/elsasserlib/NAMESPACE b/elsasserlib/NAMESPACE
index 5cd953e..02646a9 100644
--- a/elsasserlib/NAMESPACE
+++ b/elsasserlib/NAMESPACE
@@ -2,8 +2,10 @@
 
 export(build_bins)
 export(bw_bed)
+export(bw_bed_diff_analysis)
 export(bw_bins)
 export(bw_bins_diff_analysis)
+export(bw_granges_diff_analysis)
 export(bw_profile)
 export(mean_ratio_norm)
 export(palette_categorical)
@@ -23,6 +25,7 @@ importFrom(BSgenome.Mmusculus.UCSC.mm9,BSgenome.Mmusculus.UCSC.mm9)
 importFrom(DESeq2,DESeqDataSetFromMatrix)
 importFrom(DESeq2,`sizeFactors<-`)
 importFrom(DESeq2,estimateDispersions)
+importFrom(DESeq2,estimateSizeFactors)
 importFrom(DESeq2,nbinomWaldTest)
 importFrom(DESeq2,results)
 importFrom(GenomeInfoDb,seqinfo)
diff --git a/elsasserlib/R/bwstats.R b/elsasserlib/R/bwstats.R
index ed112ed..f80597d 100644
--- a/elsasserlib/R/bwstats.R
+++ b/elsasserlib/R/bwstats.R
@@ -7,11 +7,9 @@
 #'
 #' @param bwfiles_c1 Path or array of paths to the bigWig files for first condition.
 #' @param bwfiles_c2 Path or array of paths to the bigWig files for second condition.
-#' @param label_c1 Condition name for first condition
-#' @param label_c2 Condition name for second condition
 #' @param genome Genome. Available choices are mm9, hg38.
 #' @param bin_size Bin size.
-#' @importFrom DESeq2 DESeqDataSetFromMatrix estimateDispersions nbinomWaldTest `sizeFactors<-` results
+#' @inheritParams bw_granges_diff_analysis
 #' @return a DESeqResults object as returned by DESeq2::results function
 #' @export
 bw_bins_diff_analysis <- function(bwfiles_c1,
@@ -19,18 +17,80 @@ bw_bins_diff_analysis <- function(bwfiles_c1,
                                   label_c1,
                                   label_c2,
                                   bin_size = 10000,
-                                  genome = "mm9") {
+                                  genome = "mm9",
+                                  estimate_size_factors = FALSE) {
 
   bins_c1 <- bw_bins(bwfiles_c1, genome = genome, bin_size = bin_size)
   bins_c2 <- bw_bins(bwfiles_c2, genome = genome, bin_size = bin_size)
 
-  cts_c1 <- get_nreads_columns(bins_c1)
-  cts_c2 <- get_nreads_columns(bins_c2)
+  bw_granges_diff_analysis(bins_c1, bins_c2, label_c1, label_c2,
+                           estimate_size_factors = estimate_size_factors)
+}
+
+#' Run DESeq2 analysis on bed file
+#'
+#' Runs a DESeq2 analysis on a set of loci specified in a BED file.
+#' The particularity of this analysis is that it skips the estimateSizeFactors
+#' step by default, because this is accounted for in the scaling step of
+#' MINUTE-ChIP samples.
+#'
+#' @param bwfiles_c1 Path or array of paths to the bigWig files for first condition.
+#' @param bwfiles_c2 Path or array of paths to the bigWig files for second condition.
+#' @param bedfile BED file for locus specific analysis.
+#' @inheritParams bw_granges_diff_analysis
+#' @return a DESeqResults object as returned by DESeq2::results function
+#' @export
+bw_bed_diff_analysis <- function(bwfiles_c1,
+                                 bwfiles_c2,
+                                 bedfile,
+                                 label_c1,
+                                 label_c2,
+                                 estimate_size_factors = FALSE) {
+  # Both conditions are scored over the same BED loci.
+  loci_c1 <- bw_bed(bwfiles_c1, bedfile = bedfile)
+  loci_c2 <- bw_bed(bwfiles_c2, bedfile = bedfile)
+  # Delegate to the GRanges-based analysis, forwarding labels and the flag.
+  bw_granges_diff_analysis(loci_c1, loci_c2, label_c1, label_c2,
+                           estimate_size_factors = estimate_size_factors)
+}
 
-  cts <- cbind(cts_c1, cts_c2)
 
-  condition_labels <- c(rep(label_c1, ncol(cts_c1)),
-                        rep(label_c2, ncol(cts_c2)))
+#' Compute DESeq2 differential analysis on GRanges objects
+#'
+#' Runs a DESeq2 analysis on loci specified on GRanges objects.
+#' The particularity of this analysis is that it skips the estimateSizeFactors
+#' step by default, because this is accounted for in the scaling step of
+#' MINUTE-ChIP samples.
+#'
+#' @param granges_c1 GRanges object containing the values for condition 1.
+#' @param granges_c2 GRanges object containing the values for condition 2.
+#'     Note that these objects must correspond to the same loci.
+#' @param label_c1 Condition name for condition 1.
+#' @param label_c2 Condition name for condition 2.
+#' @param estimate_size_factors If TRUE, normal DESeq2 procedure is done. Set it
+#'     to true to analyze non-MINUTE data.
+#' @importFrom DESeq2 DESeqDataSetFromMatrix estimateDispersions nbinomWaldTest `sizeFactors<-` results estimateSizeFactors
+#' @return a DESeqResults object as returned by DESeq2::results function
+#' @export
+bw_granges_diff_analysis <- function(granges_c1,
+                                     granges_c2,
+                                     label_c1,
+                                     label_c2,
+                                     estimate_size_factors = FALSE) {
+
+  # Bind first, filter after: dropping NA rows per condition could mismatch rows
+  granges_c1 <- sortSeqlevels(granges_c1)
+  granges_c1 <- sort(granges_c1)
+
+  granges_c2 <- sortSeqlevels(granges_c2)
+  granges_c2 <- sort(granges_c2)
+
+  cts_df <- cbind(data.frame(granges_c1), mcols(granges_c2))
+
+  cts <- get_nreads_columns(cts_df[, 6:ncol(cts_df)])
+
+  condition_labels <- c(rep(label_c1, length(mcols(granges_c1))),
+                        rep(label_c2, length(mcols(granges_c2))))
 
   coldata <- data.frame(colnames(cts), condition = condition_labels)
 
@@ -38,10 +98,15 @@ bw_bins_diff_analysis <- function(bwfiles_c1,
                                 colData = coldata,
                                 design = ~ condition)
 
-  # Since files are scaled, we do not want to estimate size factors, so give it
-  # an array of ones
-  ncolumns <- length(bwfiles_c1) + length(bwfiles_c2)
-  sizeFactors(dds) <- c(rep(1, ncolumns))
+
+  if (estimate_size_factors == TRUE) {
+    dds <- estimateSizeFactors(dds)
+  }
+  else {
+    # Since files are scaled, we do not want to estimate size factors, so give it
+    # an array of ones
+    sizeFactors(dds) <- c(rep(1, ncol(cts)))
+  }
 
   dds <- estimateDispersions(dds)
   dds <- nbinomWaldTest(dds)
@@ -50,25 +115,23 @@ bw_bins_diff_analysis <- function(bwfiles_c1,
 }
 
 
-#' Get values in a granges object as round numeric values in a matrix.
+#' Get values in a data frame object as round numeric values in a matrix.
 #'
 #' This is an auxiliary function for stats. It drops NAs or NaN values, only
 #' complete cases are used.
 #'
-#' @param granges Target granges object
+#' @param df Target data frame
 #' @param fraglen Estimated fragment length
 #'
 #' @return An integer matrix
 #' @importFrom stats complete.cases
-get_nreads_columns <- function(granges, fraglen = 150) {
+get_nreads_columns <- function(df, fraglen = 150) {
   # TODO: Consider whether to multiply by locus length. For bins analysis
   # this should not affect results, but for genes or loci of different length
   # it might. Since we skip the size factor step, we may bias the results?
   # So right now it's only fragment length
   length_factor <- fraglen
-
-  bins.df <- data.frame(granges)
-  cts <- as.matrix(mcols(granges))
+  cts <- as.matrix(df)
   cts <- as.matrix(cts[complete.cases(cts),])
   cts <- round(cts*length_factor)
   cts
diff --git a/elsasserlib/man/bw_bed_diff_analysis.Rd b/elsasserlib/man/bw_bed_diff_analysis.Rd
new file mode 100644
index 0000000..abbe338
--- /dev/null
+++ b/elsasserlib/man/bw_bed_diff_analysis.Rd
@@ -0,0 +1,38 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/bwstats.R
+\name{bw_bed_diff_analysis}
+\alias{bw_bed_diff_analysis}
+\title{Run DESeq2 analysis on bed file}
+\usage{
+bw_bed_diff_analysis(
+  bwfiles_c1,
+  bwfiles_c2,
+  bedfile,
+  label_c1,
+  label_c2,
+  estimate_size_factors = FALSE
+)
+}
+\arguments{
+\item{bwfiles_c1}{Path or array of paths to the bigWig files for first condition.}
+
+\item{bwfiles_c2}{Path or array of paths to the bigWig files for second condition.}
+
+\item{bedfile}{BED file for locus specific analysis.}
+
+\item{label_c1}{Condition name for condition 1.}
+
+\item{label_c2}{Condition name for condition 2.}
+
+\item{estimate_size_factors}{If TRUE, normal DESeq2 procedure is done. Set it
+to true to analyze non-MINUTE data.}
+}
+\value{
+a DESeqResults object as returned by DESeq2::results function
+}
+\description{
+Runs a DESeq2 analysis on a set of loci specified in a BED file.
+The particularity of this analysis is that it skips the estimateSizeFactors
+step by default, because this is accounted for in the scaling step of
+MINUTE-ChIP samples.
+}
diff --git a/elsasserlib/man/bw_bins_diff_analysis.Rd b/elsasserlib/man/bw_bins_diff_analysis.Rd
index f836fb1..7fab21d 100644
--- a/elsasserlib/man/bw_bins_diff_analysis.Rd
+++ b/elsasserlib/man/bw_bins_diff_analysis.Rd
@@ -10,7 +10,8 @@ bw_bins_diff_analysis(
   label_c1,
   label_c2,
   bin_size = 10000,
-  genome = "mm9"
+  genome = "mm9",
+  estimate_size_factors = FALSE
 )
 }
 \arguments{
@@ -18,13 +19,16 @@ bw_bins_diff_analysis(
 
 \item{bwfiles_c2}{Path or array of paths to the bigWig files for second condition.}
 
-\item{label_c1}{Condition name for first condition}
+\item{label_c1}{Condition name for condition 1.}
 
-\item{label_c2}{Condition name for second condition}
+\item{label_c2}{Condition name for condition 2.}
 
 \item{bin_size}{Bin size.}
 
 \item{genome}{Genome. Available choices are mm9, hg38.}
+
+\item{estimate_size_factors}{If TRUE, normal DESeq2 procedure is done. Set it
+to true to analyze non-MINUTE data.}
 }
 \value{
 a DESeqResults object as returned by DESeq2::results function
diff --git a/elsasserlib/man/bw_granges_diff_analysis.Rd b/elsasserlib/man/bw_granges_diff_analysis.Rd
new file mode 100644
index 0000000..0134e52
--- /dev/null
+++ b/elsasserlib/man/bw_granges_diff_analysis.Rd
@@ -0,0 +1,36 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/bwstats.R
+\name{bw_granges_diff_analysis}
+\alias{bw_granges_diff_analysis}
+\title{Compute DESeq2 differential analysis on GRanges objects}
+\usage{
+bw_granges_diff_analysis(
+  granges_c1,
+  granges_c2,
+  label_c1,
+  label_c2,
+  estimate_size_factors = FALSE
+)
+}
+\arguments{
+\item{granges_c1}{GRanges object containing the values for condition 1.}
+
+\item{granges_c2}{GRanges object containing the values for condition 2.
+Note that these objects must correspond to the same loci.}
+
+\item{label_c1}{Condition name for condition 1.}
+
+\item{label_c2}{Condition name for condition 2.}
+
+\item{estimate_size_factors}{If TRUE, normal DESeq2 procedure is done. Set it
+to true to analyze non-MINUTE data.}
+}
+\value{
+a DESeqResults object as returned by DESeq2::results function
+}
+\description{
+Runs a DESeq2 analysis on loci specified on GRanges objects.
+The particularity of this analysis is that it skips the estimateSizeFactors
+step by default, because this is accounted for in the scaling step of
+MINUTE-ChIP samples.
+}
diff --git a/elsasserlib/man/get_nreads_columns.Rd b/elsasserlib/man/get_nreads_columns.Rd
index b3fcc18..41b785a 100644
--- a/elsasserlib/man/get_nreads_columns.Rd
+++ b/elsasserlib/man/get_nreads_columns.Rd
@@ -2,12 +2,12 @@
 % Please edit documentation in R/bwstats.R
 \name{get_nreads_columns}
 \alias{get_nreads_columns}
-\title{Get values in a granges object as round numeric values in a matrix.}
+\title{Get values in a data frame object as round numeric values in a matrix.}
 \usage{
-get_nreads_columns(granges, fraglen = 150)
+get_nreads_columns(df, fraglen = 150)
 }
 \arguments{
-\item{granges}{Target granges object}
+\item{df}{Target data frame}
 
 \item{fraglen}{Estimated fragment length}
 }

From f93316da1de244a5259a3bf32c2e212ffa11eddf Mon Sep 17 00:00:00 2001
From: "C. Navarro" <cnluzon@gmail.com>
Date: Wed, 7 Oct 2020 15:03:07 +0200
Subject: [PATCH 4/6] DESeq2 dependency on CI

---
 .github/workflows/R-CMD-check.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
index c7ed99d..d6397c6 100644
--- a/.github/workflows/R-CMD-check.yaml
+++ b/.github/workflows/R-CMD-check.yaml
@@ -67,7 +67,7 @@ jobs:
       - name: Manually install BioConductor dependencies
         run: |
           install.packages("BiocManager")
-          BiocManager::install(c("rtracklayer", "GenomeInfoDb", "GenomicRanges", "BSgenome.Mmusculus.UCSC.mm9", "BSgenome.Hsapiens.UCSC.hg38"))
+          BiocManager::install(c("rtracklayer", "GenomeInfoDb", "GenomicRanges", "BSgenome.Mmusculus.UCSC.mm9", "BSgenome.Hsapiens.UCSC.hg38", "DESeq2"))
         shell: Rscript {0}
         working-directory: elsasserlib
 

From 2c817e7f115a13f4a32faf066930b4375bdcfd42 Mon Sep 17 00:00:00 2001
From: "C. Navarro" <cnluzon@gmail.com>
Date: Wed, 7 Oct 2020 16:19:22 +0200
Subject: [PATCH 5/6] Length factor scaled to 1000 as default

---
 elsasserlib/R/bwstats.R               | 5 ++---
 elsasserlib/man/get_nreads_columns.Rd | 4 ++--
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/elsasserlib/R/bwstats.R b/elsasserlib/R/bwstats.R
index f80597d..b88adb7 100644
--- a/elsasserlib/R/bwstats.R
+++ b/elsasserlib/R/bwstats.R
@@ -121,16 +121,15 @@ bw_granges_diff_analysis <- function(granges_c1,
 #' complete cases are used.
 #'
 #' @param df Target data frame
-#' @param fraglen Estimated fragment length
+#' @param length_factor Scaling factor to multiply coverage values by.
 #'
 #' @return An integer matrix
 #' @importFrom stats complete.cases
-get_nreads_columns <- function(df, fraglen = 150) {
+get_nreads_columns <- function(df, length_factor = 1000) {
   # TODO: Consider whether to multiply by locus length. For bins analysis
   # this should not affect results, but for genes or loci of different length
   # it might. Since we skip the size factor step, we may bias the results?
   # So right now it's only fragment length
-  length_factor <- fraglen
   cts <- as.matrix(df)
   cts <- as.matrix(cts[complete.cases(cts),])
   cts <- round(cts*length_factor)
diff --git a/elsasserlib/man/get_nreads_columns.Rd b/elsasserlib/man/get_nreads_columns.Rd
index 41b785a..b25de3e 100644
--- a/elsasserlib/man/get_nreads_columns.Rd
+++ b/elsasserlib/man/get_nreads_columns.Rd
@@ -4,12 +4,12 @@
 \alias{get_nreads_columns}
 \title{Get values in a data frame object as round numeric values in a matrix.}
 \usage{
-get_nreads_columns(df, fraglen = 150)
+get_nreads_columns(df, length_factor = 1000)
 }
 \arguments{
 \item{df}{Target data frame}
 
-\item{fraglen}{Estimated fragment length}
+\item{length_factor}{Scaling factor to multiply coverage values by.}
 }
 \value{
 An integer matrix

From 888c23b9b7cd9518d0a576fd3ee88c44ea7e1033 Mon Sep 17 00:00:00 2001
From: "C. Navarro" <cnluzon@gmail.com>
Date: Wed, 7 Oct 2020 17:38:41 +0200
Subject: [PATCH 6/6] few parameter tests

---
 elsasserlib/tests/testthat/test_bwstats.R | 110 ++++++++++++++++++++++
 1 file changed, 110 insertions(+)
 create mode 100644 elsasserlib/tests/testthat/test_bwstats.R

diff --git a/elsasserlib/tests/testthat/test_bwstats.R b/elsasserlib/tests/testthat/test_bwstats.R
new file mode 100644
index 0000000..b13ac9e
--- /dev/null
+++ b/elsasserlib/tests/testthat/test_bwstats.R
@@ -0,0 +1,110 @@
+context("Test functions for bigWig stats")
+library(GenomicRanges)
+library(testthat)
+library(mockery)
+
+get_file_path <- function(filename) {  # yields "" when the file is not found
+  system.file("extdata", filename, package = "elsasserlib")
+}
+
+bw1 <- get_file_path("ES_H33_00h_rep1_hoxc.bw")
+bw2 <- get_file_path("ES_H33_03h_rep1_hoxc.bw")
+bg_bw <- get_file_path("ES_H33_inp_rep1_hoxc.bw")
+bed <- get_file_path("chromhmm_hoxc.bed")
+
+bw_limits <- GRanges(seqnames = c("chr15"),
+                     ranges = IRanges(c(102723600, 102959000)))
+# Canned results returned by the mocked bw_bins / bw_bed in the tests below.
+reduced_bins <- bw_bins(c(bw1, bw2), selection = bw_limits)
+reduced_bg_bins <- bw_bins(c(bg_bw), selection = bw_limits)
+
+test_that("Setup files exist", {
+  expect_true(file_test("-f", bw1))  # "-f": exists and is not a directory
+  expect_true(file_test("-f", bw2))
+  expect_true(file_test("-f", bg_bw))
+  expect_true(file_test("-f", bed))
+})
+
+test_that("bw_bins_diff_analysis passes on parameters", {
+  m_func <- mock()
+  m_bins <- mock(reduced_bins, reduced_bg_bins)  # returned on 1st, 2nd call
+  with_mock(
+    bw_granges_diff_analysis = m_func,
+    bw_bins = m_bins,
+    bw_bins_diff_analysis(c(bw1, bw2), bg_bw, "treated", "untreated")
+  )
+  expect_call(m_func, 1,
+              bw_granges_diff_analysis(bins_c1,
+                                       bins_c2,
+                                       label_c1,
+                                       label_c2,
+                                       estimate_size_factors = estimate_size_factors)
+  )
+  # bw_bins is invoked once per condition, in order: c1 first, then c2.
+  expect_call(m_bins, 1,
+              bw_bins(bwfiles_c1, genome = genome, bin_size = bin_size)
+  )
+
+  expect_call(m_bins, 2,
+              bw_bins(bwfiles_c2, genome = genome, bin_size = bin_size)
+  )
+  # Evaluated values: mocked bins and labels are forwarded unchanged.
+  expect_args(m_func, 1,
+              granges_c1 = reduced_bins,
+              granges_c2 = reduced_bg_bins,
+              label_c1 = "treated",
+              label_c2 = "untreated",
+              estimate_size_factors = FALSE)
+  # Defaults (genome = "mm9", bin_size = 10000) are forwarded to bw_bins.
+  expect_args(m_bins, 1,
+              c(bw1, bw2),
+              genome = "mm9",
+              bin_size = 10000)
+
+  expect_args(m_bins, 2,
+              bg_bw,
+              genome = "mm9",
+              bin_size = 10000)
+})
+
+
+test_that("bw_bed_diff_analysis passes on parameters", {
+  m_func <- mock()
+  m_bed <- mock(reduced_bins, reduced_bg_bins)  # returned on 1st, 2nd call
+  with_mock(
+    bw_granges_diff_analysis = m_func,
+    bw_bed = m_bed,
+    bw_bed_diff_analysis(c(bw1, bw2), bg_bw, bed, "treated", "untreated")
+  )
+  expect_call(m_func, 1,
+              bw_granges_diff_analysis(loci_c1,
+                                       loci_c2,
+                                       label_c1,
+                                       label_c2,
+                                       estimate_size_factors = estimate_size_factors)
+  )
+  # bw_bed is invoked once per condition, in order: c1 first, then c2.
+  expect_call(m_bed, 1,
+              bw_bed(bwfiles_c1, bedfile = bedfile)
+  )
+
+  expect_call(m_bed, 2,
+              bw_bed(bwfiles_c2, bedfile = bedfile)
+  )
+  # Evaluated values: mocked loci and labels are forwarded unchanged.
+  expect_args(m_func, 1,
+              granges_c1 = reduced_bins,
+              granges_c2 = reduced_bg_bins,
+              label_c1 = "treated",
+              label_c2 = "untreated",
+              estimate_size_factors = FALSE)
+  # The same BED path is forwarded to bw_bed for both conditions.
+  expect_args(m_bed, 1,
+              c(bw1, bw2),
+              bedfile = bed)
+
+  expect_args(m_bed, 2,
+              bg_bw,
+              bedfile = bed)
+})
+