Merge pull request compbiomed#43 from dfjenkins3/master

Updates from 2017-02-06 to 2017-02-12
dfjenkins3 · Feb 13, 2017 · 9aa2f6c · 9aa2f6c
2 parents 834bed0 + 0aaeebe
commit 9aa2f6c
Show file tree

Hide file tree

Showing 12 changed files with 224 additions and 121 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -0,0 +1,7 @@
+language: r
+
+sudo: required
+
+warnings_are_errors: true
+
+bioc_required: true
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: singleCellTK
 Type: Package
 Title: Interactive Analysis of Single Cell RNA-Seq Data
-Version: 0.1.0
+Version: 0.1.2
 Author: David Jenkins
 Maintainer: David Jenkins <dfj@bu.edu>
 Description: Run common single cell analysis directly through your browser
@@ -22,5 +22,8 @@ Imports:
     plotly,
     ggplot2,
     Rtsne,
-    shinyjs
+    shinyjs,
+    RColorBrewer,
+    methods,
+    Biobase
 RoxygenNote: 5.0.1
diff --git a/R/Downsample.Matrix.R b/R/Downsample.Matrix.R
@@ -1,10 +1,10 @@
 #' Downsample Data
 #'
-#' @param datamatrix 
-#' @param newcounts 
-#' @param byBatch 
-#' @param batch 
-#' @param iterations 
+#' @param datamatrix TODO:document
+#' @param newcounts TODO:document
+#' @param byBatch TODO:document
+#' @param batch TODO:document
+#' @param iterations TODO:document
 #'
 #' @return Downsampled matrix
 #' @export Downsample
@@ -15,7 +15,7 @@ Downsample <- function(datamatrix, newcounts = c(4, 16, 64, 256, 1024, 4096, 163
     for (j in 1:dim(datamatrix)[2]) {
       probs <- datamatrix[, j] / sum(datamatrix[, j])
       for (k in 1:length(newcounts)) {
-        samps <- rmultinom(iterations, newcounts[k], probs)
+        samps <- stats::rmultinom(iterations, newcounts[k], probs)
         for (l in 1:iterations) {
           outmat[,j,k,l] <- samps[,l]
         }
@@ -27,7 +27,7 @@ Downsample <- function(datamatrix, newcounts = c(4, 16, 64, 256, 1024, 4096, 163
     for (j in 1:nlevels(batch)) {
       probs <- datamatrix[,which(batch == levels(batch)[j])] / sum(datamatrix[,which(batch == levels(batch)[j])])
       for (k in 1:length(newcounts)) {
-        samps <- rmultinom(iterations, newcounts[k], as.vector(probs))
+        samps <- stats::rmultinom(iterations, newcounts[k], as.vector(probs))
         for (l in 1:iterations) {
           outmat[,which(batch == levels(batch)[j]),k,l] <- as.matrix(samps[,l], nrow = dim(datamatrix)[1])
         }

diff --git a/R/misc_functions.R b/R/misc_functions.R
@@ -13,8 +13,8 @@ summarizeTable <- function(indata){
                                "Genes with no expression across all samples"),
                     "Value"=c(ncol(indata),
                               nrow(indata),
-                              sum(apply(counts(indata), 2, function(x) sum(as.numeric(x)==0)) < 1700),
-                              sum(rowSums(counts(indata)) == 0))))
+                              sum(apply(scater::counts(indata), 2, function(x) sum(as.numeric(x)==0)) < 1700),
+                              sum(rowSums(scater::counts(indata)) == 0))))
 }
 
 #' Create a SCESet object
@@ -23,21 +23,47 @@ summarizeTable <- function(indata){
 #' object.
 #'
 #' @param countfile The path to a text file that contains a header row of sample
-#' names, and rows of raw counts per gene for those samples
+#' names, and rows of raw counts per gene for those samples.
 #' @param annotfile The path to a text file that contains columns of annotation
 #' information for each sample in the countfile. This file should have the same
 #' number of rows as there are columns in the countfile.
+#' @param featurefile The path to a text file that contains columns of
+#' annotation information for each gene in the count matrix. This file should
+#' have the same genes in the same order as countfile. This is optional.
+#' @param inputdataframes If TRUE, countfile, annotfile, and featurefile are
+#' used directly as data frames instead of being read from file paths. The
+#' default is FALSE.
 #'
 #' @return a SCESet object
 #' @export createSCESet
-createSCESet <- function(countfile, annotfile){
-  countsin <- read.table(countfile, sep="\t", header=T, row.names=1)
-  annotin <- read.table(annotfile, sep="\t", header=T, row.names=1)
-  pd <- new("AnnotatedDataFrame", data = annotin)
-
-  gene_df <- data.frame(Gene = rownames(countsin))
-  rownames(gene_df) <- gene_df$Gene
-  fd <- new("AnnotatedDataFrame", data = gene_df)
-  return(newSCESet(countData = countsin, phenoData = pd,
-                   featureData = fd))
+createSCESet <- function(countfile=NULL, annotfile=NULL, featurefile=NULL,
+                         inputdataframes=FALSE){
+  if(is.null(countfile)){
+    stop("You must supply a count file.")
+  }
+  if(inputdataframes){
+    countsin <- countfile
+    annotin <- annotfile
+    featurein <- featurefile
+  } else{
+    countsin <- utils::read.table(countfile, sep="\t", header=T, row.names=1)
+    if(!is.null(annotfile)){
+      annotin <- utils::read.table(annotfile, sep="\t", header=T, row.names=1)
+    }
+    if(!is.null(featurefile)){
+      featurein <- utils::read.table(featurefile, sep="\t", header=T, row.names=1)
+    }
+  }
+  if(is.null(annotfile)){
+    annotin <- data.frame(row.names=colnames(countsin))
+    annotin$Sample <- rownames(annotin)
+  }
+  if(is.null(featurefile)){
+    featurein <- data.frame(Gene = rownames(countsin))
+    rownames(featurein) <- featurein$Gene
+  }
+  pd <- methods::new("AnnotatedDataFrame", data = annotin)
+  fd <- methods::new("AnnotatedDataFrame", data = featurein)
+  return(scater::newSCESet(countData = countsin, phenoData = pd,
+                           featureData = fd))
 }
diff --git a/R/scDiffEx.R b/R/scDiffEx.R
@@ -12,6 +12,8 @@
 #' up to ntop genes. Required
 #' @param diffexmethod The method for performing differential expression
 #' analysis. Available options are DESeq, DESeq2, and limma. Required
+#' @param clusterRow Cluster the rows. The default is TRUE
+#' @param clusterCol Cluster the columns. The default is TRUE
 #'
 #' @return A list of differentially expressed genes.
 #' @export scDiffEx
@@ -24,7 +26,7 @@
 scDiffEx <- function(inSCESet, condition, significance=0.05, ntop=500,
                      usesig=TRUE, diffexmethod, clusterRow=TRUE,
                      clusterCol=TRUE){
-  in.condition <- droplevels(as.factor(pData(inSCESet)[,condition]))
+  in.condition <- droplevels(as.factor(scater::pData(inSCESet)[,condition]))
   if (length(levels(in.condition)) != 2)
     stop("only two labels supported, ", condition, " has ",
          length(levels(in.condition)), " labels")
@@ -58,23 +60,25 @@ scDiffEx <- function(inSCESet, condition, significance=0.05, ntop=500,
 
 #' Plot Differential Expression
 #'
-#' @param inSCESet 
-#' @param condition 
-#' @param geneList 
-#' @param clusterRow 
-#' @param clusterCol 
+#' @param inSCESet Input data object that contains the data to be plotted.
+#' Required
+#' @param condition The condition used for plotting the heatmap. Required
+#' @param geneList The list of genes to put in the heatmap. Required
+#' @param clusterRow Cluster the rows. The default is TRUE
+#' @param clusterCol Cluster the columns. The default is TRUE
 #'
-#' @return
+#' @return ComplexHeatmap object for the provided geneList annotated with the
+#' condition.
 #' @export plot_DiffEx
 #'
 plot_DiffEx <- function(inSCESet, condition, geneList, clusterRow=TRUE,
                      clusterCol=TRUE){
-  diffex.annotation <- data.frame(pData(inSCESet)[,condition])
+  diffex.annotation <- data.frame(scater::pData(inSCESet)[,condition])
   colnames(diffex.annotation) <- condition
   topha <- ComplexHeatmap::HeatmapAnnotation(df = diffex.annotation,
                                              height = unit(0.333, "cm"))
 
-  heatmap <- ComplexHeatmap::Heatmap(t(scale(t(exprs(inSCESet)[geneList,]))),
+  heatmap <- ComplexHeatmap::Heatmap(t(scale(t(Biobase::exprs(inSCESet)[geneList,]))),
                                      name="Expression",
                                      column_title = "Differential Expression",
                                      cluster_rows = clusterRow,
@@ -85,23 +89,24 @@ plot_DiffEx <- function(inSCESet, condition, geneList, clusterRow=TRUE,
 
 #' Plot Interactive Differential Expression
 #'
-#' @param inSCESet 
-#' @param condition 
-#' @param geneList 
-#' @param clusterRow 
-#' @param clusterCol 
+#' @param inSCESet Input data object that contains the data to be plotted.
+#' Required
+#' @param condition The condition used for plotting the heatmap. Required
+#' @param geneList The list of genes to put in the heatmap. Required
+#' @param clusterRow Cluster the rows. The default is TRUE
+#' @param clusterCol Cluster the columns. The default is TRUE
 #'
-#' @return
+#' @return A d3heatmap object is plotted
 #' @export plot_d3DiffEx
 #'
 plot_d3DiffEx <- function(inSCESet, condition, geneList, clusterRow=TRUE,
                           clusterCol=TRUE){
-  diffex.annotation <- data.frame(pData(inSCESet)[,condition])
+  diffex.annotation <- data.frame(scater::pData(inSCESet)[,condition])
   colnames(diffex.annotation) <- condition
   topha <- ComplexHeatmap::HeatmapAnnotation(df = diffex.annotation,
                                              height = unit(0.333, "cm"))
 
-  d3heatmap::d3heatmap(t(scale(t(exprs(inSCESet)[geneList,]))),
+  d3heatmap::d3heatmap(t(scale(t(Biobase::exprs(inSCESet)[geneList,]))),
                        Rowv=clusterRow,
                        Colv=clusterCol,
                        ColSideColors=RColorBrewer::brewer.pal(8, "Set1")[as.numeric(factor(diffex.annotation[,1]))])
@@ -120,8 +125,8 @@ plot_d3DiffEx <- function(inSCESet, condition, geneList, clusterRow=TRUE,
 #' @export scDiffEx_deseq2
 #'
 scDiffEx_deseq2 <- function(inSCESet, condition){
-  cnts <- counts(inSCESet)
-  annot_data <- pData(inSCESet)[,condition,drop=F]
+  cnts <- scater::counts(inSCESet)
+  annot_data <- scater::pData(inSCESet)[,condition,drop=F]
   colnames(annot_data) <- "condition"
   dds <- DESeq2::DESeqDataSetFromMatrix(countData = cnts,
                                         colData = annot_data,
@@ -143,7 +148,7 @@ scDiffEx_deseq2 <- function(inSCESet, condition){
 #' @export scDiffEx_deseq
 #'
 scDiffEx_deseq <- function(inSCESet, condition){
-  countData <- DESeq::newCountDataSet(counts(inSCESet), condition)
+  countData <- DESeq::newCountDataSet(scater::counts(inSCESet), condition)
   countData <- DESeq::estimateSizeFactors(countData)
   countData <- DESeq::estimateDispersions(countData, method="pooled",
                                           fitType="local")
@@ -169,10 +174,10 @@ scDiffEx_deseq <- function(inSCESet, condition){
 #' @export scDiffEx_limma
 #'
 scDiffEx_limma <- function(inSCESet, condition){
-  design <- model.matrix(~factor(pData(inSCESet)[,condition]))
-  fit <- lmFit(exprs(inSCESet), design)
-  ebayes <- eBayes(fit)
-  topGenes <- topTable(ebayes, coef=2, adjust="fdr", number=nrow(inSCESet))
+  design <- stats::model.matrix(~factor(scater::pData(inSCESet)[,condition]))
+  fit <- limma::lmFit(Biobase::exprs(inSCESet), design)
+  ebayes <- limma::eBayes(fit)
+  topGenes <- limma::topTable(ebayes, coef=2, adjust="fdr", number=nrow(inSCESet))
   colnames(topGenes)[5] <- "padj"
   return(topGenes)
 }