Skip to content

Commit

Permalink
Merge pull request compbiomed#43 from dfjenkins3/master
Browse files Browse the repository at this point in the history
Updates from 2017-02-06 to 2017-02-12
  • Loading branch information
dfjenkins3 authored Feb 13, 2017
2 parents 834bed0 + 0aaeebe commit 9aa2f6c
Show file tree
Hide file tree
Showing 12 changed files with 224 additions and 121 deletions.
7 changes: 7 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
language: r

sudo: required

warnings_are_errors: true

bioc_required: true
7 changes: 5 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: singleCellTK
Type: Package
Title: Interactive Analysis of Single Cell RNA-Seq Data
Version: 0.1.0
Version: 0.1.2
Author: David Jenkins
Maintainer: David Jenkins <dfj@bu.edu>
Description: Run common single cell analysis directly through your browser
Expand All @@ -22,5 +22,8 @@ Imports:
plotly,
ggplot2,
Rtsne,
shinyjs
shinyjs,
RColorBrewer,
methods,
Biobase
RoxygenNote: 5.0.1
14 changes: 7 additions & 7 deletions R/Downsample.Matrix.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
#' Downsample Data
#'
#' @param datamatrix
#' @param newcounts
#' @param byBatch
#' @param batch
#' @param iterations
#' @param datamatrix TODO:document
#' @param newcounts TODO:document
#' @param byBatch TODO:document
#' @param batch TODO:document
#' @param iterations TODO:document
#'
#' @return Downsampled matrix
#' @export Downsample
Expand All @@ -15,7 +15,7 @@ Downsample <- function(datamatrix, newcounts = c(4, 16, 64, 256, 1024, 4096, 163
for (j in 1:dim(datamatrix)[2]) {
probs <- datamatrix[, j] / sum(datamatrix[, j])
for (k in 1:length(newcounts)) {
samps <- rmultinom(iterations, newcounts[k], probs)
samps <- stats::rmultinom(iterations, newcounts[k], probs)
for (l in 1:iterations) {
outmat[,j,k,l] <- samps[,l]
}
Expand All @@ -27,7 +27,7 @@ Downsample <- function(datamatrix, newcounts = c(4, 16, 64, 256, 1024, 4096, 163
for (j in 1:nlevels(batch)) {
probs <- datamatrix[,which(batch == levels(batch)[j])] / sum(datamatrix[,which(batch == levels(batch)[j])])
for (k in 1:length(newcounts)) {
samps <- rmultinom(iterations, newcounts[k], as.vector(probs))
samps <- stats::rmultinom(iterations, newcounts[k], as.vector(probs))
for (l in 1:iterations) {
outmat[,which(batch == levels(batch)[j]),k,l] <- as.matrix(samps[,l], nrow = dim(datamatrix)[1])
}
Expand Down
52 changes: 39 additions & 13 deletions R/misc_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ summarizeTable <- function(indata){
"Genes with no expression across all samples"),
"Value"=c(ncol(indata),
nrow(indata),
sum(apply(counts(indata), 2, function(x) sum(as.numeric(x)==0)) < 1700),
sum(rowSums(counts(indata)) == 0))))
sum(apply(scater::counts(indata), 2, function(x) sum(as.numeric(x)==0)) < 1700),
sum(rowSums(scater::counts(indata)) == 0))))
}

#' Create a SCESet object
Expand All @@ -23,21 +23,47 @@ summarizeTable <- function(indata){
#' object.
#'
#' @param countfile The path to a text file that contains a header row of sample
#' names, and rows of raw counts per gene for those samples
#' names, and rows of raw counts per gene for those samples.
#' @param annotfile The path to a text file that contains columns of annotation
#' information for each sample in the countfile. This file should have the same
#' number of rows as there are columns in the countfile.
#' @param featurefile The path to a text file that contains columns of
#' annotation information for each gene in the count matrix. This file should
#' have the same genes in the same order as countfile. This is optional.
#' @param inputdataframes If TRUE, countfile, annotfile, and featurefile are
#' treated as data frames instead of file paths. The default is FALSE.
#'
#' @return a SCESet object
#' @export createSCESet
createSCESet <- function(countfile, annotfile){
countsin <- read.table(countfile, sep="\t", header=T, row.names=1)
annotin <- read.table(annotfile, sep="\t", header=T, row.names=1)
pd <- new("AnnotatedDataFrame", data = annotin)

gene_df <- data.frame(Gene = rownames(countsin))
rownames(gene_df) <- gene_df$Gene
fd <- new("AnnotatedDataFrame", data = gene_df)
return(newSCESet(countData = countsin, phenoData = pd,
featureData = fd))
createSCESet <- function(countfile=NULL, annotfile=NULL, featurefile=NULL,
                         inputdataframes=FALSE){
  # The count data is the only mandatory input; everything else has a fallback.
  if (is.null(countfile)) {
    stop("You must supply a count file.")
  }

  # Reads a tab-delimited table with a header row, using the first column as
  # row names. TRUE is spelled out because T is an ordinary, reassignable
  # variable.
  read_tab <- function(path) {
    utils::read.table(path, sep = "\t", header = TRUE, row.names = 1)
  }

  if (inputdataframes) {
    # Inputs are used as supplied (already in-memory tables).
    countsin <- countfile
    annotin <- annotfile
    featurein <- featurefile
  } else {
    countsin <- read_tab(countfile)
    if (!is.null(annotfile)) {
      annotin <- read_tab(annotfile)
    }
    if (!is.null(featurefile)) {
      featurein <- read_tab(featurefile)
    }
  }

  # No sample annotation supplied: build a one-column table holding the sample
  # names taken from the count matrix columns.
  if (is.null(annotfile)) {
    annotin <- data.frame(row.names = colnames(countsin))
    annotin$Sample <- rownames(annotin)
  }

  # No feature annotation supplied: build a one-column table holding the gene
  # names taken from the count matrix rows.
  if (is.null(featurefile)) {
    featurein <- data.frame(Gene = rownames(countsin))
    rownames(featurein) <- featurein$Gene
  }

  pd <- methods::new("AnnotatedDataFrame", data = annotin)
  fd <- methods::new("AnnotatedDataFrame", data = featurein)
  return(scater::newSCESet(countData = countsin, phenoData = pd,
                           featureData = fd))
}
53 changes: 29 additions & 24 deletions R/scDiffEx.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
#' up to ntop genes. Required
#' @param diffexmethod The method for performing differential expression
#' analysis. Available options are DESeq, DESeq2, and limma. Required
#' @param clusterRow Cluster the rows. The default is TRUE
#' @param clusterCol Cluster the columns. The default is TRUE
#'
#' @return A list of differentially expressed genes.
#' @export scDiffEx
Expand All @@ -24,7 +26,7 @@
scDiffEx <- function(inSCESet, condition, significance=0.05, ntop=500,
usesig=TRUE, diffexmethod, clusterRow=TRUE,
clusterCol=TRUE){
in.condition <- droplevels(as.factor(pData(inSCESet)[,condition]))
in.condition <- droplevels(as.factor(scater::pData(inSCESet)[,condition]))
if (length(levels(in.condition)) != 2)
stop("only two labels supported, ", condition, " has ",
length(levels(in.condition)), " labels")
Expand Down Expand Up @@ -58,23 +60,25 @@ scDiffEx <- function(inSCESet, condition, significance=0.05, ntop=500,

#' Plot Differential Expression
#'
#' @param inSCESet
#' @param condition
#' @param geneList
#' @param clusterRow
#' @param clusterCol
#' @param inSCESet Input data object that contains the data to be plotted.
#' Required
#' @param condition The condition used for plotting the heatmap. Required
#' @param geneList The list of genes to put in the heatmap. Required
#' @param clusterRow Cluster the rows. The default is TRUE
#' @param clusterCol Cluster the columns. The default is TRUE
#'
#' @return
#' @return ComplexHeatmap object for the provided geneList annotated with the
#' condition.
#' @export plot_DiffEx
#'
plot_DiffEx <- function(inSCESet, condition, geneList, clusterRow=TRUE,
clusterCol=TRUE){
diffex.annotation <- data.frame(pData(inSCESet)[,condition])
diffex.annotation <- data.frame(scater::pData(inSCESet)[,condition])
colnames(diffex.annotation) <- condition
topha <- ComplexHeatmap::HeatmapAnnotation(df = diffex.annotation,
height = unit(0.333, "cm"))

heatmap <- ComplexHeatmap::Heatmap(t(scale(t(exprs(inSCESet)[geneList,]))),
heatmap <- ComplexHeatmap::Heatmap(t(scale(t(Biobase::exprs(inSCESet)[geneList,]))),
name="Expression",
column_title = "Differential Expression",
cluster_rows = clusterRow,
Expand All @@ -85,23 +89,24 @@ plot_DiffEx <- function(inSCESet, condition, geneList, clusterRow=TRUE,

#' Plot Interactive Differential Expression
#'
#' @param inSCESet
#' @param condition
#' @param geneList
#' @param clusterRow
#' @param clusterCol
#' @param inSCESet Input data object that contains the data to be plotted.
#' Required
#' @param condition The condition used for plotting the heatmap. Required
#' @param geneList The list of genes to put in the heatmap. Required
#' @param clusterRow Cluster the rows. The default is TRUE
#' @param clusterCol Cluster the columns. The default is TRUE
#'
#' @return
#' @return A d3heatmap object is plotted
#' @export plot_d3DiffEx
#'
plot_d3DiffEx <- function(inSCESet, condition, geneList, clusterRow=TRUE,
clusterCol=TRUE){
diffex.annotation <- data.frame(pData(inSCESet)[,condition])
diffex.annotation <- data.frame(scater::pData(inSCESet)[,condition])
colnames(diffex.annotation) <- condition
topha <- ComplexHeatmap::HeatmapAnnotation(df = diffex.annotation,
height = unit(0.333, "cm"))

d3heatmap::d3heatmap(t(scale(t(exprs(inSCESet)[geneList,]))),
d3heatmap::d3heatmap(t(scale(t(Biobase::exprs(inSCESet)[geneList,]))),
Rowv=clusterRow,
Colv=clusterCol,
ColSideColors=RColorBrewer::brewer.pal(8, "Set1")[as.numeric(factor(diffex.annotation[,1]))])
Expand All @@ -120,8 +125,8 @@ plot_d3DiffEx <- function(inSCESet, condition, geneList, clusterRow=TRUE,
#' @export scDiffEx_deseq2
#'
scDiffEx_deseq2 <- function(inSCESet, condition){
cnts <- counts(inSCESet)
annot_data <- pData(inSCESet)[,condition,drop=F]
cnts <- scater::counts(inSCESet)
annot_data <- scater::pData(inSCESet)[,condition,drop=F]
colnames(annot_data) <- "condition"
dds <- DESeq2::DESeqDataSetFromMatrix(countData = cnts,
colData = annot_data,
Expand All @@ -143,7 +148,7 @@ scDiffEx_deseq2 <- function(inSCESet, condition){
#' @export scDiffEx_deseq
#'
scDiffEx_deseq <- function(inSCESet, condition){
countData <- DESeq::newCountDataSet(counts(inSCESet), condition)
countData <- DESeq::newCountDataSet(scater::counts(inSCESet), condition)
countData <- DESeq::estimateSizeFactors(countData)
countData <- DESeq::estimateDispersions(countData, method="pooled",
fitType="local")
Expand All @@ -169,10 +174,10 @@ scDiffEx_deseq <- function(inSCESet, condition){
#' @export scDiffEx_limma
#'
scDiffEx_limma <- function(inSCESet, condition){
  # Fit a per-gene linear model of expression against the chosen annotation
  # column and return the moderated statistics for every gene.
  # Every call is namespace-qualified so the function does not depend on
  # limma, scater or Biobase being attached by the caller.
  design <- stats::model.matrix(~factor(scater::pData(inSCESet)[, condition]))
  fit <- limma::lmFit(Biobase::exprs(inSCESet), design)
  ebayes <- limma::eBayes(fit)
  # coef = 2 selects the second column of the design matrix (the first is the
  # intercept); number = nrow(inSCESet) requests all genes, not a top subset.
  topGenes <- limma::topTable(ebayes, coef = 2, adjust.method = "fdr",
                              number = nrow(inSCESet))
  # Rename the fifth result column to "padj".
  # NOTE(review): presumably this is limma's adjusted p-value column; the
  # position is assumed, not looked up by name -- confirm against topTable output.
  colnames(topGenes)[5] <- "padj"
  return(topGenes)
}
Loading

0 comments on commit 9aa2f6c

Please sign in to comment.