Skip to content

Commit

Permalink
Merge pull request #8 from rcannood/devel
Browse files Browse the repository at this point in the history
Refactor SCORPIUS code
  • Loading branch information
rcannood authored Jun 29, 2018
2 parents 4eb1e6b + b691963 commit 782c108
Show file tree
Hide file tree
Showing 68 changed files with 227 additions and 906 deletions.
11 changes: 4 additions & 7 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: SCORPIUS
Type: Package
Title: Inferring Developmental Chronologies from Single-Cell RNA Sequencing Data
Version: 1.0.1
Version: 1.0.2.9001
Date: 2018-06-12
Description: An accurate and easy tool for performing trajectory inference on
single cells using single-cell RNA sequencing data. In addition, SCORPIUS
Expand Down Expand Up @@ -34,13 +34,12 @@ Depends:
R (>= 3.0.0)
Imports:
dplyr,
fitdistrplus,
dynutils,
grDevices,
ggplot2 (>= 2.0),
magrittr,
MASS,
mclust,
Rcpp,
pbapply,
pheatmap,
princurve (>= 2.0.2),
Expand All @@ -50,13 +49,11 @@ Imports:
reshape2,
splines,
stats,
testthat,
tidyr,
TSP,
utils
Suggests:
knitr,
rmarkdown
LinkingTo:
Rcpp
rmarkdown,
testthat

7 changes: 2 additions & 5 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,7 @@ export(infer_initial_trajectory)
export(infer_trajectory)
export(knn)
export(knn_distances)
export(outlier_filter)
export(outlierness)
export(reduce_dimensionality)
export(reduce_dimensionality_landmarked)
export(reverse_trajectory)
export(scale_quantile)
export(scale_uniform)
Expand All @@ -29,7 +26,8 @@ importFrom(RColorBrewer,brewer.pal)
importFrom(TSP,TSP)
importFrom(TSP,insert_dummy)
importFrom(TSP,solve_TSP)
importFrom(fitdistrplus,fitdist)
importFrom(dynutils,correlation_distance)
importFrom(dynutils,euclidean_distance)
importFrom(grDevices,hcl)
importFrom(magrittr,"%$%")
importFrom(magrittr,"%<>%")
Expand Down Expand Up @@ -61,4 +59,3 @@ importFrom(stats,setNames)
importFrom(tidyr,gather)
importFrom(tidyr,spread)
importFrom(utils,head)
useDynLib(SCORPIUS)
7 changes: 2 additions & 5 deletions R/SCORPIUS.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,11 @@
#' SCORPIUS orders single cells with regard to an implicit timeline,
#' such as cellular development or progression over time.
#'
#' @section Outlier functions:
#' \code{\link{outlierness}} \code{\link{outlier_filter}}
#'
#' @section Distance functions:
#' \code{\link{correlation_distance}}, \code{\link{euclidean_distance}}, \code{\link{knn}}
#' \code{\link{correlation_distance}}, \code{\link{euclidean_distance}}
#'
#' @section Dimensionality Reduction functions:
#' \code{\link{reduce_dimensionality}}, \code{\link{reduce_dimensionality_landmarked}}, \code{\link{scale_uniform}}, \code{\link{scale_quantile}}
#' \code{\link{reduce_dimensionality}}, \code{\link{scale_uniform}}, \code{\link{scale_quantile}}
#'
#' @section Trajectory Inference functions:
#' \code{\link{infer_trajectory}}, \code{\link{infer_initial_trajectory}}, \code{\link{reverse_trajectory}}, \code{\link{gene_importances}}, \code{\link{extract_modules}}
Expand Down
6 changes: 2 additions & 4 deletions R/data.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,7 @@
#' dataset <- generate_dataset(type = "poly", num_genes = 500, num_samples = 1000, num_groups = 4)
#'
#' ## Reduce dimensionality and infer trajectory with SCORPIUS
#' dist <- correlation_distance(dataset$expression)
#' space <- reduce_dimensionality(dist, ndim=2)
#' space <- reduce_dimensionality(dataset$expression, correlation_distance, ndim = 2)
#' traj <- infer_trajectory(space)
#'
#' ## Visualise
Expand Down Expand Up @@ -121,8 +120,7 @@ generate_dataset <- function(type = c("splines", "polynomial"), num_samples = 40
#' data("ginhoux")
#'
#' ## Reduce dimensionality and infer trajectory with SCORPIUS
#' dist <- correlation_distance(ginhoux$expression)
#' space <- reduce_dimensionality(dist)
#' space <- reduce_dimensionality(ginhoux$expression, correlation_distance)
#' traj <- infer_trajectory(space)
#'
#' ## Visualise
Expand Down
131 changes: 112 additions & 19 deletions R/dimensionality_reduction.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
#'
#' @description \code{reduce_dimensionality} performs an eigenanalysis of the given dissimilarity matrix and returns coordinates of the samples represented in an \code{ndim}-dimensional space.
#'
#' @usage
#' reduce_dimensionality(dist, ndim, rescale=TRUE)
#'
#' @param dist A numeric matrix, data frame or "\code{dist}" object.
#' @param ndim The number of dimensions in the new space.
#' @param x a numeric matrix
#' @param dist_fun the distance function to be used. It is called as dist_fun(x, y) when landmarks are used, and as dist_fun(x) when landmark_method is "none", so its second argument must be optional.
#' @param ndim the maximum dimension of the space which the data are to be represented in; must be in {1, 2, \ldots, n-1}.
#' @param landmark_method Must be "naive" for now. Other landmark methods will be supported in the future.
#' @param num_landmarks the number of landmarks to be selected.
#' @param rescale A logical indicating whether or not the returned space should be rescaled and centered.
#'
#' @return A matrix containing the coordinates of each sample, represented in an \code{ndim}-dimensional space.
Expand All @@ -22,22 +22,115 @@
#' dataset <- generate_dataset(type = "poly", num_genes = 500, num_samples = 1000, num_groups = 4)
#'
#' ## Reduce the dimensionality of this dataset
#' dist <- correlation_distance(dataset$expression)
#' space <- reduce_dimensionality(dist, ndim = 2)
#' space <- reduce_dimensionality(dataset$expression, correlation_distance, ndim = 2)
#'
#' ## Visualise the dataset
#' draw_trajectory_plot(space, progression_group=dataset$sample_info$group_name)
reduce_dimensionality <- function(dist, ndim = 3, rescale = TRUE) {
reduce_dimensionality <- function(x, dist_fun, ndim = 3, landmark_method = c("naive", "none"), num_landmarks = 1000, rescale = T) {
# input check
if (!is.matrix(dist) && !is.data.frame(dist) && class(dist) != "dist")
stop(sQuote("dist"), " must be a numeric matrix, data frame or a ", sQuote("dist"), " object")
if (class(dist) == "dist")
dist <- as.matrix(dist)
if (!is.finite(ndim) || round(ndim) != ndim || length(ndim) != 1 || ndim < 1 || ndim >= nrow(dist))
stop(sQuote("ndim"), " must be a whole number and 1 <= ndim <= nrow(dist)-1")

space <- stats::cmdscale(dist, k = ndim)
if (rescale) space <- scale_uniform(space)
colnames(space) <- paste("Comp", seq_len(ncol(space)), sep = "")
space
if (!is.matrix(x) && !is.data.frame(x))
stop(sQuote("x"), " must be a numeric matrix or data frame")
if (!is.function(dist_fun)) {
stop(sQuote("dist_fun"), " must be a function(x, y) {...}")
}
if (!is.finite(ndim) || round(ndim) != ndim || length(ndim) != 1 || ndim < 1 || ndim >= nrow(x))
stop(sQuote("ndim"), " must be a whole number and 1 <= ndim <= nrow(x)-1")

landmark_method <- match.arg(landmark_method)

if (landmark_method == "none") {
space <- stats::cmdscale(dist_fun(x), k = ndim)
if (rescale) space <- scale_uniform(space)
colnames(space) <- paste("Comp", seq_len(ncol(space)), sep = "")
space
} else {
lm_out <- landmark_selection(
x = x,
dist_fun = dist_fun,
landmark_method = landmark_method,
num_landmarks = num_landmarks
)

cmd_out <- cmdscale_withlandmarks(
dist_lm = lm_out$dist_lm,
dist_2lm = lm_out$dist_2lm,
ndim = ndim,
rescale = rescale
)
attr(cmd_out, "landmarks") <- lm_out$ix_lm
cmd_out
}
}

# Draw landmark samples from `x` and compute the distance matrices that the
# landmark-based scaling step consumes.
#
# Arguments:
#   x                matrix-like object, one sample per row.
#   dist_fun         function called as dist_fun(a, b) on row subsets of `x`.
#   landmark_method  name of the selection strategy; only "naive" is handled,
#                    anything else falls through to the error branch.
#   num_landmarks    requested number of landmarks; capped at nrow(x).
#
# Returns a list with
#   ix_lm     row indices of the selected landmarks,
#   dist_lm   dist_fun(landmarks, landmarks),
#   dist_2lm  dist_fun(landmarks, x).
landmark_selection <- function(x, dist_fun, landmark_method, num_landmarks) {
  # never request more landmarks than there are rows to draw from
  total_rows <- nrow(x)
  if (total_rows < num_landmarks) {
    num_landmarks <- total_rows
  }

  switch(
    landmark_method,
    "naive" = {
      # random draw of distinct row indices
      ix_lm <- sample.int(total_rows, num_landmarks)
      lm_rows <- x[ix_lm, , drop = FALSE]
      list(
        ix_lm = ix_lm,
        dist_lm = dist_fun(lm_rows, lm_rows),
        dist_2lm = dist_fun(lm_rows, x)
      )
    },
    stop("landmark_method must be ", sQuote("naive"), ".")
  )
}

# Landmark multidimensional scaling.
#
# Runs classical MDS on the landmark-to-landmark distances and then places
# every sample in that space from its distances to the landmarks
# (distance-based triangulation).
#
# Arguments:
#   dist_lm   square matrix (or "dist" object) of distances between the
#             n landmarks; must not contain NA.
#   dist_2lm  n x N matrix of distances from each landmark (rows) to each of
#             the N samples (columns).
#   ndim      number of dimensions to return; must be in {1, ..., n - 1}.
#   rescale   if TRUE, both coordinate sets are passed through scale_uniform().
#
# Returns an N x k matrix of sample coordinates with columns "Comp1".."Compk",
# where k <= ndim is the number of strictly positive leading eigenvalues. The
# n x k landmark coordinates are attached as attribute "landmark_space".
#
# Raises an error on NA or non-square `dist_lm`, on a `dist_2lm` whose row
# count differs from the number of landmarks, and on an out-of-range `ndim`.
# Warns when fewer than `ndim` of the leading eigenvalues are positive.
cmdscale_withlandmarks <- function(dist_lm, dist_2lm, ndim = 3, rescale = TRUE) {
  if (anyNA(dist_lm))
    stop("NA values not allowed in 'd'")

  # squared landmark-to-landmark distances
  x <- as.matrix(dist_lm^2)
  storage.mode(x) <- "double"
  if (nrow(x) != ncol(x))
    stop("distances must be result of 'dist' or a square matrix")

  rn <- rownames(x)
  rn_all <- colnames(dist_2lm)
  n <- as.integer(nrow(x))

  # squared landmark-to-sample distances: the triangulation below must work on
  # the same (squared) scale as the double-centred landmark matrix
  d2_all <- as.matrix(dist_2lm)^2
  storage.mode(d2_all) <- "double"
  if (nrow(d2_all) != n)
    stop("'dist_2lm' must have one row per landmark")

  if ((ndim <- as.integer(ndim)) > n - 1 || ndim < 1)
    stop("'ndim' must be in {1, 2, .. n - 1}")

  # double center data
  mu_n <- rowMeans(x)
  mu <- mean(x)
  x_dc <- x - rep(mu_n, n) - rep(mu_n, each = n) + mu

  # classical MDS on landmarks
  e <- eigen(-x_dc / 2, symmetric = TRUE)
  ev <- e$values[seq_len(ndim)]
  evec <- e$vectors[, seq_len(ndim), drop = FALSE]
  positive <- ev > 0
  ndim1 <- sum(positive)
  if (ndim1 < ndim) {
    warning(gettextf("only %d of the first %d eigenvalues are > 0", ndim1, ndim), domain = NA)
    evec <- evec[, positive, drop = FALSE]
    ev <- ev[positive]
  }
  Slm <- evec * rep(sqrt(ev), each = n)

  # distance-based triangulation: subtracting mu_n recycles it down each
  # column, so row i (landmark i) is centred by that landmark's mean squared
  # distance
  points_inv <- evec / rep(sqrt(ev), each = n)
  S <- (-t(d2_all - mu_n) / 2) %*% points_inv

  # clean up dimension names; name only the components that were kept, since
  # non-positive eigenvalues may have been dropped above
  comp_names <- paste0("Comp", seq_along(ev))
  dimnames(Slm) <- list(rn, comp_names)
  dimnames(S) <- list(rn_all, comp_names)

  # rescale if necessary
  # NOTE(review): Slm and S are rescaled independently; whether they remain in
  # a common coordinate frame depends on scale_uniform() -- confirm.
  if (rescale) {
    Slm <- scale_uniform(Slm)
    S <- scale_uniform(S)
  }

  # output
  attr(S, "landmark_space") <- Slm

  S
}
96 changes: 0 additions & 96 deletions R/dimensionality_reduction_landmarked.R

This file was deleted.

Loading

0 comments on commit 782c108

Please sign in to comment.