Skip to content

Commit

Permalink
add plot method for eigenvalues (#160)
Browse files Browse the repository at this point in the history
Signed-off-by: Daena Rys <rysdaena8@gmail.com>
Co-authored-by: Leo Lahti <leo.lahti@iki.fi>
Co-authored-by: TuomasBorman <tvborm@utu.fi>
  • Loading branch information
3 people authored Nov 20, 2024
1 parent 94696fa commit 6216147
Show file tree
Hide file tree
Showing 6 changed files with 421 additions and 2 deletions.
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: miaViz
Title: Microbiome Analysis Plotting and Visualization
Version: 1.15.2
Version: 1.15.3
Authors@R:
c(person(given = "Tuomas", family = "Borman", role = c("aut", "cre"),
email = "tuomas.v.borman@utu.fi",
Expand Down Expand Up @@ -52,8 +52,9 @@ Imports:
ggtree,
methods,
rlang,
scater,
S4Vectors,
scales,
scater,
SingleCellExperiment,
stats,
tibble,
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ export(plotRDA)
export(plotRowGraph)
export(plotRowPrevalence)
export(plotRowTile)
export(plotScree)
export(plotSeries)
export(plotTaxaPrevalence)
exportMethods("colTreeData<-")
Expand All @@ -37,6 +38,7 @@ exportMethods(plotRowGraph)
exportMethods(plotRowPrevalence)
exportMethods(plotRowTile)
exportMethods(plotRowTree)
exportMethods(plotScree)
exportMethods(plotSeries)
exportMethods(plotTaxaPrevalence)
exportMethods(rowTreeData)
Expand Down Expand Up @@ -133,6 +135,7 @@ importFrom(mia,meltSE)
importFrom(rlang,"!!")
importFrom(rlang,":=")
importFrom(rlang,sym)
importFrom(scales,pretty_breaks)
importFrom(scater,plotReducedDim)
importFrom(scater,retrieveCellInfo)
importFrom(scater,retrieveFeatureInfo)
Expand Down
1 change: 1 addition & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,4 @@ Changes in version 1.13.x

Changes in version 1.15.x
+ plotAbundance: Improved visualization of sample metadata
+ plotScree: Method for creating scree plots
279 changes: 279 additions & 0 deletions R/plotScree.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,279 @@
#' @name
#' plotScree
#'
#' @title
#' Create a scree plot
#'
#' @description
#' \code{plotScree} generates a scree plot to visualize the eigenvalues.
#' The eigenvalues can be provided either as a part of a
#' \code{TreeSummarizedExperiment} object or as a separate \code{vector}.
#' This plot illustrates the decline in eigenvalues across components,
#' helping to assess the importance of each component.
#'
#' @details
#' \code{plotScree} generates a scree plot to visualize the relative importance
#' of components in dimensionality reduction techniques such as Principal
#' Component Analysis (PCA) or Principal Coordinate Analysis (PCoA). If the
#' input is a \code{TreeSummarizedExperiment} object, the function extracts
#' eigenvalues from the specified reduced dimension slot, which requires that
#' dimensionality reduction has been performed beforehand using a dedicated
#' function. Alternatively, if the input is a \code{vector} or an
#' \code{eigenvals} object, these values are directly used as eigenvalues for
#' the plot.
#'
#' The plot can include a combination of barplot, points, connecting lines,
#' and labels, which can be controlled using the \code{show.*} parameters.
#'
#' An option to show cumulative explained variance is also available by setting
#' \code{add.cumulative = TRUE}.
#'
#' @return
#' A \code{ggplot2} object
#'
#' @param x a
#' \code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-constructor]{TreeSummarizedExperiment}}
#' \code{\link[vegan:eigenvals]{eigenvals}} or a vector.
#'
#' @param dimred \code{Character scalar} or \code{integer scalar}. Determines
#' the reduced dimension to plot. This is used when \code{x} is a
#' \code{TreeSummarizedExperiment} to extract the eigenvalues from
#' \code{reducedDim(x, dimred)}.
#'
#' @param ... additional parameters for plotting
#' \itemize{
#' \item \code{show.barplot}: \code{Logical scalar}. Whether to show a
#' barplot. (Default: \code{TRUE})
#'
#' \item \code{show.points}: \code{Logical scalar}. Whether to show a
#' points. (Default: \code{TRUE})
#'
#' \item \code{show.line}: \code{Logical scalar}. Whether to show a
#' line. (Default: \code{TRUE})
#'
#' \item \code{show.labels}: \code{Logical scalar}. Whether to show a
#' labels for each point. (Default: \code{FALSE})
#'
#' \item \code{add.proportion}: \code{Logical scalar}. Whether to show
#' proportion of explained variance, i.e., raw eigenvalues.
#' (Default: \code{TRUE})
#'
#' \item \code{add.cumulative}: \code{Logical scalar}. Whether to show
#' cumulative explained variance calculated from eigenvalues.
#' (Default: \code{FALSE})
#'
#' \item \code{n}: \code{Integer scalar}. Number of eigenvalues to plot.
#' If \code{NULL}, all eigenvalues are plotted. (Default: \code{NULL})
#'
#' \item \code{show.names}: \code{Logical scalar}. Whether to show names of
#' components in x-axis. If \code{FALSE}, the index of component is shown
#' instead of names. (Default: \code{FALSE})
#'
#' \item \code{eig.name}: \code{Character scalar}. The name of the attribute
#' in \code{reducedDim(x, dimred)} that contains the eigenvalues.
#' (Default: \code{c("eig", "varExplained")})
#' }
#'
#' @examples
#'
#' library(miaViz)
#' library(scater)
#'
#' data("enterotype", package = "mia")
#' tse <- enterotype
#'
#' # Run PCA and store results into TreeSE
#' tse <- transformAssay(tse, method = "clr", pseudocount = TRUE)
#' tse <- runPCA(tse, assay.type = "clr")
#'
#' # Plot scree plot
#' plotScree(tse, "PCA", add.cumulative = TRUE)
#'
NULL

#' @rdname plotScree
#' @export
setGeneric("plotScree", signature = c("x"),
function(x, ...) standardGeneric("plotScree"))

#' @rdname plotScree
#' @export
setMethod("plotScree", signature = c(x = "SingleCellExperiment"),
function(x, dimred, ...){
eig <- .get_eigenvalues(x, dimred, ...)
p <- plotScree(eig, ...)
return(p)
}
)

#' @rdname plotScree
#' @export
setMethod("plotScree", signature = c(x = "ANY"),
function(x, ...){
# Check that the the values are in correct format
is_correct <- length(x) > 0L &&
((is.vector(x) && is.numeric(x)) || is(x, "eigenvals"))
if( !is_correct ){
stop("'x' must be a numeric vector or class 'eigenvals'.",
call. = FALSE)
}
# Prepare data for plotting
plot_data <- .prepare_data(x, ...)
# Create a plot
p <- .scree_plotter(plot_data, ...)
return(p)
}
)

################################ HELP FUNCTIONS ################################

# This function retrieves the eigenvalues from reducedDim. The ordination must
# be calculated with dedicaded function in mia or scater so that the eigenvalues
# are stored in correct place.
.get_eigenvalues <- function(
x, dimred, eig.name = c("eig", "varExplained"), ...){
# Get reducedDim
if( !((.is_a_string(dimred) && dimred %in% reducedDimNames(x)) ||
(.is_an_integer(dimred) && dimred > 0 &&
dimred <= length(reducedDims(x)))) ){
stop("'dimred' must specify a valid reducedDim.", call. = FALSE)
}
reduced_dim <- reducedDim(x, dimred)
# Get eigenvalues
eig.name <- eig.name[ eig.name %in% names(attributes(reduced_dim)) ]
if( length(eig.name) != 1L ){
stop("'eig.name' must specify a name of attributes from ",
"reducedDim(x, dimred).", call. = FALSE)
}
eig <- attributes(reduced_dim)[[ eig.name ]]
return(eig)
}

# This function creates a data.frame from the input vector. The output is ready
# for plotter.
.prepare_data <- function(
x, add.proportion = TRUE, add.cumulative = FALSE, n = NULL,
show.names = FALSE, ...){
# Input check
if( !.is_a_bool(add.proportion) ){
stop("'add.proportion' must be TRUE or FALSE.", call. = FALSE)
}
if( !.is_a_bool(add.cumulative) ){
stop("'add.cumulative' must be TRUE or FALSE.", call. = FALSE)
}
if( !(is.null(n) || .is_an_integer(n) ) ){
stop("'n' must be NULL or integer.", call. = FALSE)
}
if( !.is_a_bool(show.names) ){
stop("'show.names' must be TRUE or FALSE.", call. = FALSE)
}
#
# Create a data.frame with eigenvalues
df <- data.frame(y = x)
df[["x"]] <- factor(rownames(df), levels = unique(rownames(df)))
df[["type"]] <- "proportion"
# Calculate cumulative proportion
df_cum <- df
df_cum[["y"]] <- cumsum(df_cum[["y"]]) /
sum(df_cum[["y"]], na.rm = TRUE)
df_cum[["type"]] <- "cumulative"
df <- rbind(df, df_cum)

# Based on user preference, keep proportion or/and cumulative values
if( !add.proportion ){
df <- df[df[["type"]] != "proportion", ]
}
if( !add.cumulative ){
df <- df[df[["type"]] != "cumulative", ]
}
# If user has specified, take only n first eigenvalues
if( !is.null(n) ){
n <- levels(df[["x"]])[ seq_len(n) ]
df <- df[ df[["x"]] %in% n, ]
}
# Replace names with integers to keep the x-axis of plot tidier
if( !show.names ){
df[["x"]] <- as.integer(df[["x"]])
}
return(df)
}

# This function creates a scree plot. The input is data.frame that includes
# 2 columns: one for eigenvalues and other for principal component name.
#' @importFrom scales pretty_breaks
.scree_plotter <- function(
df, show.points = TRUE, show.line = TRUE, show.barplot = FALSE,
show.labels = FALSE, ...){
# Input check
if( !.is_a_bool(show.points) ){
stop("'show.points' must be TRUE or FALSE.", call. = FALSE)
}
if( !.is_a_bool(show.line) ){
stop("'show.line' must be TRUE or FALSE.", call. = FALSE)
}
if( !.is_a_bool(show.barplot) ){
stop("'show.barplot' must be TRUE or FALSE.", call. = FALSE)
}
if( !.is_a_bool(show.labels) ){
stop("'show.labels' must be TRUE or FALSE.", call. = FALSE)
}
#
# If user wants to add proportion and cumulative values to same plot, we
# scale cumulative values into same scale as proportion.
if( length(unique(df[["type"]])) > 1L && !(show.labels || show.barplot) ){
ind <- df[["type"]] == "cumulative"
df[ind, "y"] <- df[ind, "y"] * max(df[!ind, "y"]) # Scale
}

# Create base plot
p <- ggplot(df, aes(
x = x,
y = y,
group = type,
colour = if(length(unique(.data[["type"]])) > 1L &&
!(show.labels || show.barplot) ) type
))
# Add layers based on user preference
if( show.points ){
p <- p + geom_point()
}
if( show.line ){
p <- p + geom_line()
}
if( show.barplot ){
p <- p + geom_col(width = 0.5)
}
if( show.labels ){
p <- p + geom_label(aes(label = round(y, 2)))
}

# If user wants to add barplots or labels with both cumulative and
# propotion values, the plot is splitted into two facets. Otherwise the
# the plot would be too messy to read.
if( length(unique(df[["type"]])) > 1L && (show.labels || show.barplot) ){
p <- p + facet_grid(rows = vars(type), scales = "free_y")
}
# If user wants to plot both cumulative and proportion, but with points
# and lines only, we can add tem to same plot. Now the plot has two y-axis;
# proportion at left and cumulative at right
if( length(unique(df[["type"]])) > 1L && !(show.labels || show.barplot) ){
p <- p + scale_y_continuous(
name = "Proportion",
sec.axis = sec_axis(
~ . / max(df[["y"]]), name = "Cumulative proportion"))
}
# Adjust labels in a case where either proportion or cumulative was plotted
if( length(unique(df[["type"]])) == 1L ){
p <- p + labs(x = "PC", y = "Eigenvalue")
}
# Adjust the x-axis to display a subset of evenly spaced values for
# improved readability
if( is.numeric(df[["x"]]) ){
p <- p +
scale_x_continuous(breaks = pretty_breaks())
}
# Adjust theme and remove legend
p <- p + theme_classic() +
theme(legend.position = "none")
return(p)
}
Loading

0 comments on commit 6216147

Please sign in to comment.