add plot method for eigenvalues (#160)

Signed-off-by: Daena Rys <rysdaena8@gmail.com> Co-authored-by: Leo Lahti <leo.lahti@iki.fi> Co-authored-by: TuomasBorman <tvborm@utu.fi>
microbiome · Nov 20, 2024 · 6216147 · 6216147
1 parent 94696fa
commit 6216147
Show file tree

Hide file tree

Showing 6 changed files with 421 additions and 2 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: miaViz
 Title: Microbiome Analysis Plotting and Visualization
-Version: 1.15.2
+Version: 1.15.3
 Authors@R: 
     c(person(given = "Tuomas", family = "Borman", role = c("aut", "cre"),
              email = "tuomas.v.borman@utu.fi",
@@ -52,8 +52,9 @@ Imports:
     ggtree,
     methods,
     rlang,
-    scater,
     S4Vectors,
+    scales,
+    scater,
     SingleCellExperiment,
     stats,
     tibble,

diff --git a/NAMESPACE b/NAMESPACE
@@ -14,6 +14,7 @@ export(plotRDA)
 export(plotRowGraph)
 export(plotRowPrevalence)
 export(plotRowTile)
+export(plotScree)
 export(plotSeries)
 export(plotTaxaPrevalence)
 exportMethods("colTreeData<-")
@@ -37,6 +38,7 @@ exportMethods(plotRowGraph)
 exportMethods(plotRowPrevalence)
 exportMethods(plotRowTile)
 exportMethods(plotRowTree)
+exportMethods(plotScree)
 exportMethods(plotSeries)
 exportMethods(plotTaxaPrevalence)
 exportMethods(rowTreeData)
@@ -133,6 +135,7 @@ importFrom(mia,meltSE)
 importFrom(rlang,"!!")
 importFrom(rlang,":=")
 importFrom(rlang,sym)
+importFrom(scales,pretty_breaks)
 importFrom(scater,plotReducedDim)
 importFrom(scater,retrieveCellInfo)
 importFrom(scater,retrieveFeatureInfo)

diff --git a/NEWS b/NEWS
@@ -36,3 +36,4 @@ Changes in version 1.13.x
 
 Changes in version 1.15.x
 + plotAbundance: Improved visualization of sample metadata
++ plotScree: Method for creating scree plots
diff --git a/R/plotScree.R b/R/plotScree.R
@@ -0,0 +1,279 @@
+#' @name
+#' plotScree
+#' 
+#' @title
+#' Create a scree plot
+#' 
+#' @description 
+#' \code{plotScree} generates a scree plot to visualize the eigenvalues.
+#' The eigenvalues can be provided either as a part of a
+#' \code{TreeSummarizedExperiment} object or as a separate \code{vector}.
+#' This plot illustrates the decline in eigenvalues across components,
+#' helping to assess the importance of each component.
+#' 
+#' @details
+#' \code{plotScree} generates a scree plot to visualize the relative importance
+#' of components in dimensionality reduction techniques such as Principal
+#' Component Analysis (PCA) or Principal Coordinate Analysis (PCoA). If the
+#' input is a \code{TreeSummarizedExperiment} object, the function extracts
+#' eigenvalues from the specified reduced dimension slot, which requires that
+#' dimensionality reduction has been performed beforehand using a dedicated
+#' function. Alternatively, if the input is a \code{vector} or an
+#' \code{eigenvals} object, these values are directly used as eigenvalues for
+#' the plot.
+#' 
+#' The plot can include a combination of barplot, points, connecting lines,
+#' and labels, which can be controlled using the \code{show.*} parameters.
+#' 
+#' An option to show cumulative explained variance is also available by setting
+#' \code{add.cumulative = TRUE}.
+#' 
+#' @return 
+#' A \code{ggplot2} object 
+#' 
+#' @param x a
+#' \code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-constructor]{TreeSummarizedExperiment}}
+#' \code{\link[vegan:eigenvals]{eigenvals}} or a vector.
+#' 
+#' @param dimred \code{Character scalar} or \code{integer scalar}. Determines
+#' the reduced dimension to plot. This is used when \code{x} is a
+#' \code{TreeSummarizedExperiment} to extract the eigenvalues from
+#' \code{reducedDim(x, dimred)}.
+#' 
+#' @param ... additional parameters for plotting
+#' \itemize{
+#'   \item \code{show.barplot}: \code{Logical scalar}. Whether to show a
+#'   barplot. (Default: \code{TRUE})
+#'   
+#'   \item \code{show.points}: \code{Logical scalar}. Whether to show a
+#'   points. (Default: \code{TRUE})
+#'   
+#'   \item \code{show.line}: \code{Logical scalar}. Whether to show a
+#'   line. (Default: \code{TRUE})
+#'   
+#'   \item \code{show.labels}: \code{Logical scalar}. Whether to show a
+#'   labels for each point. (Default: \code{FALSE})
+#'   
+#'   \item \code{add.proportion}: \code{Logical scalar}. Whether to show
+#'   proportion of explained variance, i.e., raw eigenvalues.
+#'   (Default: \code{TRUE})
+#'   
+#'   \item \code{add.cumulative}: \code{Logical scalar}. Whether to show
+#'   cumulative explained variance calculated from eigenvalues.
+#'   (Default: \code{FALSE})
+#'   
+#'   \item \code{n}: \code{Integer scalar}. Number of eigenvalues to plot.
+#'   If \code{NULL}, all eigenvalues are plotted. (Default: \code{NULL})
+#'   
+#'   \item \code{show.names}: \code{Logical scalar}. Whether to show names of
+#'   components in x-axis. If \code{FALSE}, the index of component is shown
+#'   instead of names. (Default: \code{FALSE})
+#'   
+#'   \item \code{eig.name}: \code{Character scalar}. The name of the attribute
+#'   in \code{reducedDim(x, dimred)} that contains the eigenvalues.
+#'   (Default: \code{c("eig", "varExplained")})
+#' }
+#'
+#' @examples
+#' 
+#' library(miaViz)
+#' library(scater)
+#' 
+#' data("enterotype", package = "mia")
+#' tse <- enterotype
+#'  
+#' # Run  PCA and store results into TreeSE
+#' tse <- transformAssay(tse, method = "clr", pseudocount = TRUE)
+#' tse <- runPCA(tse, assay.type = "clr")
+#' 
+#' # Plot scree plot
+#' plotScree(tse, "PCA", add.cumulative = TRUE)
+#' 
+NULL
+
+#' @rdname plotScree
+#' @export
+setGeneric("plotScree", signature = c("x"),
+    function(x, ...) standardGeneric("plotScree"))
+
+#' @rdname plotScree
+#' @export
+setMethod("plotScree", signature = c(x = "SingleCellExperiment"),
+    function(x, dimred, ...){
+        eig <- .get_eigenvalues(x, dimred, ...)
+        p <- plotScree(eig, ...)
+        return(p)
+    }
+)
+
+#' @rdname plotScree
+#' @export
+setMethod("plotScree", signature = c(x = "ANY"),
+    function(x, ...){
+        # Check that the the values are in correct format
+        is_correct <- length(x) > 0L &&
+            ((is.vector(x) && is.numeric(x)) || is(x, "eigenvals"))
+        if( !is_correct ){
+            stop("'x' must be a numeric vector or class 'eigenvals'.",
+                call. = FALSE)
+        }
+        # Prepare data for plotting
+        plot_data <- .prepare_data(x, ...)
+        # Create a plot
+        p <- .scree_plotter(plot_data, ...)
+        return(p)
+    }
+)
+
+################################ HELP FUNCTIONS ################################
+
+# This function retrieves the eigenvalues from reducedDim. The ordination must
+# be calculated with dedicaded function in mia or scater so that the eigenvalues
+# are stored in correct place.
+.get_eigenvalues <- function(
+        x, dimred, eig.name = c("eig", "varExplained"), ...){
+    # Get reducedDim
+    if( !((.is_a_string(dimred) && dimred %in% reducedDimNames(x)) ||
+            (.is_an_integer(dimred) && dimred > 0 &&
+            dimred <= length(reducedDims(x)))) ){
+        stop("'dimred' must specify a valid reducedDim.", call. = FALSE)
+    }
+    reduced_dim <- reducedDim(x, dimred)
+    # Get eigenvalues
+    eig.name <- eig.name[ eig.name %in% names(attributes(reduced_dim)) ]
+    if( length(eig.name) != 1L ){
+        stop("'eig.name' must specify a name of attributes from ",
+            "reducedDim(x, dimred).", call. = FALSE)
+    }
+    eig <- attributes(reduced_dim)[[ eig.name ]]
+    return(eig)
+}
+
+# This function creates a data.frame from the input vector. The output is ready
+# for plotter.
+.prepare_data <- function(
+        x, add.proportion = TRUE, add.cumulative = FALSE, n = NULL,
+        show.names = FALSE, ...){
+    # Input check
+    if( !.is_a_bool(add.proportion) ){
+        stop("'add.proportion' must be TRUE or FALSE.", call. = FALSE)
+    }
+    if( !.is_a_bool(add.cumulative) ){
+        stop("'add.cumulative' must be TRUE or FALSE.", call. = FALSE)
+    }
+    if( !(is.null(n) || .is_an_integer(n) ) ){
+        stop("'n' must be NULL or integer.", call. = FALSE)
+    }
+    if( !.is_a_bool(show.names) ){
+        stop("'show.names' must be TRUE or FALSE.", call. = FALSE)
+    }
+    #
+    # Create a data.frame with eigenvalues
+    df <- data.frame(y = x)
+    df[["x"]] <- factor(rownames(df), levels = unique(rownames(df)))
+    df[["type"]] <- "proportion"
+    # Calculate cumulative proportion
+    df_cum <- df
+    df_cum[["y"]] <- cumsum(df_cum[["y"]]) /
+        sum(df_cum[["y"]], na.rm = TRUE)
+    df_cum[["type"]] <- "cumulative"
+    df <- rbind(df, df_cum)
+
+    # Based on user preference, keep proportion or/and cumulative values 
+    if( !add.proportion ){
+        df <- df[df[["type"]] != "proportion", ]
+    }
+    if( !add.cumulative ){
+        df <- df[df[["type"]] != "cumulative", ]
+    }
+    # If user has specified, take only n first eigenvalues
+    if( !is.null(n) ){
+        n <- levels(df[["x"]])[ seq_len(n) ]
+        df <- df[ df[["x"]] %in% n,  ]
+    }
+    # Replace names with integers to keep the x-axis of plot tidier
+    if( !show.names ){
+        df[["x"]] <- as.integer(df[["x"]])
+    }
+    return(df)
+}
+
+# This function creates a scree plot. The input is data.frame that includes
+# 2 columns: one for eigenvalues and other for principal component name.
+#' @importFrom scales pretty_breaks
+.scree_plotter <- function(
+        df, show.points = TRUE, show.line = TRUE, show.barplot = FALSE,
+        show.labels = FALSE, ...){
+    # Input check
+    if( !.is_a_bool(show.points) ){
+        stop("'show.points' must be TRUE or FALSE.", call. = FALSE)
+    }
+    if( !.is_a_bool(show.line) ){
+        stop("'show.line' must be TRUE or FALSE.", call. = FALSE)
+    }
+    if( !.is_a_bool(show.barplot) ){
+        stop("'show.barplot' must be TRUE or FALSE.", call. = FALSE)
+    }
+    if( !.is_a_bool(show.labels) ){
+        stop("'show.labels' must be TRUE or FALSE.", call. = FALSE)
+    }
+    #
+    # If user wants to add proportion and cumulative values to same plot, we
+    # scale cumulative values into same scale as proportion.
+    if( length(unique(df[["type"]])) > 1L && !(show.labels || show.barplot) ){
+        ind <- df[["type"]] == "cumulative"
+        df[ind, "y"] <- df[ind, "y"] * max(df[!ind, "y"]) # Scale
+    }
+
+    # Create base plot
+    p <- ggplot(df, aes(
+        x = x,
+        y = y,
+        group = type,
+        colour = if(length(unique(.data[["type"]])) > 1L &&
+            !(show.labels || show.barplot) ) type
+        ))
+    # Add layers based on user preference
+    if( show.points ){
+        p <- p + geom_point()
+    }
+    if( show.line ){
+        p <- p + geom_line()
+    }
+    if( show.barplot ){
+        p <- p + geom_col(width = 0.5)
+    }
+    if( show.labels ){
+        p <- p + geom_label(aes(label = round(y, 2)))
+    }
+
+    # If user wants to add barplots or labels with both cumulative and
+    # propotion values, the plot is splitted into two facets. Otherwise the
+    # the plot would be too messy to read.
+    if( length(unique(df[["type"]])) > 1L && (show.labels || show.barplot) ){
+        p <- p + facet_grid(rows = vars(type), scales = "free_y")
+    }
+    # If user wants to plot both cumulative and proportion, but with points
+    # and lines only, we can add tem to same plot. Now the plot has two y-axis;
+    # proportion at left and cumulative at right
+    if( length(unique(df[["type"]])) > 1L && !(show.labels || show.barplot) ){
+        p <- p + scale_y_continuous(
+            name = "Proportion",
+            sec.axis = sec_axis(
+                ~ . / max(df[["y"]]), name = "Cumulative proportion"))
+    }
+    # Adjust labels in a case where either proportion or cumulative was plotted
+    if( length(unique(df[["type"]])) == 1L ){
+        p <- p + labs(x = "PC", y = "Eigenvalue")
+    }
+    # Adjust the x-axis to display a subset of evenly spaced values for
+    # improved readability
+    if( is.numeric(df[["x"]]) ){
+        p <- p +
+            scale_x_continuous(breaks = pretty_breaks())
+    }
+    # Adjust theme and remove legend
+    p <- p + theme_classic() +
+        theme(legend.position = "none")
+    return(p)
+}
Original file line number	Diff line number	Diff line change
Expand Up		@@ -36,3 +36,4 @@ Changes in version 1.13.x

		Changes in version 1.15.x
		+ plotAbundance: Improved visualization of sample metadata
		+ plotScree: Method for creating scree plots