diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml deleted file mode 100644 index f4c4ef2..0000000 --- a/.github/workflows/lint.yaml +++ /dev/null @@ -1,32 +0,0 @@ -# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples -# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help -on: - push: - branches: [main, master] - pull_request: - branches: [main, master] - -name: lint - -jobs: - lint: - runs-on: ubuntu-latest - env: - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - steps: - - uses: actions/checkout@v3 - - - uses: r-lib/actions/setup-r@v2 - with: - use-public-rspm: true - - - uses: r-lib/actions/setup-r-dependencies@v2 - with: - extra-packages: any::lintr, local::. - needs: lint - - - name: Lint - run: lintr::lint_package() - shell: Rscript {0} - env: - LINTR_ERROR_ON_LINT: true diff --git a/R/DESeq2_estimateSizeFactorsForMatrix.R b/R/DESeq2_estimateSizeFactorsForMatrix.R index 908f716..266d517 100644 --- a/R/DESeq2_estimateSizeFactorsForMatrix.R +++ b/R/DESeq2_estimateSizeFactorsForMatrix.R @@ -1,8 +1,8 @@ #' @title estimateSizeFactorsForMatrix from DESeq2 #' @author Simon Anders #' Please cite as -#' Love, M.I., Huber, W., Anders, S. Moderated estimation of fold change and dispersion for RNA-seq data with -#' DESeq2 Genome Biology 15(12):550 (2014) +#' Love, M.I., Huber, W., Anders, S. Moderated estimation of fold change and +#' dispersion for RNA-seq data with DESeq2 Genome Biology 15(12):550 (2014) #' #' Low-level function to estimate size factors with robust regression. #' diff --git a/R/ScMaSigProClass.R b/R/ScMaSigProClass.R index 13de1b2..7fba472 100644 --- a/R/ScMaSigProClass.R +++ b/R/ScMaSigProClass.R @@ -20,7 +20,7 @@ #' @slot bin_method A character string representing the algorithm used for #' binning. #' @slot path_col A character string representing the column name for branching -#' path assignment in 'Sparse' or 'Dense'data. 
See `colData` from the +#' path assignment in 'Sparse' or 'Dense' data. See `colData` from the #' \pkg{SingleCellExperiment} package. #' @slot bin_col A character string representing the name of the column in which #' bin labels are stored. @@ -32,7 +32,8 @@ #' metadata containing cell level annotations. (Default is "cell_type"). #' @slot g Update Description.. #' @slot p_value Significance Level. -#' @slot min_na Minimum values needed to estimate the model. +#' @slot min_na Minimum values needed per gene across cells to estimate the +#' model. #' @slot mt_correction A character string specifying the p-value correction #' method. #' @slot epsilon Model convergence tolerance. @@ -41,12 +42,13 @@ #' @slot offset A logical value specifying whether to use offset during fitting. #' @slot log_offset A logical value specifying whether to take the logarithm of #' the offsets. -#' @slot max_it Integer. Maximum number of iterations to fit the model. -#' @slot poly_degree Integer with the polynomial degree to fit the regression. 1 -#' @slot distribution Distribution used -#' @slot cluster_method Description -#' @slot use_dim description -#' @slot fill_na description +#' @slot max_it Maximum number of iterations to fit the model. +#' @slot poly_degree Order of the polynomial linear model. +#' @slot distribution Distribution of the error term. +#' @slot cluster_method Clustering method used for clustering significant genes. +#' @slot use_dim Dimension to use for filling the missing values before +#' clustering. +#' @slot fill_na Method to fill the missing values. 
#' #' @name ParameterConfig #' @aliases ParameterConfig-class @@ -100,14 +102,11 @@ setClass( } } - # Check for slot g if (!is.integer(object@g)) { stop("Slot 'g' must be an integer.") } - # Check for slot Q if (!is.numeric(object@p_value)) { - stop("Slot 'p_value' must be numeric.") } if (!is.numeric(object@epsilon)) { diff --git a/R/align_pseudotime.R b/R/align_pseudotime.R index 15825f5..169cebe 100644 --- a/R/align_pseudotime.R +++ b/R/align_pseudotime.R @@ -11,6 +11,7 @@ #' @importFrom scales rescale #' @importFrom S4Vectors DataFrame #' +#' @param scmpObj description #' @param ptime_col A character string representing the column name #' for inferred Pseudotime values in 'Sparse' data. See `colData` from the #' \pkg{SingleCellExperiment} package. (Default is "Pseudotime") diff --git a/R/create_scmp.R b/R/create_scmp.R index 728b447..708b229 100644 --- a/R/create_scmp.R +++ b/R/create_scmp.R @@ -41,7 +41,7 @@ create_scmp <- function(counts, msg = paste("Rownames of raw-counts and cell-level-metadata are different.") ) - if (!is.null(bin_counts) | !is.null(bin_cell_data)) { + if (!is.null(bin_counts) || !is.null(bin_cell_data)) { assert_that(nrow(bin_counts) == nrow(bin_cell_data), msg = paste("Number of cells in bin_counts and bin_cell_data are different.") ) diff --git a/R/m3_select_path.R b/R/m3_select_path.R index 8714d6c..375ad19 100644 --- a/R/m3_select_path.R +++ b/R/m3_select_path.R @@ -194,7 +194,7 @@ m3_select_path <- function(cds, inputType = "Monocle3", ptime_col = ptime_col ) - } else if (plot_purity & !all(supplied_nodes %in% anno.df[["node"]])) { + } else if (plot_purity && !all(supplied_nodes %in% anno.df[["node"]])) { # Tranfer data data <- anno.df @@ -213,7 +213,7 @@ m3_select_path <- function(cds, group_by(!!sym(node), !!sym(anno)) %>% summarise(count = n(), .groups = "drop") - data_summary <- data_summary[data_summary$count >= (mean(data_summary$count) + sd(data_summary$count)), ] + data_summary <- data_summary[data_summary[[count]] >= 
(mean(data_summary$count) + sd(data_summary[[count]])), ] # Plotting the data fraction_bar <- ggplot(data_summary, aes(x = .data$node, y = .data$count, fill = .data$anno)) + diff --git a/R/pb_helpers.R b/R/pb_helpers.R index 3706137..ad30aa4 100644 --- a/R/pb_helpers.R +++ b/R/pb_helpers.R @@ -474,7 +474,7 @@ optimize_bin_max <- function(bin_table, max_allowed, verbose = TRUE, #' cell counts. It does this by either taking the mean or sum of counts across #' cells in each bin, depending on the specified method. #' -#' @param scmpObject object of Class scMaSigPro. See \code{\link{ScMaSigPro}} +#' @param scmpObj object of Class scMaSigPro. See \code{\link{ScMaSigPro}} #' for more details. #' @param bin_mem_col Column name in the Dense metadata storing information #' about the members of the bins. (Default is 'scmp_bin_members'). @@ -502,29 +502,29 @@ optimize_bin_max <- function(bin_table, max_allowed, verbose = TRUE, #' #' @keywords internal -pb_counts <- function(scmpObject, - bin_mem_col = scmpObject@Parameters@bin_mem_col, - bin_col = scmpObject@Parameters@bin_col, +pb_counts <- function(scmpObj, + bin_mem_col = scmpObj@Parameters@bin_mem_col, + bin_col = scmpObj@Parameters@bin_col, assay_name = "counts", cluster_count_by = "sum") { # Check Object Validity - assert_that(is(scmpObject, "ScMaSigPro"), + assert_that(is(scmpObj, "ScMaSigPro"), msg = "Please provide object of class 'scMaSigPro'." 
) # Count slot assert_that( all( - assay_name %in% names(scmpObject@Sparse@assays@data@listData) + assay_name %in% names(scmpObj@Sparse@assays@data@listData) ), - msg = paste0("'", assay_name, "' ", "doesn't exit in scmpObject.") + msg = paste0("'", assay_name, "' ", "doesn't exit in scmpObj.") ) # Get assay - counts <- scmpObject@Sparse@assays@data@listData[[assay_name]] + counts <- scmpObj@Sparse@assays@data@listData[[assay_name]] # Get Pseudobulk Profile - pseudo_bulk_profile <- as.data.frame(colData(scmpObject@Dense)) + pseudo_bulk_profile <- as.data.frame(colData(scmpObj@Dense)) assert_that(bin_mem_col %in% colnames(pseudo_bulk_profile), msg = paste0("'", bin_mem_col, "' does not exist in level.meta.data") @@ -567,8 +567,8 @@ pb_counts <- function(scmpObject, colnames(pb.counts) <- meta.info[[bin_col]] # Return the counts - scmpObject@Dense@assays@data@listData$bulk.counts <- as(pb.counts, "dgCMatrix") + scmpObj@Dense@assays@data@listData$bulk.counts <- as(pb.counts, "dgCMatrix") # return - return(scmpObject) + return(scmpObj) } diff --git a/R/plotBinTile.R b/R/plotBinTile.R index f3b4e4e..6934ccc 100644 --- a/R/plotBinTile.R +++ b/R/plotBinTile.R @@ -1,19 +1,23 @@ #' @title Plot Bin Sizes Across Binned Time and Paths #' #' @description -#' This function generates plots to visualize the dense cell metadata from a ScMaSigPro -#' object. It produces tile plot to display the bin sizes across different -#' binned time intervals and paths. +#' This function generates plots to visualize the Dense slot cell metadata +#' from a ScMaSigPro object. It produces tile plot to display the bin sizes +#' across different binned time intervals and paths. #' -#' @param scmpObj A ScMaSigPro class object with an additional slot 'Dense' that -#' contains compression information. -#' @param path_col Name of the column in `cell.metadata` storing information -#' for Path. 
-#' @param bin_size_col A title of the barplot -#' @param bin_ptime_col description +#' @param scmpObj An object of class \code{\link{ScMaSigPro}}. +#' @param path_col A character string representing the column name for branching +#' path assignment in 'Sparse' or 'Dense' slot. +#' @param bin_size_col A character string representing the name of the column in +#' which bin sizes per bin are stored. (Default is "scmp_bin_size"). +#' @param bin_ptime_col A character string representing the column name +#' for binned Pseudotime values in 'Dense' data. +#' (Default is "scmp_binned_pseudotime"). +#' +#' @return ggplot2 plot object. +#' +#' @author Priyansh Srivastava \email{spriyansh29@@gmail.com} #' -#' @return A tile plot made with `geom_tile()`, visualizing the bin sizes across -#' different binned time and paths. #' @export plotBinTile <- function(scmpObj, path_col = scmpObj@Parameters@path_col, diff --git a/R/plotIntersect.R b/R/plotIntersect.R index b8f9dbc..58c28c2 100644 --- a/R/plotIntersect.R +++ b/R/plotIntersect.R @@ -1,22 +1,28 @@ -#' @title Generate UpSet Plot on Intersection of Significant Genes from scmpObject +#' @title Generate UpSet Plot #' -#' @param scmpObj An object of class ScMaSigPro -#' @param min_intersection_size minimal number of observations in an intersection +#' @description +#' Generate UpSet Plot on Intersection of Significant Genes from scMaSigPro +#' object. It is a wrapper around `ComplexUpset::upset`. +#' +#' @importFrom S4Vectors isEmpty +#' @importFrom ComplexUpset upset intersection_matrix intersection_size upset_set_size +#' @importFrom RColorConesa colorConesa +#' +#' @param scmpObj An object of class \code{\link{ScMaSigPro}}. +#' @param min_intersection_size Minimal number of observations in an intersection #' for it to be included. -#' @param width_ratio ratio of the overall set size width to intersection matrix +#' @param width_ratio Ratio of the overall set size width to intersection matrix #' width. 
-#' @param keep_empty_groups whether empty sets should be kept (including sets +#' @param keep_empty_groups Whether empty sets should be kept (including sets #' which are only empty after filtering by size) -#' @param show_sets_size the overall set sizes plot, e.g. from upset_set_size() +#' @param show_sets_size The overall set sizes plot, e.g. from upset_set_size() #' (FALSE to hide) #' -#' @return An UpSet plot visualizing the intersections of significant genes across pathways. -#' @importFrom S4Vectors isEmpty -#' @importFrom ComplexUpset upset intersection_matrix intersection_size upset_set_size -#' @importFrom RColorConesa colorConesa +#' @return ggplot2 plot object. #' -#' @export +#' @author Priyansh Srivastava \email{spriyansh29@@gmail.com} #' +#' @export plotIntersect <- function(scmpObj, min_intersection_size = 2, keep_empty_groups = TRUE, width_ratio = 0.1, show_sets_size = FALSE) { diff --git a/R/plotTrend.R b/R/plotTrend.R index 331b455..4a49903 100644 --- a/R/plotTrend.R +++ b/R/plotTrend.R @@ -1,195 +1,200 @@ -#' Plot Groups Function +#' @title Plot trend of the single gene. #' -#' This function generates plots based on various parameters. It calculates the summary mode, colors, and other visual attributes to create a plot. -#' -#' @param scmpObj object of class scmpObj -#' @param feature_id Name of the gene to be plotted. Should correspond to one of -#' the feature in the count table. -#' @param xlab X-axis label. Default is "Pooled Pseudotime". -#' @param ylab Y-axis label. Default is "Pseudobulk Expression". -#' @param smoothness How smooth the trend should be. Default is 0.01, setting to -#' higher values will result in more linear trends. -#' @param logs Whether to plot log of counts. -#' @param logType Log Available options 'log', 'log2', 'log10' -#' @param pseudoCount Add a pseudo-count before taking the log. -#' @param significant Default is FALSE. Set to TRUE to plot genes, that don't pass -#' R-Square threshold from 'sc.filter()'. 
-#' @param summary_mode description +#' @description +#' Plot trend of the single gene across the binned pseudotime. #' #' @import ggplot2 #' @importFrom RColorConesa getConesaColors -#' @return Generates a plot. +#' +#' @param scmpObj An object of class \code{\link{ScMaSigPro}}. +#' @param feature_id Name of the gene to be plotted. +#' @param xlab X-axis label. (Default is "Pooled Pseudotime") +#' @param ylab Y-axis label. (Default is "Pseudobulk Expression") +#' @param smoothness How smooth the trend should be. Setting to +#' higher values will result in more linear trends. (Default is 0.01) +#' @param logs Whether to log transform counts. (Default is TRUE) +#' @param logType How to log transform the values. Available options 'log', +#' 'log2', 'log10'. (Default is 'log') +#' @param pseudoCount Add a pseudo-count before taking the log. (Default is 1) +#' @param significant Plot gene only if the models are significant based on +#' \code{scMaSigPro::sc.filter()}. (Default is TRUE) +#' @param summary_mode Compress the expression values per replicate (if present) +#' per binned pseudotime point. Default is 'median'. Other option 'mean' +#' +#' @return ggplot2 plot object. +#' +#' @author Priyansh Srivastava \email{spriyansh29@@gmail.com} +#' #' @export -plotTrend <- - function(scmpObj, - feature_id, - xlab = "Pooled Pseudotime", - ylab = "Pseudobulk Expression", - smoothness = 0.01, - logs = TRUE, - logType = "log", - pseudoCount = 1, - significant = TRUE, - summary_mode = "median") { - # Invoke Variables - pb.counts <- "pb.counts" - pooled.time <- "pooled.time" - path <- "path" - - # Check summary_mode - assert_that(any(summary_mode %in% c("median", "mean")), - msg = paste( - paste0("'", summary_mode, "'"), "is not a valid option. 
Please use one of", - paste(c("median", "mean"), collapse = ", ") - ) +plotTrend <- function(scmpObj, + feature_id, + xlab = "Pooled Pseudotime", + ylab = "Pseudobulk Expression", + smoothness = 0.01, + logs = TRUE, + logType = "log", + pseudoCount = 1, + significant = TRUE, + summary_mode = "median") { + # Invoke Variables + pb.counts <- "pb.counts" + pooled.time <- "pooled.time" + path <- "path" + + # Check summary_mode + assert_that(any(summary_mode %in% c("median", "mean")), + msg = paste( + paste0("'", summary_mode, "'"), "is not a valid option. Please use one of", + paste(c("median", "mean"), collapse = ", ") ) + ) - # Extract edisgn - alloc.frame <- scmpObj@Design@assignment_matrix %>% as.data.frame() + # Extract edisgn + alloc.frame <- scmpObj@Design@assignment_matrix %>% as.data.frame() - # Extract the bulk counts - bulk.counts <- scmpObj@Dense@assays@data@listData$bulk.counts + # Extract the bulk counts + bulk.counts <- scmpObj@Dense@assays@data@listData$bulk.counts - # Check - assert_that(all(feature_id %in% rownames(bulk.counts)), - msg = "Feature Id doesn't exist please select another one" - ) + # Check + assert_that(all(feature_id %in% rownames(bulk.counts)), + msg = "Feature Id doesn't exist please select another one" + ) - if (significant) { - assert_that(any(feature_id %in% unique(unlist(scmpObj@Significant@genes))), - msg = "Feature Id didn't pass the R2 threshold, please re-run sc.get.sigenes, with lower a value or set 'significant' to 'FALSE'" - ) - } + if (significant) { + assert_that(any(feature_id %in% unique(unlist(scmpObj@Significant@genes))), + msg = "Feature Id didn't pass the R2 threshold, please re-run sc.filter, with lower a value or set 'significant' to 'FALSE'" + ) + } - # gene_i - yy <- bulk.counts[rownames(bulk.counts) %in% feature_id, , drop = FALSE] - - # group Vector - groups.vector <- scmpObj@Design@groups.vector - - # Prepare for Tfit - rm <- matrix(yy, nrow = 1, ncol = length(yy)) - rownames(rm) <- c("ratio medio") - 
colnames(rm) <- rownames(scmpObj@Design@predictor_matrix) - - # Extract the beta - betas.table <- showCoeff(scmpObj, view = FALSE, return = TRUE) - betas <- betas.table[rownames(betas.table) %in% feature_id, , drop = FALSE] - - # Set Data - curve.df <- data.frame(x = 0, y = 0, path = scmpObj@Parameters@path_prefix) - line.df <- data.frame(x = 0, y = 0, path = scmpObj@Parameters@path_prefix) - colnames(line.df) <- c("x", "y", scmpObj@Parameters@path_col) - colnames(line.df) <- c("x", "y", scmpObj@Parameters@path_col) - curve_data <- NULL - path.names <- unique(scmpObj@Dense@colData[[scmpObj@Parameters@path_col]]) - - # Get x and y - x <- y <- rep(0, nrow(alloc.frame)) - - # Create Point df - points.df <- data.frame( - pooled.time = alloc.frame[, scmpObj@Parameters@bin_ptime_col], - pb.counts = as.vector(t(as.matrix(yy))), - path = scmpObj@Dense@colData[[scmpObj@Parameters@path_col]] + # gene_i + yy <- bulk.counts[rownames(bulk.counts) %in% feature_id, , drop = FALSE] + + # group Vector + groups.vector <- scmpObj@Design@groups.vector + + # Prepare for Tfit + rm <- matrix(yy, nrow = 1, ncol = length(yy)) + rownames(rm) <- c("ratio medio") + colnames(rm) <- rownames(scmpObj@Design@predictor_matrix) + + # Extract the beta + betas.table <- showCoeff(scmpObj, view = FALSE, return = TRUE) + betas <- betas.table[rownames(betas.table) %in% feature_id, , drop = FALSE] + + # Set Data + curve.df <- data.frame(x = 0, y = 0, path = scmpObj@Parameters@path_prefix) + line.df <- data.frame(x = 0, y = 0, path = scmpObj@Parameters@path_prefix) + colnames(line.df) <- c("x", "y", scmpObj@Parameters@path_col) + colnames(line.df) <- c("x", "y", scmpObj@Parameters@path_col) + curve_data <- NULL + path.names <- unique(scmpObj@Dense@colData[[scmpObj@Parameters@path_col]]) + + # Get x and y + x <- y <- rep(0, nrow(alloc.frame)) + + # Create Point df + points.df <- data.frame( + pooled.time = alloc.frame[, scmpObj@Parameters@bin_ptime_col], + pb.counts = as.vector(t(as.matrix(yy))), + path = 
scmpObj@Dense@colData[[scmpObj@Parameters@path_col]] + ) + # View(yy) + # View(points.df) + # stop() + for (i in path.names) { + # Extract Coeff + a <- reg.coeffs( + coefficients = betas, + groups.vector = groups.vector, + group = i ) - # View(yy) - # View(points.df) - # stop() - for (i in path.names) { - # Extract Coeff - a <- reg.coeffs( - coefficients = betas, - groups.vector = groups.vector, - group = i - ) - a <- c(a, rep(0, (7 - length(a)))) - a[is.na(a)] <- 0 - - # Extract the time - time <- alloc.frame[alloc.frame[[i]] == 1, scmpObj@Parameters@bin_ptime_col] - - # Create a data frame with time values - x <- seq(from = min(time), to = max(time), by = smoothness) - - # Compute the curve values - y <- a[1] + a[2] * x + a[3] * (x^2) + a[4] * (x^3) + - a[5] * (x^4) + a[6] * (x^5) + a[7] * (x^5) - - # Create tmpvector - curve_df_tmp <- data.frame( - x = x, y = y, - path = i - ) - curve.df <- rbind(curve.df, curve_df_tmp) - } + a <- c(a, rep(0, (7 - length(a)))) + a[is.na(a)] <- 0 - curve.df <- curve.df[-1, ] - - # Calc limits - xlim <- c(min(points.df[[pooled.time]], na.rm = TRUE), max(points.df[[pooled.time]], na.rm = TRUE) * 1.3) - # ylim <- c(min(as.numeric(points.df[[pb.counts]]), na.rm = TRUE), max(as.numeric(points.df[[pb.counts]]), na.rm = TRUE)) - - xlim[2] <- max(points.df[[pooled.time]]) - - conesa_colors <- getConesaColors()[c(TRUE, FALSE)][c(1:length(unique(points.df[[path]])))] - names(conesa_colors) <- unique(points.df[[path]]) - - # Extract sol - data.sol <- showSol(scmpObj, view = FALSE, return = TRUE) - data.sol <- data.sol[feature_id, , drop = FALSE] - - # if log is requestion - if (logs) { - if (logType == "log2") { - points.df$pb.counts <- log2(points.df$pb.counts + pseudoCount) - ylab <- paste0("log2(", ylab, ")") - } else if (logType == "log") { - points.df$pb.counts <- log(points.df$pb.counts + pseudoCount) - ylab <- paste0("log(", ylab, ")") - } else if (logType == "log10") { - points.df$pb.counts <- log10(points.df$pb.counts + 
pseudoCount) - ylab <- paste0("log10(", ylab, ")") - } else { - stop("'logType' should be one of 'log2', 'log10', 'log'") - } - } + # Extract the time + time <- alloc.frame[alloc.frame[[i]] == 1, scmpObj@Parameters@bin_ptime_col] + + # Create a data frame with time values + x <- seq(from = min(time), to = max(time), by = smoothness) - # Generate line.df - line.df <- points.df - - # Apply Summary Operation - if (summary_mode == "mean") { - line.df <- line.df %>% - group_by(pooled.time, path) %>% - summarize(pb.counts = mean(pb.counts), .groups = "drop") - } else if (summary_mode == "median") { - line.df <- line.df %>% - group_by(pooled.time, path) %>% - summarize(pb.counts = median(pb.counts), .groups = "drop") + # Compute the curve values + y <- a[1] + a[2] * x + a[3] * (x^2) + a[4] * (x^3) + + a[5] * (x^4) + a[6] * (x^5) + a[7] * (x^5) + + # Create tmpvector + curve_df_tmp <- data.frame( + x = x, y = y, + path = i + ) + curve.df <- rbind(curve.df, curve_df_tmp) + } + + curve.df <- curve.df[-1, ] + + # Calc limits + xlim <- c(min(points.df[[pooled.time]], na.rm = TRUE), max(points.df[[pooled.time]], na.rm = TRUE) * 1.3) + # ylim <- c(min(as.numeric(points.df[[pb.counts]]), na.rm = TRUE), max(as.numeric(points.df[[pb.counts]]), na.rm = TRUE)) + + xlim[2] <- max(points.df[[pooled.time]]) + + conesa_colors <- getConesaColors()[c(TRUE, FALSE)][c(1:length(unique(points.df[[path]])))] + names(conesa_colors) <- unique(points.df[[path]]) + + # Extract sol + data.sol <- showSol(scmpObj, view = FALSE, return = TRUE) + data.sol <- data.sol[feature_id, , drop = FALSE] + + # if log is requestion + if (logs) { + if (logType == "log2") { + points.df$pb.counts <- log2(points.df$pb.counts + pseudoCount) + ylab <- paste0("log2(", ylab, ")") + } else if (logType == "log") { + points.df$pb.counts <- log(points.df$pb.counts + pseudoCount) + ylab <- paste0("log(", ylab, ")") + } else if (logType == "log10") { + points.df$pb.counts <- log10(points.df$pb.counts + pseudoCount) + ylab <- 
paste0("log10(", ylab, ")") + } else { + stop("'logType' should be one of 'log2', 'log10', 'log'") } + } - # Plot - p <- ggplot() + - geom_point(data = points.df, aes(x = pooled.time, y = pb.counts, color = path), fill = "#102C57", alpha = 0.5, size = 2, stroke = 1, shape = 21) + - geom_line(data = line.df, aes(x = pooled.time, y = .data$pb.counts, color = path), linetype = "solid", linewidth = 1, alpha = 0.7) + - geom_line(data = curve.df, aes(x = x, y = y, color = path), linetype = "dashed", linewidth = 1, alpha = 0.7) + - ggtitle( - paste("Feature Id:", feature_id), - subtitle = paste("R2:", round(data.sol[, 2], 3), "| p-Value:", round(data.sol[, 1], 3)) - ) + - xlab(xlab) + - ylab(ylab) + - theme_classic(base_size = 12) + - theme( - legend.position = "bottom", - panel.grid.major = element_line(color = "grey90", linewidth = 0.3, linetype = "dashed"), - panel.grid.minor = element_blank() - ) + - scale_x_continuous(breaks = seq(min(xlim), max(xlim), by = round(log10(length(points.df[[pooled.time]]))))) + - labs(color = "Paths") + - # coord_cartesian(xlim = xlim, ylim = ylim) + - scale_color_manual(values = conesa_colors) - # - return(p) + # Generate line.df + line.df <- points.df + + # Apply Summary Operation + if (summary_mode == "mean") { + line.df <- line.df %>% + group_by(pooled.time, path) %>% + summarize(pb.counts = mean(pb.counts), .groups = "drop") + } else if (summary_mode == "median") { + line.df <- line.df %>% + group_by(pooled.time, path) %>% + summarize(pb.counts = median(pb.counts), .groups = "drop") } + + # Plot + p <- ggplot() + + geom_point(data = points.df, aes(x = pooled.time, y = pb.counts, color = path), fill = "#102C57", alpha = 0.5, size = 2, stroke = 1, shape = 21) + + geom_line(data = line.df, aes(x = pooled.time, y = .data$pb.counts, color = path), linetype = "solid", linewidth = 1, alpha = 0.7) + + geom_line(data = curve.df, aes(x = x, y = y, color = path), linetype = "dashed", linewidth = 1, alpha = 0.7) + + ggtitle( + paste("Feature 
Id:", feature_id), + subtitle = paste("R2:", round(data.sol[, 2], 3), "| p-Value:", round(data.sol[, 1], 3)) + ) + + xlab(xlab) + + ylab(ylab) + + theme_classic(base_size = 12) + + theme( + legend.position = "bottom", + panel.grid.major = element_line(color = "grey90", linewidth = 0.3, linetype = "dashed"), + panel.grid.minor = element_blank() + ) + + scale_x_continuous(breaks = seq(min(xlim), max(xlim), by = round(log10(length(points.df[[pooled.time]]))))) + + labs(color = "Paths") + + # coord_cartesian(xlim = xlim, ylim = ylim) + + scale_color_manual(values = conesa_colors) + # + return(p) +} diff --git a/R/plotTrendCluster.R b/R/plotTrendCluster.R index 578e8c7..309145b 100644 --- a/R/plotTrendCluster.R +++ b/R/plotTrendCluster.R @@ -1,20 +1,7 @@ -#' Plot Groups Function +#' @title Plot multiple trends of the multiple genes. #' -#' This function generates plots based on various parameters. -#' -#' @param scmpObj object of class scmpObj -#' @param xlab X-axis label. Default is "Pooled Pseudotime". -#' @param ylab Y-axis label. Default is "Pseudobulk Expression". -#' @param plot description -#' @param smoothness description -#' @param logs Whether to plot log of counts -#' @param logType Log type required -#' @param includeInflu description -#' @param significant description -#' @param parallel description -#' @param verbose description -#' @param summary_mode description -#' @param pseudoCount description +#' @description +#' Plot trends of multiple genes (clustered) across the binned pseudotime. #' #' @import ggplot2 #' @importFrom stats complete.cases cutree hclust @@ -22,7 +9,29 @@ #' @importFrom mclust Mclust #' @importFrom stringr str_split_i #' @importFrom stats as.dist cor kmeans -#' @return Generates a plot. +#' +#' @param scmpObj An object of class \code{\link{ScMaSigPro}}. +#' @param xlab X-axis label. (Default is "Pooled Pseudotime") +#' @param ylab Y-axis label. (Default is "Pseudobulk Expression") +#' @param plot Whether to plot 'coeff' or 'counts'. 
(Default is 'counts') +#' @param smoothness How smooth the trend should be. Setting to +#' higher values will result in more linear trends. (Default is 0.01) +#' @param logs Whether to log transform counts. (Default is TRUE) +#' @param logType How to log transform the values. Available options 'log', +#' 'log2', 'log10'. (Default is 'log') +#' @param includeInflu Include gene only if it has influential data. +#' (Default is TRUE) +#' @param significant Include gene only if the models are significant based on +#' \code{scMaSigPro::sc.filter()}. (Default is TRUE) +#' @param parallel Use forking process to run parallelly. (Default is FALSE) +#' (Currently, Windows is not supported) +#' @param verbose Print detailed output in the console. (Default is TRUE) +#' @param summary_mode Compress the expression values per replicate (if present) +#' per binned pseudotime point. Default is 'median'. Other option 'mean' +#' @param pseudoCount Add a pseudo-count before taking the log. (Default is 1) +#' +#' @return ggplot2 plot object. +#' @author Priyansh Srivastava \email{spriyansh29@@gmail.com} #' @export plotTrendCluster <- function(scmpObj, xlab = "Pooled Pseudotime", diff --git a/R/sc.p.vector.R b/R/sc.p.vector.R index 016e499..6951770 100644 --- a/R/sc.p.vector.R +++ b/R/sc.p.vector.R @@ -1,47 +1,47 @@ -#' Make regression fit for Binned Pseudotime. Adaption of maSigPro::p.vector() +#' @title Perform fitting with full model. #' -#' \code{sc.p.vector} performs a regression fit for each gene taking all variables -#' present in the model given by a regression matrix #' and returns a list of FDR corrected significant genes. +#' @description +#' Performs a regression fit for each gene taking all variables present in the +#' model. #' -#' @param scmpObj matrix containing normalized gene expression data. Genes must be in rows and arrays in columns. -#' @param p_value significance level. Default is 0.05. 
-#' @param mt_correction argument to pass to \code{p.adjust} function indicating the method for multiple testing adjustment of p.value. -#' @param min_na genes with less than this number of true numerical values will be excluded from the analysis. -#' Minimum value to estimate the model is (degree+1) x Groups + 1. Default is 6. -#' @param family the distribution function to be used in the glm model. -#' It must be specified as a function: \code{gaussian()}, \code{poisson()}, \code{negative.binomial(theta)}... -#' If NULL, the family will be \code{negative.binomial(theta)} when \code{counts = TRUE} or \code{gaussian()} when \code{counts = FALSE}. -#' @param epsilon argument to pass to \code{glm.control}, convergence tolerance in the iterative process to estimate the glm model. -#' @param verbose Name of the analyzed item to show on the screen while \code{T.fit} is in process. -#' @param offset Whether ro use offset for normalization -#' @param parallel Enable parallel processing -#' @param log_offset Take the log of teh offset. Similar to -#' 'log(estimateSizeFactorsForMatrix)' from DESeq2. -#' @param max_it Integer giving the maximal number of IWLS iterations. -#' @details \code{rownames(design)} and \code{colnames(data)} must be identical vectors -#' and indicate array naming. \code{rownames(data)} should contain unique gene IDs. -#' \code{colnames(design)} are the given names for the variables in the regression model. +#' @importFrom stats anova dist glm median na.omit p.adjust glm.control +#' @importFrom utils setTxtProgressBar txtProgressBar +#' @importFrom parallelly availableCores +#' @importFrom MASS negative.binomial glm.nb #' -#' @return ScMaSigPro object - -#' @references Conesa, A., Nueda M.J., Alberto Ferrer, A., Talon, T. 2006. -#' maSigPro: a Method to Identify Significant Differential Expression Profiles in Time-Course Microarray Experiments. -#' Bioinformatics 22, 1096-1102 +#' @param scmpObj An object of class \code{\link{ScMaSigPro}}. 
+#' @param p_value Significance level used for variable selection in the stepwise +#' regression. +#' @param mt_correction A character string specifying the p-value correction +#' method. +#' @param min_na Minimum values needed per gene across cells to estimate the +#' model. +#' @param family Distribution of the error term. +#' @param epsilon Model convergence tolerance. +#' @param offset logical value specifying whether to use offset during fitting. +#' @param log_offset A logical value specifying whether to take the logarithm of +#' the offsets. +#' @param max_it Maximum number of iterations to fit the model. +#' @param parallel Use forking process to run parallelly. (Default is FALSE) +#' (Currently, Windows is not supported) +#' @param verbose Print detailed output in the console. (Default is TRUE) #' -#' @author Ana Conesa, Maria Jose Nueda and Priyansh Srivastava \email{spriyansh29@@gmail.com} +#' @return An object of class \code{\link{ScMaSigPro}}, with updated `Profile` +#' slot. #' -#' @seealso \code{\link{T.fit}}, \code{\link{lm}} +#' @seealso \code{\link{VariableProfiles}} #' -#' @keywords regression +#' @references Conesa, A., Nueda M.J., Alberto Ferrer, A., Talon, T. 2006. +#' maSigPro: a Method to Identify Significant Differential Expression Profiles +#' in Time-Course Microarray Experiments. 
Bioinformatics 22, 1096-1102 #' -#' @importFrom stats anova dist glm median na.omit p.adjust glm.control -#' @importFrom utils setTxtProgressBar txtProgressBar -#' @importFrom parallelly availableCores -#' @importFrom MASS negative.binomial glm.nb +#' @author Priyansh Srivastava \email{spriyansh29@@gmail.com}, Ana Conesa and +#' Maria Jose Nueda, \email{mj.nueda@@ua.es} #' +#' @keywords regression models #' @export -#' -sc.p.vector <- function(scmpObj, p_value = 0.05, mt_correction = "BH", min_na = 6, +sc.p.vector <- function(scmpObj, p_value = 0.05, mt_correction = "BH", + min_na = 6, family = negative.binomial(theta = 10), epsilon = 1e-8, verbose = TRUE, @@ -66,7 +66,12 @@ sc.p.vector <- function(scmpObj, p_value = 0.05, mt_correction = "BH", min_na = # Add check # assert_that((dat@Dim[1] > 1), msg = paste(min_na, "for 'min_na' is too high. Try lowering the threshold.")) - assert_that(min_na <= ncol(dat), msg = paste(min_na, "for 'min_na' is too high. Try lowering the threshold.")) + assert_that(min_na <= ncol(dat), + msg = paste( + min_na, + "for 'min_na' is too high. Try lowering the threshold." 
+ ) + ) # Removing rows with many missings: count.na <- function(x) (length(x) - length(x[is.na(x)])) @@ -132,7 +137,9 @@ sc.p.vector <- function(scmpObj, p_value = 0.05, mt_correction = "BH", min_na = compressed.data <- as.data.frame(scmpObj@Dense@colData) # Get bin_name and bin size - weight_df <- compressed.data[, c(scmpObj@Parameters@bin_size_colname), drop = TRUE] + weight_df <- compressed.data[, c(scmpObj@Parameters@bin_size_colname), + drop = TRUE + ] # Set names names(weight_df) <- rownames(compressed.data) @@ -149,7 +156,15 @@ sc.p.vector <- function(scmpObj, p_value = 0.05, mt_correction = "BH", min_na = } else { weight_df <- NULL } - p.vector.list <- mclapply(1:g, function(i, g_lapply = g, dat_lapply = dat, dis_lapply = dis, family_lapply = family, epsilon_lapply = epsilon, offsetdata_lapply = offsetData, pb_lapply = pb, weights_lapply = weight_df, verbose_lapply = verbose, max_it_lapply = max_it) { + p.vector.list <- mclapply(1:g, function(i, g_lapply = g, dat_lapply = dat, + dis_lapply = dis, + family_lapply = family, + epsilon_lapply = epsilon, + offsetdata_lapply = offsetData, + pb_lapply = pb, + weights_lapply = weight_df, + verbose_lapply = verbose, + max_it_lapply = max_it) { y <- as.numeric(dat_lapply[i, ]) # Print prog_lapplyress every 100 g_lapplyenes @@ -197,7 +212,11 @@ sc.p.vector <- function(scmpObj, p_value = 0.05, mt_correction = "BH", min_na = sc.p.vector <- unlist(p.vector.list, recursive = T, use.names = T) #---------------------------------------------------------------------- # Correct p-values using FDR correction and select significant genes - p.adjusted <- unlist(p.adjust(sc.p.vector, method = mt_correction, n = length(sc.p.vector)), + p.adjusted <- unlist( + p.adjust(sc.p.vector, + method = mt_correction, + n = length(sc.p.vector) + ), recursive = T, use.names = T ) names(p.adjusted) <- names(sc.p.vector) diff --git a/R/sc.set.poly.R b/R/sc.set.poly.R index 3177b75..1c90f96 100644 --- a/R/sc.set.poly.R +++ b/R/sc.set.poly.R @@ 
-1,26 +1,42 @@ -#' @title Create predictors and set polynomial. Adaption of maSigPro::make.design.matrix() +#' @title Set up polynomial models and create Predictor Matrix #' -#' @param scmpObject A 'ScMaSigPro' object. -#' @param poly_degree Degree of the design matrix (default: 2). -#' @param bin_ptime_col Name of the time column. -#' @param path_col Name of the path column. -#' -#' @return Returns the 'scmpObject' with an updated 'design' slot. +#' @description +#' Set up polynomial models and create Predictor Matrix that will contain the +#' independent variables. It is a wrapper around `maSigPro::make.design.matrix`. #' #' @importFrom maSigPro make.design.matrix +#' +#' @param scmpObj An object of class \code{\link{ScMaSigPro}}. +#' @param poly_degree Degree of the polynomial. +#' @param bin_ptime_col A character string representing the column name +#' for binned Pseudotime values in 'Dense' data. +#' @param path_col A character string representing the column name for branching +#' path assignment in 'Sparse' or 'Dense' slot. +#' +#' @return An object of class \code{\link{ScMaSigPro}}, with updated `Design` +#' slot. +#' +#' @seealso \code{\link{MatrixDesign}} Class. +#' +#' @references{Conesa, A., Nueda M.J., Alberto Ferrer, A., Talon, T. 2006. +#' maSigPro: a Method to Identify Significant Differential Expression Profiles +#' in Time-Course Microarray Experiments. 
Bioinformatics 22, 1096-1102} +#' +#' @author Priyansh Srivastava \email{spriyansh29@@gmail.com}, Ana Conesa and +#' Maria Jose Nueda, \email{mj.nueda@@ua.es} #' @export #' -sc.set.poly <- function(scmpObject, +sc.set.poly <- function(scmpObj, poly_degree = 2, - bin_ptime_col = scmpObject@Parameters@bin_ptime_col, - path_col = scmpObject@Parameters@path_col) { + bin_ptime_col = scmpObj@Parameters@bin_ptime_col, + path_col = scmpObj@Parameters@path_col) { # Check Object Validity - assert_that(is(scmpObject, "ScMaSigPro"), + assert_that(is(scmpObj, "ScMaSigPro"), msg = "Please provide object of class 'scMaSigPro'" ) # Extract cell metadata - comp.cell.metadata <- as.data.frame(scmpObject@Dense@colData) + comp.cell.metadata <- as.data.frame(scmpObj@Dense@colData) # pseudotime_colname assert_that((bin_ptime_col %in% colnames(comp.cell.metadata)), @@ -82,10 +98,10 @@ sc.set.poly <- function(scmpObject, ) # Update Slot - scmpObject@Design <- designObj + scmpObj@Design <- designObj # Update poly degree - scmpObject@Parameters@poly_degree <- as.integer(poly_degree) + scmpObj@Parameters@poly_degree <- as.integer(poly_degree) - return(scmpObject) + return(scmpObj) } diff --git a/R/sc.squeeze.R b/R/sc.squeeze.R index 8f8e80d..7e9a999 100644 --- a/R/sc.squeeze.R +++ b/R/sc.squeeze.R @@ -1,77 +1,67 @@ -#' @title Pseudo-bulking with optimal number of pseudotime based bins +#' @title Pseudo-bulking with optimal number of Pseudotime based bins #' #' @description -#' `squeeze()` discretizes a continuous time series column into bins +#' `sc.squeeze()` discretizes a continuous time series column into bins #' of equal size using entropy-based binning method. It automatically calculates -#' the optimal number of bins using one of the supported methods. The bin sizes -#' are also calculated and merged with the input cell_metadata. +#' the optimal number of bins using one of the supported methods. #' -#' @param scmpObject object of Class scMaSigPro. 
See \code{\link{ScMaSigPro}} -#' for more details. -#' @param ptime_col Name of the column in `cell.metadata` storing -#' Pseudotime values. Generated using `colData` from the \pkg{SingleCellExperiment} -#' package. (Default is "Pseudotime"). -#' @param path_col Name of the column in `cell.metadata` storing information -#' for Path. Generated using `colData` from the \pkg{SingleCellExperiment} -#' package. (Default is `path_prefix`). -#' @param bin_ptime_col Name of the column to store the computed Pseudotime -#' bins. -#' @param bin_mem_col Name of the column in the 'annotated_cell_metadata' -#' @param bin_col Name of the bin column name -#' @param bin_size_col Setting the name of the bin size column. -#' @param bin_method A character string specifying the method to use in order to -#' estimate the optimal number of bins. Available options: "Freedman.Diaconis", -#' "Sqrt", "Sturges", "Rice", "Doane", and "Scott.Normal". See \code{\link{estBinSize}} -#' for more details. (Default = "Sturges"). -#' @param drop_fac A numeric value specifying the factor by which to decrease the -#' number of bins if the initial binning results in too many bins. (Default = 1). -#' @param verbose Print detailed output in the console. (Default is TRUE) -#' per path iteratively. Options: "universal", "individual. (Default = "universal"). -#' @param additional_params Pass additional parameters as a named list. See Details. -#' @param assay_name Name of the Assay in the assay_name object from which retrieve the counts. -#' (Default = "counts"). -#' @param split_bins If bin sizes are greater than -#' @param prune_bins description -#' @param drop_trails description -#' @param fill_gaps description -#' @param cluster_count_by A character string specifying the method to use to -#' aggregate counts within each cluster. Available options are 'mean' or 'sum'. (Default = "sum"). 
-#' -#' @return -#' A data.frame that contains the original data plus additional columns: -#' - 'bin' : Name of the bin -#' - 'bin_size' : Size of the bin -#' - 'binned_time' : Interval range of each bin -#' This function returns the merged data.frame with new discretized -#' ptime_col, preserving the original rownames. -#' -#' @details -#' This function performs the following steps: -#' - Adds a new column 'cell' to the input data.frame which copies the row names. -#' - Extracts the time series data from the specified column of the input data.frame. -#' - Calculates the optimal number of bins using the specified method. -#' - Prints the estimated number of bins if verbose is set to TRUE. -#' - Discretizes the time series data into bins using the entropy-based binning method. -#' - Merges the original data.frame with the new binned time series data. -#' - Removes the 'cell' column and sets the row names back to the original row names of the input data.frame. -#' - Returns the merged data.frame. #' @importFrom assertthat assert_that #' @importFrom parallel mclapply detectCores #' @importFrom entropy discretize -#' @importFrom dplyr left_join join_by mutate select bind_rows group_by_at summarise rename_with +#' @importFrom dplyr left_join join_by mutate select bind_rows group_by_at +#' @importFrom dplyr summarise rename_with #' @importFrom magrittr %>% #' @importFrom rlang := #' @importFrom dplyr group_by filter row_number ungroup summarise summarize #' +#' @param scmpObj An object of class \code{\link{ScMaSigPro}}. +#' @param ptime_col A character string representing the column name +#' for inferred Pseudotime values in 'Sparse' data. (Default is "Pseudotime"). +#' @param path_col A character string representing the column name for branching +#' path assignment in 'Sparse' or 'Dense' data. (Default is `path_prefix`). +#' @param bin_ptime_col A character string representing the column name +#' for binned Pseudotime values in 'Dense' data. 
+#' (Default is "scmp_binned_pseudotime"). +#' @param bin_mem_col A character string representing the name of the column in +#' which cells per bin are stored. (Default is "scmp_bin_members"). +#' @param bin_col A character string representing the name of the column in which +#' bin labels are stored. (Default is "scmp_bin"). +#' @param bin_size_col A character string representing the name of the column in +#' which bin sizes per bin are stored. (Default is "scmp_bin_size"). +#' @param bin_method A character string representing the algorithm used for +#' binning. Available options: "Freedman.Diaconis", +#' "Sqrt", "Sturges", "Rice", "Doane", and "Scott.Normal". (Default = "Sturges") +#' @param drop_fac A numeric value specifying the factor by which to adjust the +#' number of bins if the initial binning results in too many/few bins. +#' (Default = 1). +#' @param assay_name Name of the Assay in sparse data from which the counts are +#' used. (Default = "counts"). +#' @param split_bins If bin sizes are greater than mean + sd, split the bin into +#' smaller bins by re-running the sc.squeeze() function. (Default = FALSE). +#' @param prune_bins If bin sizes are smaller than mean - sd, remove the bin. +#' (Default = FALSE). +#' @param drop_trails If the paths have different lengths of the binned pseudotime, +#' drop the bins from the path with more bins. (Default = FALSE). +#' @param fill_gaps If corresponding bin is missing for a time-point, pull the +#' successive bins and fill the gaps. +#' @param aggregate A character string specifying the method to aggregate counts +#' within each cluster. Available options are 'mean' or 'sum'. (Default = "sum"). +#' @param verbose Print detailed output in the console. (Default is TRUE) +#' @param additional_params Pass additional parameters as a named list. +#' See examples +#' +#' @return An object of class \code{\link{ScMaSigPro}}, with updated `Dense` +#' slot. 
+#' #' @author Priyansh Srivastava \email{spriyansh29@@gmail.com} #' -#' @seealso \code{\link{estBinSize}}, \code{\link{discretize}}, \code{\link{create_range}} +#' @seealso \code{\link{estBinSize}}, \code{\link{discretize}}, +#' \code{\link{create_range}} #' #' @export - -sc.squeeze <- function(scmpObject, - ptime_col = scmpObject@Parameters@ptime_col, - path_col = scmpObject@Parameters@path_col, +sc.squeeze <- function(scmpObj, + ptime_col = scmpObj@Parameters@ptime_col, + path_col = scmpObj@Parameters@path_col, bin_method = "Sturges", drop_fac = 1, verbose = FALSE, @@ -83,7 +73,7 @@ sc.squeeze <- function(scmpObject, prune_bins = FALSE, assay_name = "counts", drop_trails = FALSE, - cluster_count_by = "sum", + aggregate = "sum", fill_gaps = FALSE, additional_params = list(use_unique_time_points = FALSE)) { # Initiate Variable @@ -92,42 +82,52 @@ sc.squeeze <- function(scmpObject, cell <- "cell" # Check Object Validity - assert_that(is(scmpObject, "ScMaSigPro"), + assert_that(is(scmpObj, "ScMaSigPro"), msg = "Please provide object of class 'scMaSigPro'." 
) # Extract cell metadata - raw_cell_metadata <- as.data.frame(colData(scmpObject@Sparse)) + raw_cell_metadata <- as.data.frame(colData(scmpObj@Sparse)) # Drop Columns if exist cols_to_drop <- c( - scmpObject@Parameters@bin_size_col, - scmpObject@Parameters@bin_ptime_col, + scmpObj@Parameters@bin_size_col, + scmpObj@Parameters@bin_ptime_col, "scmp_u_bound", "scmp_l_bound" ) - raw_cell_metadata <- raw_cell_metadata[, !colnames(raw_cell_metadata) %in% cols_to_drop, drop = FALSE] + raw_cell_metadata <- raw_cell_metadata[, !colnames( + raw_cell_metadata + ) %in% cols_to_drop, drop = FALSE] # Count slot assert_that( all( - assay_name %in% names(scmpObject@Sparse@assays@data@listData) + assay_name %in% names(scmpObj@Sparse@assays@data@listData) ), - msg = paste0("'", assay_name, "' ", "doesn't exit in scmpObject.") + msg = paste0("'", assay_name, "' ", "doesn't exit in scmpObj.") ) # Checks assert_that(ptime_col %in% colnames(raw_cell_metadata), - msg = paste0("'", ptime_col, "' does not exist in cell.level.metadata Please review the 'ptime_col' parameter.") + msg = paste0( + "'", ptime_col, + "' does not exist in cell.level.metadata Please review the 'ptime_col' parameter." + ) ) assert_that(path_col %in% colnames(raw_cell_metadata), - msg = paste0("'", path_col, "' does not exist in cell.level.metadata. Please review the 'path_col' parameter.") + msg = paste0( + "'", path_col, + "' does not exist in cell.level.metadata. Please review the 'path_col' parameter." + ) ) assert_that(drop_fac >= 0.3, msg = "Invalid value for 'drop_fac'. It should be between 0.3 and 1." 
) assert_that( all( - bin_method %in% c("Freedman.Diaconis", "Sqrt", "Sturges", "Rice", "Doane", "Scott.Normal") + bin_method %in% c( + "Freedman.Diaconis", "Sqrt", "Sturges", "Rice", "Doane", "Scott.Normal" + ) ), msg = "Available binning methods are 'Freedman.Diaconis', 'Sqrt', 'Sturges', 'Rice', 'Doane', and 'Scott.Normal'" ) @@ -167,14 +167,21 @@ sc.squeeze <- function(scmpObject, } # Apply transformations on data - discrete.list <- lapply(avail.paths, function(path, design.frame = raw_cell_metadata, - drop_factor = drop_fac, path.col = path_col, - bin.size = bin_size_col, bin = bin_col, - time.col = ptime_col, method.bin = bin_method, + discrete.list <- lapply(avail.paths, function(path, + design.frame = raw_cell_metadata, + drop_factor = drop_fac, + path.col = path_col, + bin.size = bin_size_col, + bin = bin_col, + time.col = ptime_col, + method.bin = bin_method, bin.time.col = bin_ptime_col, - split = split_bins, bin.members.colname = bin_mem_col, - v = verbose, use.unique.time.points = additional_params$use_unique_time_points, - lbound = scmp_bin_lower_bound, ubound = scmp_bin_upper_bound) { + split = split_bins, + bin.members.colname = bin_mem_col, + v = verbose, + use.unique.time.points = additional_params$use_unique_time_points, + lbound = scmp_bin_lower_bound, + ubound = scmp_bin_upper_bound) { # Get the cells belonging to path path.frame <- design.frame[design.frame[[path.col]] == path, , drop = FALSE] @@ -228,7 +235,8 @@ sc.squeeze <- function(scmpObject, # Client-Verbose if (verbose) { message(paste( - "For", path, ",", length_n, "time points has been compressed to", nrow(bin_table), "bins" + "For", path, ",", length_n, "time points has been compressed to", + nrow(bin_table), "bins" )) } @@ -246,7 +254,10 @@ sc.squeeze <- function(scmpObject, # Split bins if (split) { if (verbose) { - message(paste("Optimizing bin sizes, with maximum allowed bin size as", max.allowed)) + message(paste( + "Optimizing bin sizes, with maximum allowed bin size as", + 
max.allowed + )) } # Adjust maximum Size @@ -278,12 +289,16 @@ sc.squeeze <- function(scmpObject, } if (verbose) { - message(paste("Optimizing bin sizes, with maximum allowed bin size as", max.allowed)) + message(paste( + "Optimizing bin sizes, with maximum allowed bin size as", + max.allowed + )) } if (verbose) { message(paste( - "Finally, for", path, ",", length_n, "time points has been compressed to", nrow(bin_table), "bins and the sum is ", sum(bin_table[[bin.size]]) + "Finally, for", path, ",", length_n, "time points has been compressed to", + nrow(bin_table), "bins and the sum is ", sum(bin_table[[bin.size]]) )) } @@ -358,7 +373,9 @@ sc.squeeze <- function(scmpObject, binned.path.frame[[path.col]] <- path - tmp.bin.size <- apply(binned.path.frame, 1, calc_bin_size, clus_mem_col = bin.members.colname) + tmp.bin.size <- apply(binned.path.frame, 1, calc_bin_size, + clus_mem_col = bin.members.colname + ) binned.path.frame[[bin.size]] <- tmp.bin.size # Subset the bin tables @@ -388,7 +405,7 @@ sc.squeeze <- function(scmpObject, as.data.frame() ## Add Processed Cell Matadata back with slot update - scmpObject@Sparse@colData <- DataFrame(processed_cell_metadata) + scmpObj@Sparse@colData <- DataFrame(processed_cell_metadata) # Set the 'cell' column as rownames rownames(processed_cell_metadata) <- processed_cell_metadata$cell @@ -461,26 +478,31 @@ sc.squeeze <- function(scmpObject, processed_binned_cell_metadata <- pB.frame } - compressed.sparse <- SingleCellExperiment(assays = list(bulk.counts = as(matrix(NA, nrow = 0, ncol = nrow(processed_binned_cell_metadata)), "dgCMatrix"))) + compressed.sparse <- SingleCellExperiment(assays = list( + bulk.counts = as( + matrix(NA, nrow = 0, ncol = nrow(processed_binned_cell_metadata)), + "dgCMatrix" + ) + )) compressed.sparse@colData <- DataFrame(processed_binned_cell_metadata) - scmpObject@Dense <- compressed.sparse + scmpObj@Dense <- compressed.sparse # Get Counts - scmpObject <- pb_counts( - scmpObject = scmpObject, + scmpObj 
<- pb_counts( + scmpObj = scmpObj, bin_mem_col = bin_mem_col, bin_col = bin_col, assay_name = assay_name, - cluster_count_by = cluster_count_by + cluster_count_by = aggregate ) # Update Slots - scmpObject@Parameters@ptime_col <- ptime_col - scmpObject@Parameters@path_col <- path_col - scmpObject@Parameters@bin_method <- bin_method - scmpObject@Parameters@bin_ptime_col <- bin_ptime_col - scmpObject@Parameters@bin_col <- bin_col - scmpObject@Parameters@bin_mem_col <- bin_mem_col - scmpObject@Parameters@bin_size_col <- bin_size_col - return(scmpObject) + scmpObj@Parameters@ptime_col <- ptime_col + scmpObj@Parameters@path_col <- path_col + scmpObj@Parameters@bin_method <- bin_method + scmpObj@Parameters@bin_ptime_col <- bin_ptime_col + scmpObj@Parameters@bin_col <- bin_col + scmpObj@Parameters@bin_mem_col <- bin_mem_col + scmpObj@Parameters@bin_size_col <- bin_size_col + return(scmpObj) } diff --git a/R/sc.t.fit.R b/R/sc.t.fit.R index 9193eea..99c451a 100644 --- a/R/sc.t.fit.R +++ b/R/sc.t.fit.R @@ -1,60 +1,41 @@ -#' Makes a stepwise regression fit for time series gene expression experiments +#' @title Perform stepwise regression fit to select for significant terms. #' -#' \code{s.t.fit} selects the best regression model for each gene using stepwise regression. +#' @description +#' Performs stepwise regression and selects the significant polynomial terms +#' from the full polynomial model. This function is succeeded by +#' \code{scMaSigPro::sc.p.vector()}. #' -#' @param scmpObj Can either be a \code{\link{p.vector}} object or a matrix containing expression scmpObj with the same requirements as for -#' the \code{\link{p.vector}} function. -#' @param selection_method Argument to be passed to the step function. Can be either \code{"backward"}, \code{"forward"}, \code{"two.ways.backward"}, or \code{"two.ways.forward"}. -#' @param p_value Significance level used for variable selection in the stepwise regression. 
-#' @param nvar_correction Argument for correcting T.fit significance level. See details. -#' @param family The distribution function to be used in the glm model. It must be the same used in \code{p.vector}. -#' @param epsilon Argument to pass to \code{glm.control}, convergence tolerance in the iterative process to estimate the glm model. -#' @param verbose Name of the analyzed item to show on the screen while \code{T.fit} is in process. -#' @param offset Whether ro use offset for normalization -#' @param parallel description -#' @param log_offset description -#' @param max_it description +#' @importFrom maSigPro position reg.coeffs +#' @importFrom stats influence.measures #' -#' @details -#' In the maSigPro approach, \code{\link{p.vector}} and \code{\link{T.fit}} are subsequent steps, meaning that significant genes are -#' first selected based on a general model, and then the significant variables for each gene are found by step-wise regression. +#' @param scmpObj An object of class \code{\link{ScMaSigPro}}. +#' @param selection_method Method for step-wise regression. +#' @param p_value Significance level used for variable selection in the stepwise +#' regression. +#' @param nvar_correction Argument for correcting significance level. See details. +#' @param family Distribution of the error term. +#' @param epsilon Model convergence tolerance. +#' @param offset A logical value specifying whether to use offset during fitting. +#' @param log_offset A logical value specifying whether to take the logarithm of +#' the offsets. +#' @param max_it Maximum number of iterations to fit the model. +#' @param parallel Use forking process to run parallelly. (Default is FALSE) +#' (Currently, Windows is not supported) +#' @param verbose Print detailed output in the console. (Default is TRUE) #' -#' The step regression can be \code{"backward"} or \code{"forward"}, indicating whether the step procedure starts from the -#' model with all or none variables. 
With the \code{"two.ways.backward"} or \code{"two.ways.forward"} options, the variables are both allowed to get in and out. -#' At each step, the p-value of each variable is computed, and variables get in/out of the model when this p-value is -#' lower or higher than the given threshold \code{p_value}. When \code{nvar_correction} is TRUE, the given significance level is corrected by the number of variables in the model. +#' @return An object of class \code{\link{ScMaSigPro}}, with updated `Estimate` +#' slot. #' -#' @return -#' A list containing the following elements: -#' \item{sol}{Matrix for summary results of the stepwise regression. For each selected gene, the following values are given: -#' \itemize{ -#' \item p-value of the regression ANOVA -#' \item R-squared of the model -#' \item p-value of the regression coefficients of the selected variables -#' }} -#' \item{coefficients}{Matrix containing regression coefficients for the adjusted models.} -#' \item{group.coeffs}{Matrix containing the coefficients of the implicit models of each experimental group.} -#' \item{variables}{Variables in the complete regression model.} -#' \item{G}{Total number of input genes.} -#' \item{g}{Number of genes taken in the regression fit.} -#' \item{dat}{Input analysis scmpObj matrix.} -#' \item{dis}{Regression design matrix.} -#' \item{selection_method}{Imputed step method for stepwise regression.} -#' \item{alloc}{Matrix of experimental design.} -#' \item{influ.info}{scmpObj frame of genes containing influential scmpObj.} +#' @seealso \code{\link{Estimates}} Class. #' #' @references{Conesa, A., Nueda M.J., Alberto Ferrer, A., Talon, T. 2006. -#' maSigPro: a Method to Identify Significant Differential Expression Profiles in Time-Course Microarray Experiments. 
-#' Bioinformatics 22, 1096-1102} -#' -#' @author{Ana Conesa and Maria Jose Nueda, \email{mj.nueda@@ua.es}} +#' maSigPro: a Method to Identify Significant Differential Expression Profiles +#' in Time-Course Microarray Experiments. Bioinformatics 22, 1096-1102} #' -#' @seealso{\code{\link{p.vector}}, \code{\link{step}}} +#' @author Priyansh Srivastava \email{spriyansh29@@gmail.com}, Ana Conesa and +#' Maria Jose Nueda, \email{mj.nueda@@ua.es} #' -#' @importFrom maSigPro position reg.coeffs -#' @importFrom stats influence.measures -#' @keywords regression -#' @keywords models +#' @keywords regression models #' @export sc.t.fit <- function(scmpObj, selection_method = "backward", @@ -75,7 +56,10 @@ sc.t.fit <- function(scmpObj, dis <- scmpObj@Design@predictor_matrix p_value <- scmpObj@Parameters@p_value groups.vector <- scmpObj@Design@groups.vector - groups.vector <- c(groups.vector[nchar(groups.vector) == min(nchar(groups.vector))][1], groups.vector) + groups.vector <- c( + groups.vector[nchar(groups.vector) == min(nchar(groups.vector))][1], + groups.vector + ) alloc <- scmpObj@Design@assignment_matrix G <- scmpObj@Parameters@g @@ -105,7 +89,9 @@ sc.t.fit <- function(scmpObj, compressed.data <- as.data.frame(scmpObj@Dense@colData) # Get bin_name and bin size - weight_df <- compressed.data[, c(scmpObj@Parameters@bin_size_colname), drop = TRUE] + weight_df <- compressed.data[, c(scmpObj@Parameters@bin_size_colname), + drop = TRUE + ] # Set names names(weight_df) <- rownames(compressed.data) @@ -162,82 +148,238 @@ sc.t.fit <- function(scmpObj, # Select the covariates if (selection_method == "backward") { - result_list <- parallel::mclapply(names(y_input), function(gene_name, dat_lapply = dat, dis_lapply = dis, family_lapply = family, epsilon_lapply = epsilon, offsetData_lapply = offsetData, pb_lapply = pb, verbose_lapply = verbose, vars_in_lapply = vars.in, Q_lapply = p_value, influ.info_lapply = influ.info, weights_lapply = weight_df, max_it_lapply = max_it) { - # 
result_list <- lapply(names(y_input), function(gene_name, g_lapply = g, dat_lapply = dat, dis_lapply = dis, family_lapply = family, epsilon_lapply = epsilon, offsetData_lapply = offsetData, pb_lapply = pb, verbose_lapply = verbose, vars_in_lapply = vars.in, Q_lapply = Q, influ.info_lapply = influ.info) { - y <- y_input[[gene_name]] + result_list <- parallel::mclapply(names(y_input), + function(gene_name, dat_lapply = dat, + dis_lapply = dis, + family_lapply = family, + epsilon_lapply = epsilon, + offsetData_lapply = offsetData, + pb_lapply = pb, + verbose_lapply = verbose, + vars_in_lapply = vars.in, + Q_lapply = p_value, + influ.info_lapply = influ.info, + weights_lapply = weight_df, + max_it_lapply = max_it) { + y <- y_input[[gene_name]] - reg_scmpObj <- sc.stepback(y = y, d = as.data.frame(dis_lapply), alfa = Q_lapply, family = family_lapply, epsilon = epsilon_lapply, useOffset = offsetData_lapply, useWeight = weights_lapply, max_it = max_it_lapply) - lmf_scmpObj <- glm(y ~ ., data = as.data.frame(dis_lapply), family = family_lapply, epsilon = epsilon_lapply, offset = offsetData_lapply, weights = weights_lapply, maxit = max_it_lapply) - model.glm.0_scmpObj <- glm(y ~ 1, family = family_lapply, epsilon = epsilon_lapply, offset = offsetData_lapply, weights = weights_lapply, maxit = max_it_lapply) - if (parallel == FALSE) { - if (verbose_lapply) { + reg_scmpObj <- sc.stepback( + y = y, d = as.data.frame(dis_lapply), + alfa = Q_lapply, family = family_lapply, + epsilon = epsilon_lapply, + useOffset = offsetData_lapply, + useWeight = weights_lapply, + max_it = max_it_lapply + ) + lmf_scmpObj <- glm(y ~ ., + data = as.data.frame(dis_lapply), + family = family_lapply, epsilon = epsilon_lapply, + offset = offsetData_lapply, weights = weights_lapply, + maxit = max_it_lapply + ) + model.glm.0_scmpObj <- glm(y ~ 1, + family = family_lapply, + epsilon = epsilon_lapply, + offset = offsetData_lapply, + weights = weights_lapply, + maxit = max_it_lapply + ) + if (parallel == 
FALSE) { if (verbose_lapply) { - i <- i + 1 - setTxtProgressBar(pb_lapply, i) + if (verbose_lapply) { + i <- i + 1 + setTxtProgressBar(pb_lapply, i) + } } } - } - return(extract_fitting(reg = reg_scmpObj, lmf = lmf_scmpObj, model.glm.0 = model.glm.0_scmpObj, dis = dis_lapply, family = family_lapply, name = gene_name, vars.in = vars_in_lapply, alfa = Q_lapply, influ.info = influ.info_lapply)) - # return(list( - # reg = reg, - # lmf = lmf, - # model.glm.0 = model.glm.0 - # )) - }, mc.cores = numCores, mc.set.seed = 2023) + return(extract_fitting( + reg = reg_scmpObj, lmf = lmf_scmpObj, + model.glm.0 = model.glm.0_scmpObj, dis = dis_lapply, + family = family_lapply, name = gene_name, + vars.in = vars_in_lapply, alfa = Q_lapply, + influ.info = influ.info_lapply + )) + # return(list( + # reg = reg, + # lmf = lmf, + # model.glm.0 = model.glm.0 + # )) + }, + mc.cores = numCores, mc.set.seed = 2023 + ) # }) } else if (selection_method == "forward") { - result_list <- parallel::mclapply(names(y_input), function(gene_name, g_lapply = g, dat_lapply = dat, dis_lapply = dis, family_lapply = family, epsilon_lapply = epsilon, offsetData_lapply = offsetData, pb_lapply = pb, verbose_lapply = verbose, vars_in_lapply = vars.in, Q_lapply = p_value, influ.info_lapply = influ.info, weights_lapply = weight_df, max_it_lapply = max_it) { - y <- y_input[[gene_name]] + result_list <- parallel::mclapply(names(y_input), + function(gene_name, g_lapply = g, + dat_lapply = dat, + dis_lapply = dis, + family_lapply = family, + epsilon_lapply = epsilon, + offsetData_lapply = offsetData, + pb_lapply = pb, + verbose_lapply = verbose, + vars_in_lapply = vars.in, + Q_lapply = p_value, + influ.info_lapply = influ.info, + weights_lapply = weight_df, + max_it_lapply = max_it) { + y <- y_input[[gene_name]] - reg_scmpObj <- sc.stepfor(y = y, d = as.data.frame(dis_lapply), alfa = Q_lapply, family = family_lapply, epsilon = epsilon_lapply, useOffset = offsetData_lapply, useWeight = weights_lapply, max_it = 
max_it_lapply) - lmf_scmpObj <- glm(y ~ ., data = as.data.frame(dis_lapply), family = family_lapply, epsilon = epsilon_lapply, offset = offsetData_lapply, weights = weights_lapply, maxit = max_it_lapply) - model.glm.0_scmpObj <- glm(y ~ 1, family = family_lapply, epsilon = epsilon_lapply, offset = offsetData_lapply, weights = weights_lapply, maxit = max_it_lapply) - div <- c(1:round(g / 100)) * 100 - if (parallel == FALSE) { - if (is.element(y, div) && verbose_lapply) { - if (verbose) { - setTxtProgressBar(pb_lapply, y) + reg_scmpObj <- sc.stepfor( + y = y, d = as.data.frame(dis_lapply), + alfa = Q_lapply, family = family_lapply, + epsilon = epsilon_lapply, + useOffset = offsetData_lapply, + useWeight = weights_lapply, + max_it = max_it_lapply + ) + lmf_scmpObj <- glm(y ~ ., + data = as.data.frame(dis_lapply), + family = family_lapply, epsilon = epsilon_lapply, + offset = offsetData_lapply, weights = weights_lapply, + maxit = max_it_lapply + ) + model.glm.0_scmpObj <- glm(y ~ 1, + family = family_lapply, + epsilon = epsilon_lapply, + offset = offsetData_lapply, + weights = weights_lapply, + maxit = max_it_lapply + ) + div <- c(1:round(g / 100)) * 100 + if (parallel == FALSE) { + if (is.element(y, div) && verbose_lapply) { + if (verbose) { + setTxtProgressBar(pb_lapply, y) + } } } - } - return(extract_fitting(reg = reg_scmpObj, lmf = lmf_scmpObj, model.glm.0 = model.glm.0_scmpObj, dis = dis_lapply, family = family_lapply, name = gene_name, vars.in = vars_in_lapply, alfa = Q_lapply, influ.info = influ.info_lapply)) - }, mc.cores = numCores, mc.set.seed = 2023) + return(extract_fitting( + reg = reg_scmpObj, lmf = lmf_scmpObj, + model.glm.0 = model.glm.0_scmpObj, + dis = dis_lapply, family = family_lapply, + name = gene_name, vars.in = vars_in_lapply, + alfa = Q_lapply, influ.info = influ.info_lapply + )) + }, + mc.cores = numCores, mc.set.seed = 2023 + ) } else if (selection_method == "two.ways.backward") { - result_list <- parallel::mclapply(names(y_input), 
function(gene_name, g_lapply = g, dat_lapply = dat, dis_lapply = dis, family_lapply = family, epsilon_lapply = epsilon, offsetData_lapply = offsetData, pb_lapply = pb, verbose_lapply = verbose, vars_in_lapply = vars.in, Q_lapply = p_value, influ.info_lapply = influ.info, weights_lapply = weight_df, max_it_lapply = max_it) { - y <- y_input[[gene_name]] + result_list <- parallel::mclapply(names(y_input), + function(gene_name, + g_lapply = g, + dat_lapply = dat, + dis_lapply = dis, + family_lapply = family, + epsilon_lapply = epsilon, + offsetData_lapply = offsetData, + pb_lapply = pb, + verbose_lapply = verbose, + vars_in_lapply = vars.in, + Q_lapply = p_value, + influ.info_lapply = influ.info, + weights_lapply = weight_df, + max_it_lapply = max_it) { + y <- y_input[[gene_name]] - reg_scmpObj <- sc.two.ways.stepback(y = y, d = as.data.frame(dis_lapply), alfa = Q_lapply, family = family_lapply, epsilon = epsilon_lapply, useOffset = offsetData_lapply, useWeight = weights_lapply, max_it = max_it_lapply) - lmf_scmpObj <- glm(y ~ ., data = as.data.frame(dis_lapply), family = family_lapply, epsilon = epsilon_lapply, offset = offsetData_lapply, weights = weights_lapply, maxit = max_it_lapply) - model.glm.0_scmpObj <- glm(y ~ 1, family = family_lapply, epsilon = epsilon_lapply, offset = offsetData_lapply, weights = weights_lapply, maxit = max_it_lapply) - div <- c(1:round(g / 100)) * 100 - if (parallel == FALSE) { - if (is.element(y, div) && verbose_lapply) { - if (verbose) { - setTxtProgressBar(pb_lapply, y) + reg_scmpObj <- sc.two.ways.stepback( + y = y, d = as.data.frame(dis_lapply), + alfa = Q_lapply, + family = family_lapply, + epsilon = epsilon_lapply, + useOffset = offsetData_lapply, + useWeight = weights_lapply, + max_it = max_it_lapply + ) + lmf_scmpObj <- glm(y ~ ., + data = as.data.frame(dis_lapply), + family = family_lapply, epsilon = epsilon_lapply, + offset = offsetData_lapply, weights = weights_lapply, + maxit = max_it_lapply + ) + model.glm.0_scmpObj <- glm(y 
~ 1, + family = family_lapply, + epsilon = epsilon_lapply, + offset = offsetData_lapply, + weights = weights_lapply, + maxit = max_it_lapply + ) + div <- c(1:round(g / 100)) * 100 + if (parallel == FALSE) { + if (is.element(y, div) && verbose_lapply) { + if (verbose) { + setTxtProgressBar(pb_lapply, y) + } } } - } - return(extract_fitting(reg = reg_scmpObj, lmf = lmf_scmpObj, model.glm.0 = model.glm.0_scmpObj, dis = dis_lapply, family = family_lapply, name = gene_name, vars.in = vars_in_lapply, alfa = Q_lapply, influ.info = influ.info_lapply)) - }, mc.cores = numCores, mc.set.seed = 2023) + return(extract_fitting( + reg = reg_scmpObj, lmf = lmf_scmpObj, + model.glm.0 = model.glm.0_scmpObj, + dis = dis_lapply, family = family_lapply, + name = gene_name, vars.in = vars_in_lapply, + alfa = Q_lapply, influ.info = influ.info_lapply + )) + }, + mc.cores = numCores, mc.set.seed = 2023 + ) # }) } else if (selection_method == "two.ways.forward") { - result_list <- parallel::mclapply(names(y_input), function(gene_name, g_lapply = g, dat_lapply = dat, dis_lapply = dis, family_lapply = family, epsilon_lapply = epsilon, offsetData_lapply = offsetData, pb_lapply = pb, verbose_lapply = verbose, vars_in_lapply = vars.in, Q_lapply = p_value, influ.info_lapply = influ.info, weights_lapply = weight_df, max_it_lapply = max_it) { - y <- y_input[[gene_name]] + result_list <- parallel::mclapply(names(y_input), + function(gene_name, g_lapply = g, + dat_lapply = dat, + dis_lapply = dis, + family_lapply = family, + epsilon_lapply = epsilon, + offsetData_lapply = offsetData, + pb_lapply = pb, + verbose_lapply = verbose, + vars_in_lapply = vars.in, + Q_lapply = p_value, + influ.info_lapply = influ.info, + weights_lapply = weight_df, + max_it_lapply = max_it) { + y <- y_input[[gene_name]] - reg_scmpObj <- sc.two.ways.stepfor(y = y, d = as.data.frame(dis_lapply), alfa = Q_lapply, family = family_lapply, epsilon = epsilon_lapply, useOffset = offsetData_lapply, useWeight = weights_lapply, max_it 
= max_it_lapply) - lmf_scmpObj <- glm(y ~ ., data = as.data.frame(dis_lapply), family = family_lapply, epsilon = epsilon_lapply, offset = offsetData_lapply, weights = weights_lapply, maxit = max_it_lapply) - model.glm.0_scmpObj <- glm(y ~ 1, family = family_lapply, epsilon = epsilon_lapply, offset = offsetData_lapply, weights = weights_lapply, maxit = max_it_lapply) - div <- c(1:round(g / 100)) * 100 - if (parallel == FALSE) { - if (is.element(y, div) && verbose_lapply) { - if (verbose) { - setTxtProgressBar(pb_lapply, y) + reg_scmpObj <- sc.two.ways.stepfor( + y = y, d = as.data.frame(dis_lapply), + alfa = Q_lapply, family = family_lapply, + epsilon = epsilon_lapply, + useOffset = offsetData_lapply, + useWeight = weights_lapply, + max_it = max_it_lapply + ) + lmf_scmpObj <- glm(y ~ ., + data = as.data.frame(dis_lapply), + family = family_lapply, epsilon = epsilon_lapply, + offset = offsetData_lapply, weights = weights_lapply, + maxit = max_it_lapply + ) + model.glm.0_scmpObj <- glm(y ~ 1, + family = family_lapply, + epsilon = epsilon_lapply, + offset = offsetData_lapply, + weights = weights_lapply, + maxit = max_it_lapply + ) + div <- c(1:round(g / 100)) * 100 + if (parallel == FALSE) { + if (is.element(y, div) && verbose_lapply) { + if (verbose) { + setTxtProgressBar(pb_lapply, y) + } } } - } - return(extract_fitting(reg = reg_scmpObj, lmf = lmf_scmpObj, model.glm.0 = model.glm.0_scmpObj, dis = dis_lapply, family = family_lapply, name = gene_name, vars.in = vars_in_lapply, alfa = Q_lapply, influ.info = influ.info_lapply)) - }, mc.cores = numCores, mc.set.seed = 2023) + return(extract_fitting( + reg = reg_scmpObj, lmf = lmf_scmpObj, + model.glm.0 = model.glm.0_scmpObj, + dis = dis_lapply, family = family_lapply, + name = gene_name, vars.in = vars_in_lapply, + alfa = Q_lapply, influ.info = influ.info_lapply + )) + }, + mc.cores = numCores, mc.set.seed = 2023 + ) } else { stop("stepwise method must be one of backward, forward, two.ways.backward, two.ways.forward") } 
@@ -265,7 +407,10 @@ sc.t.fit <- function(scmpObj, # Assuming 'parallel' is your list # influ.info.list <- influ.info.list[!sapply(influ.info.list, function(x) is.logical(x))] - influ.info.list <- influ.info.list[!vapply(influ.info.list, is.logical, logical(1))] + influ.info.list <- influ.info.list[!vapply( + influ.info.list, is.logical, + logical(1) + )] # Lapply to remove column 1 influ.info.list <- lapply(influ.info.list, function(element) { @@ -309,7 +454,10 @@ sc.t.fit <- function(scmpObj, A <- NULL col.names <- NULL for (l in 1:length(groups)) { - B <- reg.coeffs(coefficients = coefficients[w, ], groups.vector = groups.vector, group = groups[l]) + B <- reg.coeffs( + coefficients = coefficients[w, ], + groups.vector = groups.vector, group = groups[l] + ) cols <- paste(rep(groups[l], each = length(B)), paste("beta", c(0:(length(B) - 1)), sep = ""), sep = "_" @@ -326,7 +474,11 @@ sc.t.fit <- function(scmpObj, if (!is.null(influ.info)) { if (verbose) { - message(paste("\nInfluence:", ncol(influ.info), "genes with influential onservations detected. Model validation for these genes is recommended")) + message(paste( + "\nInfluence:", + ncol(influ.info), + "genes with influential onservations detected. Model validation for these genes is recommended" + )) } } else { if (verbose) { diff --git a/R/show_functions.R b/R/show_functions.R index 3a26bc1..9bffde2 100644 --- a/R/show_functions.R +++ b/R/show_functions.R @@ -13,18 +13,20 @@ # 10. showGroupCoeff(): ############################################################################### - -#' Show or Return the Coefficent matrix +#' @title Show or Return the Coefficient matrix #' -#' This function is used to view or return the coeffients of the provided scMaSigPro object. +#' @description +#' This function is used to view or return the coefficients from the provided +#' scMaSigPro object. #' -#' @param scmpObj an object of class 'ScMaSigPro'. This object should contain the computed solution.
-#' @param view logical, whether to view the solution. If TRUE (default), the solution is displayed. -#' @param includeInflu description -#' @param return logical, whether to return the solution. If FALSE (default), the solution is not returned. +#' @param scmpObj An object of class \code{\link{ScMaSigPro}}. +#' @param view Whether to view the data in the explorer. (Default: FALSE) +#' @param includeInflu Whether to include genes with influential observations. +#' @param return Whether to return the data. (Default: TRUE) #' -#' @return The computed solution as a data.frame if return is set to TRUE. -#' If return is FALSE, the function does not return anything. +#' @return The computed Coefficient matrix as a dataframe. +#' +#' @author Priyansh Srivastava \email{spriyansh29@@gmail.com} #' #' @export showCoeff <- function(scmpObj, view = FALSE, return = TRUE, includeInflu = TRUE) { @@ -58,17 +60,19 @@ showCoeff <- function(scmpObj, view = FALSE, return = TRUE, includeInflu = TRUE) } ############################################################################### - -#' Show or Return the matrix of influential genes +#' @title Return the matrix of genes with influential observation #' -#' This function is used to view or return the solution of the provided scMaSigPro object. +#' @description +#' This function is used to view or return the matrix of genes with influential +#' observation from the provided scMaSigPro object. #' -#' @param scmpObj an object of class 'ScMaSigPro'. This object should contain the computed solution. -#' @param view logical, whether to view the solution. If TRUE (default), the solution is displayed. -#' @param return logical, whether to return the solution. If FALSE (default), the solution is not returned. +#' @param scmpObj An object of class \code{\link{ScMaSigPro}}. +#' @param view Whether to view the data in the explorer. (Default: FALSE) +#' @param return Whether to return the data.
(Default: TRUE) #' -#' @return The computed solution as a data.frame if return is set to TRUE. -#' If return is FALSE, the function does not return anything. +#' @return Matrix of genes with influential observation. +#' +#' @author Priyansh Srivastava \email{spriyansh29@@gmail.com} #' #' @export showInflu <- function(scmpObj, view = FALSE, return = TRUE) { @@ -97,18 +101,20 @@ showInflu <- function(scmpObj, view = FALSE, return = TRUE) { } ############################################################################### - -#' Show or Return the t scores +#' @title Show or Return the t-score matrix #' -#' This function is used to view or return the solution of the provided scMaSigPro object. +#' @description +#' This function is used to view or return the t-scores from the provided +#' scMaSigPro object. #' -#' @param scmpObj an object of class 'ScMaSigPro'. This object should contain the computed solution. -#' @param view logical, whether to view the solution. If TRUE (default), the solution is displayed. -#' @param includeInflu logical, whether to add gene with influential data in the solution. -#' @param return logical, whether to return the solution. If FALSE (default), the solution is not returned. +#' @param scmpObj An object of class \code{\link{ScMaSigPro}}. +#' @param view Whether to view the data in the explorer. (Default: FALSE) +#' @param includeInflu Whether to include genes with influential observations. +#' @param return Whether to return the data. (Default: TRUE) #' -#' @return The computed solution as a data.frame if return is set to TRUE. -#' If return is FALSE, the function does not return anything. +#' @return The computed t-score matrix as a dataframe.
+#' +#' @author Priyansh Srivastava \email{spriyansh29@@gmail.com} #' #' @export showTS <- function(scmpObj, view = FALSE, return = TRUE, includeInflu = TRUE) { @@ -142,18 +148,20 @@ showTS <- function(scmpObj, view = FALSE, return = TRUE, includeInflu = TRUE) { } ############################################################################### - -#' Show or Return the Solution +#' @title Show or Return the P-values after model fitting. #' -#' This function is used to view or return the solution of the provided scMaSigPro object. +#' @description +#' This function is used to view or return the matrix of p-values for each term +#' and the full model from the provided scMaSigPro object. #' -#' @param scmpObj an object of class 'ScMaSigPro'. This object should contain the computed solution. -#' @param view logical, whether to view the solution. If TRUE (default), the solution is displayed. -#' @param includeInflu logical, whether to add gene with influential data in the solution. -#' @param return logical, whether to return the solution. If TRUE (default), returned. +#' @param scmpObj An object of class \code{\link{ScMaSigPro}}. +#' @param view Whether to view the data in the explorer. (Default: FALSE) +#' @param includeInflu Whether to include genes with influential observations. +#' @param return Whether to return the data. (Default: TRUE) #' -#' @return The computed solution as a data.frame if return is set to TRUE. -#' If return is FALSE, the function does not return anything. +#' @return The computed p-values for each term and full model as a dataframe.
+#' +#' @author Priyansh Srivastava \email{spriyansh29@@gmail.com} #' #' @export showSol <- function(scmpObj, view = FALSE, return = TRUE, includeInflu = TRUE) { @@ -187,18 +195,23 @@ showSol <- function(scmpObj, view = FALSE, return = TRUE, includeInflu = TRUE) { } ############################################################################### -#' Show or Return the Solution +#' @title Show or Return the counts for non-flat profile. #' -#' This function is used to view or return the solution of the provided scMaSigPro object. +#' @description +#' This function is used to view or return the pseudo-bulk counts of the genes +#' with non-flat profiles from the provided scMaSigPro object. #' -#' @param scmpObj an object of class 'ScMaSigPro'. This object should contain the computed solution. -#' @param view logical, whether to view the solution. If TRUE (default), the solution is displayed. -#' @param includeInflu logical, whether to add gene with influential data in the solution. -#' @param return logical, whether to return the solution. If FALSE (default), the solution is not returned. -#' -#' @return The computed solution as a data.frame if return is set to TRUE. -#' If return is FALSE, the function does not return anything. #' @importFrom utils View +#' +#' @param scmpObj An object of class \code{\link{ScMaSigPro}}. +#' @param view Whether to view the data in the explorer. (Default: FALSE) +#' @param includeInflu Whether to include genes with influential observations. +#' @param return Whether to return the data. (Default: TRUE) +#' +#' @return Pseudo-bulk counts as matrix for genes with non-flat profiles.
+#' +#' @author Priyansh Srivastava \email{spriyansh29@@gmail.com} +#' #' @export showSigProf <- function(scmpObj, view = FALSE, return = TRUE, includeInflu = FALSE) { # Check Object Validity @@ -234,13 +247,16 @@ showSigProf <- function(scmpObj, view = FALSE, return = TRUE, includeInflu = FAL } ############################################################################### -#' Show the terms of the polynomial term +#' @title Print the full model formula. +#' +#' @description +#' Print the full model formula in console as a string. #' -#' This function is used to view or return the solution of the provided scMaSigPro object. +#' @param scmpObj An object of class \code{\link{ScMaSigPro}}. #' -#' @param scmpObj an object of class 'ScMaSigPro'. This object should contain the computed solution. +#' @return Character string of the formula for the full model. #' -#' @return Return the terms of the polynomial model. +#' @author Priyansh Srivastava \email{spriyansh29@@gmail.com} #' #' @export showPoly <- function(scmpObj) { @@ -273,17 +289,23 @@ showPoly <- function(scmpObj) { } ############################################################################### -#' Show or Return the parameters used during the analysis +#' @title Show the parameters used during the workflow. #' -#' This function is used to view or return the solution of the provided scMaSigPro object. +#' @description +#' Get or View all the parameters used during the workflow. #' -#' @param scmpObj an object of class 'ScMaSigPro'. This object should contain the computed solution. -#' @param view logical, whether to view the solution. If TRUE (default), the solution is displayed. -#' @param return logical, whether to return the solution. If FALSE (default), the solution is not returned. +#' @importFrom methods slot slotNames +#' @param scmpObj An object of class \code{\link{ScMaSigPro}}. +#' @param view Whether to view the data in the explorer. (Default: FALSE) +#' @param return Whether to return the data. 
(Default: TRUE) #' #' @return The computed solution as a data.frame if return is set to TRUE. #' If return is FALSE, the function does not return anything. -#' @importFrom methods slot slotNames +#' +#' @return Dataframe of the parameters used in the analysis. +#' +#' @author Priyansh Srivastava \email{spriyansh29@@gmail.com} +#' #' @export showParams <- function(scmpObj, view = FALSE, return = TRUE) { # Check Object Validity @@ -336,17 +358,18 @@ showParams <- function(scmpObj, view = FALSE, return = TRUE) { } ############################################################################### -#' Show or Return the Group wise coefficents +#' @title Show or Return the Branching Path Coefficient matrix #' -#' This function is used to view or return the group of the provided scMaSigPro object. +#' @description +#' This function is used to view or return the branching paths coefficients from +#' the provided scMaSigPro object. #' -#' @param scmpObj an object of class 'ScMaSigPro'. This object should contain the computed solution. -#' @param view logical, whether to view the solution. If TRUE (default), the solution is displayed. -#' @param return logical, whether to return the solution. If FALSE (default), the solution is not returned. -#' @param includeInflu description +#' @param scmpObj An object of class \code{\link{ScMaSigPro}}. +#' @param view Whether to view the data in the explorer. (Default: FALSE) +#' @param return Whether to return the data. (Default: TRUE) +#' @param includeInflu Whether to include genes with influential observations. #' -#' @return The computed solution as a data.frame if return is set to TRUE. -#' If return is FALSE, the function does not return anything. +#' @return The computed branching path coefficient matrix as a dataframe.
#' #' @export showGroupCoeff <- function(scmpObj, view = FALSE, return = TRUE, includeInflu = TRUE) { @@ -380,11 +403,12 @@ showGroupCoeff <- function(scmpObj, view = FALSE, return = TRUE, includeInflu = } ############################################################################### -#' Show ScMaSigPro Object Information +#' @title Show ScMaSigPro Object Information #' -#' This method displays basic information about the ScMaSigPro object when the object -#' is printed in the console. The method is automatically called when the user writes -#' the name of the object in the console. +#' @description +#' This method displays basic information about the ScMaSigPro object when the +#' object is printed in the console. The method is automatically called when the +#' user writes the name of the object in the console. #' #' @param object An object of class \code{ScMaSigPro}. #' @@ -452,5 +476,3 @@ extract_info <- function(data, return_type = "avg_bin_size", bin_size_col, path_ stop("Invalid return_type. 
Choose between 'avg_bin_size' and 'num_bins'.") } } - -#-X-# diff --git a/README.md b/README.md index d3c88fc..283cf21 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ Implementation of MaSigPro for scRNA-Seq Data -[![lint](https://github.com/BioBam/scMaSigPro/actions/workflows/lint.yaml/badge.svg?branch=main)](https://github.com/BioBam/scMaSigPro/actions/workflows/lint.yaml) [![R-CMD-Check](https://github.com/BioBam/scMaSigPro/actions/workflows/r.yml/badge.svg?branch=main)](https://github.com/BioBam/scMaSigPro/actions/workflows/r.yml) [![test-coverage](https://github.com/BioBam/scMaSigPro/actions/workflows/test-coverage.yaml/badge.svg?branch=main)](https://github.com/BioBam/scMaSigPro/actions/workflows/test-coverage.yaml) + [![R-CMD-Check](https://github.com/BioBam/scMaSigPro/actions/workflows/r.yml/badge.svg?branch=main)](https://github.com/BioBam/scMaSigPro/actions/workflows/r.yml) [![test-coverage](https://github.com/BioBam/scMaSigPro/actions/workflows/test-coverage.yaml/badge.svg?branch=main)](https://github.com/BioBam/scMaSigPro/actions/workflows/test-coverage.yaml) --- @@ -45,14 +45,14 @@ data("splat.sim", package = "scMaSigPro") ``` # Helper Function to convert annotated SCE object to scmpObject -scmp.ob <- as.scmp( +scmp.ob <- as_scmp( object = splat.sim, from = "sce", align_pseudotime = FALSE, verbose = TRUE, additional_params = list( labels_exist = TRUE, - existing_pseudotime_colname = "Step", - existing_path_colname = "Group" + exist_ptime_col = "Step", + exist_path_col = "Group" ) ) ``` diff --git a/man/ParameterConfig-class.Rd b/man/ParameterConfig-class.Rd index e9bfd16..baed751 100644 --- a/man/ParameterConfig-class.Rd +++ b/man/ParameterConfig-class.Rd @@ -28,7 +28,7 @@ for inferred Pseudotime values in 'Sparse' data. See `colData` from the binning.} \item{\code{path_col}}{A character string representing the column name for branching -path assignment in 'Sparse' or 'Dense'data. 
See `colData` from the +path assignment in 'Sparse' or 'Dense' data. See `colData` from the \pkg{SingleCellExperiment} package.} \item{\code{bin_col}}{A character string representing the name of the column in which @@ -47,7 +47,8 @@ metadata containing cell level annotations. (Default is "cell_type").} \item{\code{p_value}}{Significance Level.} -\item{\code{min_na}}{Minimum values needed to estimate the model.} +\item{\code{min_na}}{Minimum values needed per gene across cells to estimate the +model.} \item{\code{mt_correction}}{A character string specifying the p-value correction method.} @@ -62,17 +63,18 @@ regression.} \item{\code{log_offset}}{A logical value specifying whether to take the logarithm of the offsets.} -\item{\code{max_it}}{Integer. Maximum number of iterations to fit the model.} +\item{\code{max_it}}{Maximum number of iterations to fit the model.} -\item{\code{poly_degree}}{Integer with the polynomial degree to fit the regression. 1} +\item{\code{poly_degree}}{Order of the polynomial linear model.} -\item{\code{distribution}}{Distribution used} +\item{\code{distribution}}{Distribution of the error term.} -\item{\code{cluster_method}}{Description} +\item{\code{cluster_method}}{Clustering method used for clustering significant genes.} -\item{\code{use_dim}}{description} +\item{\code{use_dim}}{Dimension to use for filling the missing values before +clustering.} -\item{\code{fill_na}}{description} +\item{\code{fill_na}}{Method to fill the missing values.} }} \keyword{classes} diff --git a/man/align_pseudotime.Rd b/man/align_pseudotime.Rd index 881d02e..58a5f0f 100644 --- a/man/align_pseudotime.Rd +++ b/man/align_pseudotime.Rd @@ -13,6 +13,8 @@ align_pseudotime( ) } \arguments{ +\item{scmpObj}{description} + \item{ptime_col}{A character string representing the column name for inferred Pseudotime values in 'Sparse' data. See `colData` from the \pkg{SingleCellExperiment} package. 
(Default is "Pseudotime")} diff --git a/man/dot-ScMaSigPro_show.Rd b/man/dot-ScMaSigPro_show.Rd index 9d8c91f..e2c7f1a 100644 --- a/man/dot-ScMaSigPro_show.Rd +++ b/man/dot-ScMaSigPro_show.Rd @@ -10,8 +10,8 @@ \item{object}{An object of class \code{ScMaSigPro}.} } \description{ -This method displays basic information about the ScMaSigPro object when the object -is printed in the console. The method is automatically called when the user writes -the name of the object in the console. +This method displays basic information about the ScMaSigPro object when the +object is printed in the console. The method is automatically called when the +user writes the name of the object in the console. } \keyword{internal} diff --git a/man/pb_counts.Rd b/man/pb_counts.Rd index dc2ed74..3e414b9 100644 --- a/man/pb_counts.Rd +++ b/man/pb_counts.Rd @@ -5,15 +5,15 @@ \title{Create Pseduo-bulk Counts} \usage{ pb_counts( - scmpObject, - bin_mem_col = scmpObject@Parameters@bin_mem_col, - bin_col = scmpObject@Parameters@bin_col, + scmpObj, + bin_mem_col = scmpObj@Parameters@bin_mem_col, + bin_col = scmpObj@Parameters@bin_col, assay_name = "counts", cluster_count_by = "sum" ) } \arguments{ -\item{scmpObject}{object of Class scMaSigPro. See \code{\link{ScMaSigPro}} +\item{scmpObj}{object of Class scMaSigPro. 
See \code{\link{ScMaSigPro}} for more details.} \item{bin_mem_col}{Column name in the Dense metadata storing information diff --git a/man/plotBinTile.Rd b/man/plotBinTile.Rd index 8125dc2..51cd0c6 100644 --- a/man/plotBinTile.Rd +++ b/man/plotBinTile.Rd @@ -12,22 +12,26 @@ plotBinTile( ) } \arguments{ -\item{scmpObj}{A ScMaSigPro class object with an additional slot 'Dense' that -contains compression information.} +\item{scmpObj}{An object of class \code{\link{ScMaSigPro}}.} -\item{path_col}{Name of the column in `cell.metadata` storing information -for Path.} +\item{path_col}{A character string representing the column name for branching +path assignment in 'Sparse' or 'Dense' slot.} -\item{bin_size_col}{A title of the barplot} +\item{bin_size_col}{A character string representing the name of the column in +which bin sizes per bin are stored. (Default is "scmp_bin_size").} -\item{bin_ptime_col}{description} +\item{bin_ptime_col}{A character string representing the column name +for binned Pseudotime values in 'Dense' data. +(Default is "scmp_binned_pseudotime").} } \value{ -A tile plot made with `geom_tile()`, visualizing the bin sizes across -different binned time and paths. +ggplot2 plot object. } \description{ -This function generates plots to visualize the dense cell metadata from a ScMaSigPro -object. It produces tile plot to display the bin sizes across different -binned time intervals and paths. +This function generates plots to visualize the Dense slot cell metadata +from a ScMaSigPro object. It produces tile plot to display the bin sizes +across different binned time intervals and paths. 
+} +\author{ +Priyansh Srivastava \email{spriyansh29@gmail.com} } diff --git a/man/plotIntersect.Rd b/man/plotIntersect.Rd index 41b3106..4b1ceec 100644 --- a/man/plotIntersect.Rd +++ b/man/plotIntersect.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/plotIntersect.R \name{plotIntersect} \alias{plotIntersect} -\title{Generate UpSet Plot on Intersection of Significant Genes from scmpObject} +\title{Generate UpSet Plot} \usage{ plotIntersect( scmpObj, @@ -13,23 +13,27 @@ plotIntersect( ) } \arguments{ -\item{scmpObj}{An object of class ScMaSigPro} +\item{scmpObj}{An object of class \code{\link{ScMaSigPro}}.} -\item{min_intersection_size}{minimal number of observations in an intersection +\item{min_intersection_size}{Minimal number of observations in an intersection for it to be included.} -\item{keep_empty_groups}{whether empty sets should be kept (including sets +\item{keep_empty_groups}{Whether empty sets should be kept (including sets which are only empty after filtering by size)} -\item{width_ratio}{ratio of the overall set size width to intersection matrix +\item{width_ratio}{Ratio of the overall set size width to intersection matrix width.} -\item{show_sets_size}{the overall set sizes plot, e.g. from upset_set_size() +\item{show_sets_size}{The overall set sizes plot, e.g. from upset_set_size() (FALSE to hide)} } \value{ -An UpSet plot visualizing the intersections of significant genes across pathways. +ggplot2 plot object. } \description{ -Generate UpSet Plot on Intersection of Significant Genes from scmpObject +Generate UpSet Plot on Intersection of Significant Genes from scMaSigPro +object. It is a wrapper around `ComplexUpset::upset`. 
+} +\author{ +Priyansh Srivastava \email{spriyansh29@gmail.com} } diff --git a/man/plotTrend.Rd b/man/plotTrend.Rd index dc56be1..c951618 100644 --- a/man/plotTrend.Rd +++ b/man/plotTrend.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/plotTrend.R \name{plotTrend} \alias{plotTrend} -\title{Plot Groups Function} +\title{Plot trend of the single gene.} \usage{ plotTrend( scmpObj, @@ -18,32 +18,36 @@ plotTrend( ) } \arguments{ -\item{scmpObj}{object of class scmpObj} +\item{scmpObj}{An object of class \code{\link{ScMaSigPro}}.} -\item{feature_id}{Name of the gene to be plotted. Should correspond to one of -the feature in the count table.} +\item{feature_id}{Name of the gene to be plotted.} -\item{xlab}{X-axis label. Default is "Pooled Pseudotime".} +\item{xlab}{X-axis label. (Default is "Pooled Pseudotime")} -\item{ylab}{Y-axis label. Default is "Pseudobulk Expression".} +\item{ylab}{Y-axis label. (Default is "Pseudobulk Expression")} -\item{smoothness}{How smooth the trend should be. Default is 0.01, setting to -higher values will result in more linear trends.} +\item{smoothness}{How smooth the trend should be. Setting to +higher values will result in more linear trends. (Default is 0.01)} -\item{logs}{Whether to plot log of counts.} +\item{logs}{Whether to log transform counts. (Default is TRUE)} -\item{logType}{Log Available options 'log', 'log2', 'log10'} +\item{logType}{How to log transform the values. Available options 'log', +'log2', 'log10'. (Default is 'log')} -\item{pseudoCount}{Add a pseudo-count before taking the log.} +\item{pseudoCount}{Add a pseudo-count before taking the log. (Default is 1)} -\item{significant}{Default is FALSE. Set to TRUE to plot genes, that don't pass -R-Square threshold from 'sc.filter()'.} +\item{significant}{Plot gene only if the models are significant based on +\code{scMaSigPro::sc.filter()}. 
(Default is TRUE)} -\item{summary_mode}{description} +\item{summary_mode}{Compress the expression values per replicate (if present) +per binned pseudotime point. Default is 'median'. Other option 'mean'} } \value{ -Generates a plot. +ggplot2 plot object. } \description{ -This function generates plots based on various parameters. It calculates the summary mode, colors, and other visual attributes to create a plot. +Plot trend of the single gene across the binned pseudotime. +} +\author{ +Priyansh Srivastava \email{spriyansh29@gmail.com} } diff --git a/man/plotTrendCluster.Rd b/man/plotTrendCluster.Rd index 8d5c13e..8f6fa46 100644 --- a/man/plotTrendCluster.Rd +++ b/man/plotTrendCluster.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/plotTrendCluster.R \name{plotTrendCluster} \alias{plotTrendCluster} -\title{Plot Groups Function} +\title{Plot multiple trends of the multiple genes.} \usage{ plotTrendCluster( scmpObj, @@ -21,35 +21,44 @@ plotTrendCluster( ) } \arguments{ -\item{scmpObj}{object of class scmpObj} +\item{scmpObj}{An object of class \code{\link{ScMaSigPro}}.} -\item{xlab}{X-axis label. Default is "Pooled Pseudotime".} +\item{xlab}{X-axis label. (Default is "Pooled Pseudotime")} -\item{ylab}{Y-axis label. Default is "Pseudobulk Expression".} +\item{ylab}{Y-axis label. (Default is "Pseudobulk Expression")} -\item{plot}{description} +\item{plot}{Whether to plot 'coeff' or 'counts'. (Default is 'counts')} -\item{summary_mode}{description} +\item{summary_mode}{Compress the expression values per replicate (if present) +per binned pseudotime point. Default is 'median'. Other option 'mean'} -\item{logs}{Whether to plot log of counts} +\item{logs}{Whether to log transform counts. (Default is TRUE)} -\item{logType}{Log type required} +\item{logType}{How to log transform the values. Available options 'log', +'log2', 'log10'. (Default is 'log')} -\item{smoothness}{description} +\item{smoothness}{How smooth the trend should be. 
Setting to +higher values will result in more linear trends. (Default is 0.01)} -\item{includeInflu}{description} +\item{includeInflu}{Include gene only if it has influential data. +(Default is TRUE)} -\item{verbose}{description} +\item{verbose}{Print detailed output in the console. (Default is TRUE)} -\item{pseudoCount}{description} +\item{pseudoCount}{Add a pseudo-count before taking the log. (Default is 1)} -\item{significant}{description} +\item{significant}{Include gene only if the models are significant based on +\code{scMaSigPro::sc.filter()}. (Default is TRUE)} -\item{parallel}{description} +\item{parallel}{Use forking process to run parallelly. (Default is FALSE) +(Currently, Windows is not supported)} } \value{ -Generates a plot. +ggplot2 plot object. } \description{ -This function generates plots based on various parameters. +Plot trends of multiple genes (clustered) across the binned pseudotime. +} +\author{ +Priyansh Srivastava \email{spriyansh29@gmail.com} } diff --git a/man/sc.p.vector.Rd b/man/sc.p.vector.Rd index b37258c..000dc1e 100644 --- a/man/sc.p.vector.Rd +++ b/man/sc.p.vector.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/sc.p.vector.R \name{sc.p.vector} \alias{sc.p.vector} -\title{Make regression fit for Binned Pseudotime. Adaption of maSigPro::p.vector()} +\title{Perform fitting with full model.} \usage{ sc.p.vector( scmpObj, @@ -19,53 +19,52 @@ sc.p.vector( ) } \arguments{ -\item{scmpObj}{matrix containing normalized gene expression data. Genes must be in rows and arrays in columns.} +\item{scmpObj}{An object of class \code{\link{ScMaSigPro}}.} -\item{p_value}{significance level. 
Default is 0.05.} +\item{p_value}{Significance level used for variable selection in the stepwise +regression.} -\item{mt_correction}{argument to pass to \code{p.adjust} function indicating the method for multiple testing adjustment of p.value.} +\item{mt_correction}{A character string specifying the p-value correction +method.} -\item{min_na}{genes with less than this number of true numerical values will be excluded from the analysis. -Minimum value to estimate the model is (degree+1) x Groups + 1. Default is 6.} +\item{min_na}{Minimum values needed per gene across cells to estimate the +model.} -\item{family}{the distribution function to be used in the glm model. -It must be specified as a function: \code{gaussian()}, \code{poisson()}, \code{negative.binomial(theta)}... -If NULL, the family will be \code{negative.binomial(theta)} when \code{counts = TRUE} or \code{gaussian()} when \code{counts = FALSE}.} +\item{family}{Distribution of the error term.} -\item{epsilon}{argument to pass to \code{glm.control}, convergence tolerance in the iterative process to estimate the glm model.} +\item{epsilon}{Model convergence tolerance.} -\item{verbose}{Name of the analyzed item to show on the screen while \code{T.fit} is in process.} +\item{verbose}{Print detailed output in the console. (Default is TRUE)} -\item{offset}{Whether ro use offset for normalization} +\item{offset}{logical value specifying whether to use offset during fitting.} -\item{parallel}{Enable parallel processing} +\item{parallel}{Use forking process to run parallelly. (Default is FALSE) +(Currently, Windows is not supported)} -\item{log_offset}{Take the log of teh offset. 
Similar to -'log(estimateSizeFactorsForMatrix)' from DESeq2.} +\item{log_offset}{A logical value specifying whether to take the logarithm of +the offsets.} -\item{max_it}{Integer giving the maximal number of IWLS iterations.} +\item{max_it}{Maximum number of iterations to fit the model.} } \value{ -ScMaSigPro object +An object of class \code{\link{ScMaSigPro}}, with updated `Profile` +slot. } \description{ -\code{sc.p.vector} performs a regression fit for each gene taking all variables -present in the model given by a regression matrix #' and returns a list of FDR corrected significant genes. -} -\details{ -\code{rownames(design)} and \code{colnames(data)} must be identical vectors - and indicate array naming. \code{rownames(data)} should contain unique gene IDs. - \code{colnames(design)} are the given names for the variables in the regression model. +Performs a regression fit for each gene taking all variables present in the +model. } \references{ Conesa, A., Nueda M.J., Alberto Ferrer, A., Talon, T. 2006. -maSigPro: a Method to Identify Significant Differential Expression Profiles in Time-Course Microarray Experiments. -Bioinformatics 22, 1096-1102 +maSigPro: a Method to Identify Significant Differential Expression Profiles +in Time-Course Microarray Experiments. Bioinformatics 22, 1096-1102 } \seealso{ -\code{\link{T.fit}}, \code{\link{lm}} +\code{\link{VariableProfiles}} } \author{ -Ana Conesa, Maria Jose Nueda and Priyansh Srivastava \email{spriyansh29@gmail.com} +Priyansh Srivastava \email{spriyansh29@gmail.com}, Ana Conesa and +Maria Jose Nueda, \email{mj.nueda@ua.es} } +\keyword{models} \keyword{regression} diff --git a/man/sc.set.poly.Rd b/man/sc.set.poly.Rd index ddb1fd1..88ce3f9 100644 --- a/man/sc.set.poly.Rd +++ b/man/sc.set.poly.Rd @@ -2,27 +2,43 @@ % Please edit documentation in R/sc.set.poly.R \name{sc.set.poly} \alias{sc.set.poly} -\title{Create predictors and set polynomial. 
Adaption of maSigPro::make.design.matrix()} +\title{Set up polynomial models and create Predictor Matrix} \usage{ sc.set.poly( - scmpObject, + scmpObj, poly_degree = 2, - bin_ptime_col = scmpObject@Parameters@bin_ptime_col, - path_col = scmpObject@Parameters@path_col + bin_ptime_col = scmpObj@Parameters@bin_ptime_col, + path_col = scmpObj@Parameters@path_col ) } \arguments{ -\item{scmpObject}{A 'ScMaSigPro' object.} +\item{scmpObj}{An object of class \code{\link{ScMaSigPro}}.} -\item{poly_degree}{Degree of the design matrix (default: 2).} +\item{poly_degree}{Degree of the polynomial.} -\item{bin_ptime_col}{Name of the time column.} +\item{bin_ptime_col}{A character string representing the column name +for binned Pseudotime values in 'Dense' data.} -\item{path_col}{Name of the path column.} +\item{path_col}{A character string representing the column name for branching +path assignment in 'Sparse' or 'Dense' slot.} } \value{ -Returns the 'scmpObject' with an updated 'design' slot. +An object of class \code{\link{ScMaSigPro}}, with updated `Design` +slot. } \description{ -Create predictors and set polynomial. Adaption of maSigPro::make.design.matrix() +Set up polynomial models and create Predictor Matrix that will contain the +independent variables. It is a wrapper around `maSigPro::make.design.matrix`. +} +\references{ +{Conesa, A., Nueda M.J., Alberto Ferrer, A., Talon, T. 2006. +maSigPro: a Method to Identify Significant Differential Expression Profiles +in Time-Course Microarray Experiments. Bioinformatics 22, 1096-1102} +} +\seealso{ +\code{\link{MatrixDesign}} Class. 
+} +\author{ +Priyansh Srivastava \email{spriyansh29@gmail.com}, Ana Conesa and +Maria Jose Nueda, \email{mj.nueda@ua.es} } diff --git a/man/sc.squeeze.Rd b/man/sc.squeeze.Rd index 7ed712c..55b1015 100644 --- a/man/sc.squeeze.Rd +++ b/man/sc.squeeze.Rd @@ -2,12 +2,12 @@ % Please edit documentation in R/sc.squeeze.R \name{sc.squeeze} \alias{sc.squeeze} -\title{Pseudo-bulking with optimal number of pseudotime based bins} +\title{Pseudo-bulking with optimal number of Pseudotime based bins} \usage{ sc.squeeze( - scmpObject, - ptime_col = scmpObject@Parameters@ptime_col, - path_col = scmpObject@Parameters@path_col, + scmpObj, + ptime_col = scmpObj@Parameters@ptime_col, + path_col = scmpObj@Parameters@path_col, bin_method = "Sturges", drop_fac = 1, verbose = FALSE, @@ -19,86 +19,76 @@ sc.squeeze( prune_bins = FALSE, assay_name = "counts", drop_trails = FALSE, - cluster_count_by = "sum", + aggregate = "sum", fill_gaps = FALSE, additional_params = list(use_unique_time_points = FALSE) ) } \arguments{ -\item{scmpObject}{object of Class scMaSigPro. See \code{\link{ScMaSigPro}} -for more details.} +\item{scmpObj}{An object of class \code{\link{ScMaSigPro}}.} -\item{ptime_col}{Name of the column in `cell.metadata` storing -Pseudotime values. Generated using `colData` from the \pkg{SingleCellExperiment} -package. (Default is "Pseudotime").} +\item{ptime_col}{A character string representing the column name +for inferred Pseudotime values in 'Sparse' data. (Default is "Pseudotime").} -\item{path_col}{Name of the column in `cell.metadata` storing information -for Path. Generated using `colData` from the \pkg{SingleCellExperiment} -package. (Default is `path_prefix`).} +\item{path_col}{A character string representing the column name for branching +path assignment in 'Sparse' or 'Dense' data. (Default is `path_prefix`).} -\item{bin_method}{A character string specifying the method to use in order to -estimate the optimal number of bins. 
Available options: "Freedman.Diaconis", -"Sqrt", "Sturges", "Rice", "Doane", and "Scott.Normal". See \code{\link{estBinSize}} -for more details. (Default = "Sturges").} +\item{bin_method}{A character string representing the algorithm used for +binning. Available options: "Freedman.Diaconis", +"Sqrt", "Sturges", "Rice", "Doane", and "Scott.Normal". (Default = "Sturges")} -\item{drop_fac}{A numeric value specifying the factor by which to decrease the -number of bins if the initial binning results in too many bins. (Default = 1).} +\item{drop_fac}{A numeric value specifying the factor by which to adjust the +number of bins if the initial binning results in too many/few bins. +(Default = 1).} -\item{verbose}{Print detailed output in the console. (Default is TRUE) -per path iteratively. Options: "universal", "individual. (Default = "universal").} +\item{verbose}{Print detailed output in the console. (Default is TRUE)} -\item{bin_mem_col}{Name of the column in the 'annotated_cell_metadata'} +\item{bin_mem_col}{A character string representing the name of the column in +which cells per bin are stored. (Default is "scmp_bin_members").} -\item{bin_col}{Name of the bin column name} +\item{bin_col}{A character string representing the name of the column in which +bin labels are stored. (Default is "scmp_bin").} -\item{bin_size_col}{Setting the name of the bin size column.} +\item{bin_size_col}{A character string representing the name of the column in +which bin sizes per bin are stored. (Default is "scmp_bin_size").} -\item{bin_ptime_col}{Name of the column to store the computed Pseudotime -bins.} +\item{bin_ptime_col}{A character string representing the column name +for binned Pseudotime values in 'Dense' data. +(Default is "scmp_binned_pseudotime").} -\item{split_bins}{If bin sizes are greater than} +\item{split_bins}{If bin sizes are greater than mean + sd, split the bin into +smaller bins by re-running the sc.squeeze() function. 
(Default = FALSE).} -\item{prune_bins}{description} +\item{prune_bins}{If bin sizes are smaller than mean - sd, remove the bin. +(Default = FALSE).} -\item{assay_name}{Name of the Assay in the assay_name object from which retrieve the counts. -(Default = "counts").} +\item{assay_name}{Name of the Assay in sparse data from which the counts are +used. (Default = "counts").} -\item{drop_trails}{description} +\item{drop_trails}{If the paths have different lengths of the binned pseudotime, +drop the bins from the path with more bins. (Default = FALSE).} -\item{cluster_count_by}{A character string specifying the method to use to -aggregate counts within each cluster. Available options are 'mean' or 'sum'. (Default = "sum").} +\item{aggregate}{A character string specifying the method to aggregate counts +within each cluster. Available options are 'mean' or 'sum'. (Default = "sum").} -\item{fill_gaps}{description} +\item{fill_gaps}{If corresponding bin is missing for a time-point, pull the +successive bins and fill the gaps.} -\item{additional_params}{Pass additional parameters as a named list. See Details.} +\item{additional_params}{Pass additional parameters as a named list. +See examples} } \value{ -A data.frame that contains the original data plus additional columns: -- 'bin' : Name of the bin -- 'bin_size' : Size of the bin -- 'binned_time' : Interval range of each bin -This function returns the merged data.frame with new discretized -ptime_col, preserving the original rownames. +An object of class \code{\link{ScMaSigPro}}, with updated `Dense` +slot. } \description{ -`squeeze()` discretizes a continuous time series column into bins +`sc.squeeze()` discretizes a continuous time series column into bins of equal size using entropy-based binning method. It automatically calculates -the optimal number of bins using one of the supported methods. The bin sizes -are also calculated and merged with the input cell_metadata. 
-} -\details{ -This function performs the following steps: -- Adds a new column 'cell' to the input data.frame which copies the row names. -- Extracts the time series data from the specified column of the input data.frame. -- Calculates the optimal number of bins using the specified method. -- Prints the estimated number of bins if verbose is set to TRUE. -- Discretizes the time series data into bins using the entropy-based binning method. -- Merges the original data.frame with the new binned time series data. -- Removes the 'cell' column and sets the row names back to the original row names of the input data.frame. -- Returns the merged data.frame. +the optimal number of bins using one of the supported methods. } \seealso{ -\code{\link{estBinSize}}, \code{\link{discretize}}, \code{\link{create_range}} +\code{\link{estBinSize}}, \code{\link{discretize}}, +\code{\link{create_range}} } \author{ Priyansh Srivastava \email{spriyansh29@gmail.com} diff --git a/man/sc.t.fit.Rd b/man/sc.t.fit.Rd index b3a30ba..6403b2e 100644 --- a/man/sc.t.fit.Rd +++ b/man/sc.t.fit.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/sc.t.fit.R \name{sc.t.fit} \alias{sc.t.fit} -\title{Makes a stepwise regression fit for time series gene expression experiments} +\title{Perform stepwise regression fit to select for significant terms.} \usage{ sc.t.fit( scmpObj, @@ -19,70 +19,51 @@ sc.t.fit( ) } \arguments{ -\item{scmpObj}{Can either be a \code{\link{p.vector}} object or a matrix containing expression scmpObj with the same requirements as for -the \code{\link{p.vector}} function.} +\item{scmpObj}{An object of class \code{\link{ScMaSigPro}}.} -\item{selection_method}{Argument to be passed to the step function. 
Can be either \code{"backward"}, \code{"forward"}, \code{"two.ways.backward"}, or \code{"two.ways.forward"}.} +\item{selection_method}{Method for step-wise regression.} -\item{p_value}{Significance level used for variable selection in the stepwise regression.} +\item{p_value}{Significance level used for variable selection in the stepwise +regression.} -\item{nvar_correction}{Argument for correcting T.fit significance level. See details.} +\item{nvar_correction}{Argument for correcting significance level. See details.} -\item{family}{The distribution function to be used in the glm model. It must be the same used in \code{p.vector}.} +\item{family}{Distribution of the error term.} -\item{epsilon}{Argument to pass to \code{glm.control}, convergence tolerance in the iterative process to estimate the glm model.} +\item{epsilon}{Model convergence tolerance.} -\item{offset}{Whether ro use offset for normalization} +\item{offset}{A logical value specifying whether to use offset during fitting.} -\item{verbose}{Name of the analyzed item to show on the screen while \code{T.fit} is in process.} +\item{verbose}{Print detailed output in the console. (Default is TRUE)} -\item{parallel}{description} +\item{parallel}{Use forking process to run parallelly. (Default is FALSE) +(Currently, Windows is not supported)} -\item{log_offset}{description} +\item{log_offset}{A logical value specifying whether to take the logarithm of +the offsets.} -\item{max_it}{description} +\item{max_it}{Maximum number of iterations to fit the model.} } \value{ -A list containing the following elements: -\item{sol}{Matrix for summary results of the stepwise regression. 
For each selected gene, the following values are given: -\itemize{ - \item p-value of the regression ANOVA - \item R-squared of the model - \item p-value of the regression coefficients of the selected variables -}} -\item{coefficients}{Matrix containing regression coefficients for the adjusted models.} -\item{group.coeffs}{Matrix containing the coefficients of the implicit models of each experimental group.} -\item{variables}{Variables in the complete regression model.} -\item{G}{Total number of input genes.} -\item{g}{Number of genes taken in the regression fit.} -\item{dat}{Input analysis scmpObj matrix.} -\item{dis}{Regression design matrix.} -\item{selection_method}{Imputed step method for stepwise regression.} -\item{alloc}{Matrix of experimental design.} -\item{influ.info}{scmpObj frame of genes containing influential scmpObj.} +An object of class \code{\link{ScMaSigPro}}, with updated `Estimate` +slot. } \description{ -\code{s.t.fit} selects the best regression model for each gene using stepwise regression. -} -\details{ -In the maSigPro approach, \code{\link{p.vector}} and \code{\link{T.fit}} are subsequent steps, meaning that significant genes are -first selected based on a general model, and then the significant variables for each gene are found by step-wise regression. - -The step regression can be \code{"backward"} or \code{"forward"}, indicating whether the step procedure starts from the -model with all or none variables. With the \code{"two.ways.backward"} or \code{"two.ways.forward"} options, the variables are both allowed to get in and out. -At each step, the p-value of each variable is computed, and variables get in/out of the model when this p-value is -lower or higher than the given threshold \code{p_value}. When \code{nvar_correction} is TRUE, the given significance level is corrected by the number of variables in the model. +Performs stepwise regression and selects the significant polynomial terms +from the full polynomial model. 
This function is preceded by +\code{scMaSigPro::sc.p.vector()}. } \references{ {Conesa, A., Nueda M.J., Alberto Ferrer, A., Talon, T. 2006. -maSigPro: a Method to Identify Significant Differential Expression Profiles in Time-Course Microarray Experiments. -Bioinformatics 22, 1096-1102} +maSigPro: a Method to Identify Significant Differential Expression Profiles +in Time-Course Microarray Experiments. Bioinformatics 22, 1096-1102} } \seealso{ -{\code{\link{p.vector}}, \code{\link{step}}} +\code{\link{Estimates}} Class. } \author{ -{Ana Conesa and Maria Jose Nueda, \email{mj.nueda@ua.es}} +Priyansh Srivastava \email{spriyansh29@gmail.com}, Ana Conesa and +Maria Jose Nueda, \email{mj.nueda@ua.es} } \keyword{models} \keyword{regression} diff --git a/man/scmp_estimateSizeFactorsForMatrix.Rd b/man/scmp_estimateSizeFactorsForMatrix.Rd index 872a702..76676a0 100644 --- a/man/scmp_estimateSizeFactorsForMatrix.Rd +++ b/man/scmp_estimateSizeFactorsForMatrix.Rd @@ -45,8 +45,8 @@ factor for this column. \author{ Simon Anders Please cite as -Love, M.I., Huber, W., Anders, S. Moderated estimation of fold change and dispersion for RNA-seq data with -DESeq2 Genome Biology 15(12):550 (2014) +Love, M.I., Huber, W., Anders, S. Moderated estimation of fold change and +dispersion for RNA-seq data with DESeq2 Genome Biology 15(12):550 (2014) Low-level function to estimate size factors with robust regression. } diff --git a/man/showCoeff.Rd b/man/showCoeff.Rd index f2875e6..4079ea7 100644 --- a/man/showCoeff.Rd +++ b/man/showCoeff.Rd @@ -7,18 +7,21 @@ showCoeff(scmpObj, view = FALSE, return = TRUE, includeInflu = TRUE) } \arguments{ -\item{scmpObj}{an object of class 'ScMaSigPro'. This object should contain the computed solution.} +\item{scmpObj}{An object of class \code{\link{ScMaSigPro}}.} -\item{view}{logical, whether to view the solution. If TRUE (default), the solution is displayed.} +\item{view}{Whether to view the data in the explorer.
(Default: FALSE)} -\item{return}{logical, whether to return the solution. If FALSE (default), the solution is not returned.} +\item{return}{Whether to return the data. (Default: TRUE)} -\item{includeInflu}{description} +\item{includeInflu}{Whether to include genes with influential observations.} } \value{ -The computed solution as a data.frame if return is set to TRUE. -If return is FALSE, the function does not return anything. +The computed coefficient matrix as a dataframe. } \description{ -This function is used to view or return the coeffients of the provided scMaSigPro object. +This function is used to view or return the coefficients from the provided +scMaSigPro object. +} +\author{ +Priyansh Srivastava \email{spriyansh29@gmail.com} } diff --git a/man/showGroupCoeff.Rd b/man/showGroupCoeff.Rd index 4edd5bd..7b632ba 100644 --- a/man/showGroupCoeff.Rd +++ b/man/showGroupCoeff.Rd @@ -2,23 +2,23 @@ % Please edit documentation in R/show_functions.R \name{showGroupCoeff} \alias{showGroupCoeff} -\title{Show or Return the Group wise coefficents} +\title{Show or Return the Branching Path Coefficient matrix} \usage{ showGroupCoeff(scmpObj, view = FALSE, return = TRUE, includeInflu = TRUE) } \arguments{ -\item{scmpObj}{an object of class 'ScMaSigPro'. This object should contain the computed solution.} +\item{scmpObj}{An object of class \code{\link{ScMaSigPro}}.} -\item{view}{logical, whether to view the solution. If TRUE (default), the solution is displayed.} +\item{view}{Whether to view the data in the explorer. (Default: FALSE)} -\item{return}{logical, whether to return the solution. If FALSE (default), the solution is not returned.} +\item{return}{Whether to return the data. (Default: TRUE)} -\item{includeInflu}{description} +\item{includeInflu}{Whether to include genes with influential observations.} } \value{ -The computed solution as a data.frame if return is set to TRUE. -If return is FALSE, the function does not return anything.
+The computed branching path coefficient matrix as a dataframe. } \description{ -This function is used to view or return the group of the provided scMaSigPro object. +This function is used to view or return the branching path coefficients from +the provided scMaSigPro object. } diff --git a/man/showInflu.Rd b/man/showInflu.Rd index bf0a9df..d9e41d3 100644 --- a/man/showInflu.Rd +++ b/man/showInflu.Rd @@ -2,21 +2,24 @@ % Please edit documentation in R/show_functions.R \name{showInflu} \alias{showInflu} -\title{Show or Return the matrix of influential genes} +\title{Return the matrix of genes with influential observations} \usage{ showInflu(scmpObj, view = FALSE, return = TRUE) } \arguments{ -\item{scmpObj}{an object of class 'ScMaSigPro'. This object should contain the computed solution.} +\item{scmpObj}{An object of class \code{\link{ScMaSigPro}}.} -\item{view}{logical, whether to view the solution. If TRUE (default), the solution is displayed.} +\item{view}{Whether to view the data in the explorer. (Default: FALSE)} -\item{return}{logical, whether to return the solution. If FALSE (default), the solution is not returned.} +\item{return}{Whether to return the data. (Default: TRUE)} } \value{ -The computed solution as a data.frame if return is set to TRUE. -If return is FALSE, the function does not return anything. +Matrix of genes with influential observations. } \description{ -This function is used to view or return the solution of the provided scMaSigPro object. +This function is used to view or return the matrix of genes with influential +observations from the provided scMaSigPro object.
+} +\author{ +Priyansh Srivastava \email{spriyansh29@gmail.com} } diff --git a/man/showParams.Rd b/man/showParams.Rd index 09b7128..05562fe 100644 --- a/man/showParams.Rd +++ b/man/showParams.Rd @@ -2,21 +2,26 @@ % Please edit documentation in R/show_functions.R \name{showParams} \alias{showParams} -\title{Show or Return the parameters used during the analysis} +\title{Show the parameters used during the workflow.} \usage{ showParams(scmpObj, view = FALSE, return = TRUE) } \arguments{ -\item{scmpObj}{an object of class 'ScMaSigPro'. This object should contain the computed solution.} +\item{scmpObj}{An object of class \code{\link{ScMaSigPro}}.} -\item{view}{logical, whether to view the solution. If TRUE (default), the solution is displayed.} +\item{view}{Whether to view the data in the explorer. (Default: FALSE)} -\item{return}{logical, whether to return the solution. If FALSE (default), the solution is not returned.} +\item{return}{Whether to return the data. (Default: TRUE)} } \value{ The computed solution as a data.frame if return is set to TRUE. If return is FALSE, the function does not return anything. + +Dataframe of the parameters used in the analysis. } \description{ -This function is used to view or return the solution of the provided scMaSigPro object. +Get or View all the parameters used during the workflow. +} +\author{ +Priyansh Srivastava \email{spriyansh29@gmail.com} } diff --git a/man/showPoly.Rd b/man/showPoly.Rd index c4e5db7..4c841a2 100644 --- a/man/showPoly.Rd +++ b/man/showPoly.Rd @@ -2,16 +2,19 @@ % Please edit documentation in R/show_functions.R \name{showPoly} \alias{showPoly} -\title{Show the terms of the polynomial term} +\title{Print the full model formula.} \usage{ showPoly(scmpObj) } \arguments{ -\item{scmpObj}{an object of class 'ScMaSigPro'. This object should contain the computed solution.} +\item{scmpObj}{An object of class \code{\link{ScMaSigPro}}.} } \value{ -Return the terms of the polynomial model. 
+Character string of the formula for the full model. } \description{ -This function is used to view or return the solution of the provided scMaSigPro object. +Print the full model formula in the console as a string. +} +\author{ +Priyansh Srivastava \email{spriyansh29@gmail.com} } diff --git a/man/showSigProf.Rd b/man/showSigProf.Rd index 0f54b4a..44d7712 100644 --- a/man/showSigProf.Rd +++ b/man/showSigProf.Rd @@ -2,23 +2,26 @@ % Please edit documentation in R/show_functions.R \name{showSigProf} \alias{showSigProf} -\title{Show or Return the Solution} +\title{Show or Return the counts for non-flat profiles.} \usage{ showSigProf(scmpObj, view = FALSE, return = TRUE, includeInflu = FALSE) } \arguments{ -\item{scmpObj}{an object of class 'ScMaSigPro'. This object should contain the computed solution.} +\item{scmpObj}{An object of class \code{\link{ScMaSigPro}}.} -\item{view}{logical, whether to view the solution. If TRUE (default), the solution is displayed.} +\item{view}{Whether to view the data in the explorer. (Default: FALSE)} -\item{return}{logical, whether to return the solution. If FALSE (default), the solution is not returned.} +\item{return}{Whether to return the data. (Default: TRUE)} -\item{includeInflu}{logical, whether to add gene with influential data in the solution.} +\item{includeInflu}{Whether to include genes with influential observations.} } \value{ -The computed solution as a data.frame if return is set to TRUE. -If return is FALSE, the function does not return anything. +Pseudo-bulk counts as a matrix for genes with non-flat profiles. } \description{ -This function is used to view or return the solution of the provided scMaSigPro object. +This function is used to view or return the pseudo-bulk counts of the genes +with non-flat profiles from the provided scMaSigPro object.
+} +\author{ +Priyansh Srivastava \email{spriyansh29@gmail.com} } diff --git a/man/showSol.Rd b/man/showSol.Rd index b03abfe..6c4ff5a 100644 --- a/man/showSol.Rd +++ b/man/showSol.Rd @@ -2,23 +2,26 @@ % Please edit documentation in R/show_functions.R \name{showSol} \alias{showSol} -\title{Show or Return the Solution} +\title{Show or Return the P-values after model fitting.} \usage{ showSol(scmpObj, view = FALSE, return = TRUE, includeInflu = TRUE) } \arguments{ -\item{scmpObj}{an object of class 'ScMaSigPro'. This object should contain the computed solution.} +\item{scmpObj}{An object of class \code{\link{ScMaSigPro}}.} -\item{view}{logical, whether to view the solution. If TRUE (default), the solution is displayed.} +\item{view}{Whether to view the data in the explorer. (Default: FALSE)} -\item{return}{logical, whether to return the solution. If TRUE (default), returned.} +\item{return}{Whether to return the data. (Default: TRUE)} -\item{includeInflu}{logical, whether to add gene with influential data in the solution.} +\item{includeInflu}{Whether to include genes with influential observations.} } \value{ -The computed solution as a data.frame if return is set to TRUE. -If return is FALSE, the function does not return anything. +The computed p-values for each term and the full model as a dataframe. } \description{ -This function is used to view or return the solution of the provided scMaSigPro object. +This function is used to view or return the matrix of p-values for each term +and the full model from the provided scMaSigPro object.
+} +\author{ +Priyansh Srivastava \email{spriyansh29@gmail.com} } diff --git a/man/showTS.Rd b/man/showTS.Rd index f7acdff..f99c045 100644 --- a/man/showTS.Rd +++ b/man/showTS.Rd @@ -2,23 +2,26 @@ % Please edit documentation in R/show_functions.R \name{showTS} \alias{showTS} -\title{Show or Return the t scores} +\title{Show or Return the t-score matrix} \usage{ showTS(scmpObj, view = FALSE, return = TRUE, includeInflu = TRUE) } \arguments{ -\item{scmpObj}{an object of class 'ScMaSigPro'. This object should contain the computed solution.} +\item{scmpObj}{An object of class \code{\link{ScMaSigPro}}.} -\item{view}{logical, whether to view the solution. If TRUE (default), the solution is displayed.} +\item{view}{Whether to view the data in the explorer. (Default: FALSE)} -\item{return}{logical, whether to return the solution. If FALSE (default), the solution is not returned.} +\item{return}{Whether to return the data. (Default: TRUE)} -\item{includeInflu}{logical, whether to add gene with influential data in the solution.} +\item{includeInflu}{Whether to include genes with influential observations.} } \value{ -The computed solution as a data.frame if return is set to TRUE. -If return is FALSE, the function does not return anything. +The computed t-score matrix as a dataframe. } \description{ -This function is used to view or return the solution of the provided scMaSigPro object. +This function is used to view or return the t-scores from the provided +scMaSigPro object. +} +\author{ +Priyansh Srivastava \email{spriyansh29@gmail.com} } diff --git a/vignettes/Basic-Workflow.Rmd b/vignettes/Basic-Workflow.Rmd index 525231b..47f87ab 100644 --- a/vignettes/Basic-Workflow.Rmd +++ b/vignettes/Basic-Workflow.Rmd @@ -34,19 +34,15 @@ branching paths and pseudotime.
Currently, `scMaSigPro` is available on GitHub and can be installed as follows: ```{r, echo=TRUE, eval=FALSE} -# Use public PAT -publicPat <- "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" - # Install devtools if not already installed if (!requireNamespace("devtools", quietly = TRUE)) { install.packages("devtools") } # Install scMaSigPro -devtools::install_github("spriyansh/scMaSigPro", - ref = "dev", - auth_token = publicPat, - build_vignettes = TRUE, +devtools::install_github("BioBam/scMaSigPro", + ref = "main", + build_vignettes = FALSE, build_manual = TRUE, upgrade = "never", force = TRUE, @@ -128,7 +124,7 @@ the parameter `labels_exist` and then pass the existing column names as a named ```{r, "Convert to `scMaSigPro` Object", eval=TRUE, echo=TRUE} # Helper Function to convert annotated SCE object to scmpObject -scmp.ob <- as_scmp( +scmp_ob <- as_scmp( object = splat.sim, from = "sce", align_pseudotime = FALSE, verbose = TRUE, @@ -145,7 +141,7 @@ to view various attributes, such as the dimensions (number of cells and number of genes), the available branching paths, and the range of pseudotime. ```{r, "Console Echo-1", eval=TRUE, echo = FALSE} -scmp.ob +scmp_ob ``` ### Pseudo-bulking along the continuum with `sc.squeeze()` @@ -158,7 +154,7 @@ characteristics of the data. Here, we will show the basic usage with the default parameters: ```{r, "Pseudo-bulking along the continuum with `sc.squeeze()`",eval=TRUE, echo=TRUE} -scmp.ob <- sc.squeeze(scmp.ob) +scmp_ob <- sc.squeeze(scmp_ob) ``` The console output of `scMaSigPro` is dynamic and displays more attributes as the @@ -166,7 +162,7 @@ analysis progresses. 
To view the results of the binning procedure, we can simply type the object's name into the console: ```{r, "Console Echo-2", eval=TRUE, echo = FALSE} -scmp.ob +scmp_ob ``` Here, we observe that the original pseudotime, which ranged from 1 to 100 with two cells at each step and different paths, is now rescaled to a range of 1 to 8, @@ -179,7 +175,7 @@ bin for each path. We can also visually inspect the binning process using a tile plot: ```{r, "Visualize bins",eval=TRUE, echo=TRUE} -plotBinTile(scmp.ob) +plotBinTile(scmp_ob) ``` The tile plot provides a clear view of the number of cells in each bin and how the @@ -207,7 +203,7 @@ To construct this model, we use `sc.set.poly()` to include quadratic terms: ```{r, "Polynomial Degree 2",eval=TRUE, echo=TRUE} # Polynomial Degree 2 -scmp.ob <- sc.set.poly(scmp.ob, poly_degree = 2) +scmp_ob <- sc.set.poly(scmp_ob, poly_degree = 2) ``` #### Visualize the model @@ -215,15 +211,15 @@ scmp.ob <- sc.set.poly(scmp.ob, poly_degree = 2) Once the model is stored, we can visualize the corresponding polynomial using the `showPoly()` function: ```{r, "showPoly",eval=TRUE, echo=TRUE} -showPoly(scmp.ob) +showPoly(scmp_ob) ``` Similarly, we can fit a cubic polynomial by setting the polynomial degree to 3: ```{r, "Polynomial Degree 3",eval=TRUE, echo=TRUE} # Polynomial Degree 3 -scmp.ob <- sc.set.poly(scmp.ob, poly_degree = 3) -showPoly(scmp.ob) +scmp_ob <- sc.set.poly(scmp_ob, poly_degree = 3) +showPoly(scmp_ob) ``` However, for simplicity, we will explore a polynomial of degree 1. 
Note that @@ -232,8 +228,8 @@ exponential and nonlinear gene expression patterns: ```{r, "# Polynomial Degree 1", eval=TRUE, echo=TRUE} # Polynomial Degree 1 -scmp.ob <- sc.set.poly(scmp.ob, poly_degree = 1) -showPoly(scmp.ob) +scmp_ob <- sc.set.poly(scmp_ob, poly_degree = 1) +showPoly(scmp_ob) ``` In the above model we have: @@ -266,47 +262,63 @@ We can execute `sc.p.vector()` as follows: ```{r, "Detecting Genes with Non-Flat Profiles", eval=TRUE, echo=TRUE} # Detect non-flat profiles -scmp.ob <- sc.p.vector(scmp.ob, +scmp_ob <- sc.p.vector(scmp_ob, offset = TRUE, p_value = 0.05, verbose = FALSE, log_offset = TRUE ) -scmp.ob +scmp_ob ``` -The console output reveals that `scMaSigPro` detected 51 genes with non-flat profiles. +The console output reveals that `scMaSigPro` detected 51 genes with non-flat +profiles. ### Model Refinement Having identified genes with significant profiles, we can refine their polynomial -models using `sc.t.fit()`. This function evaluates each term of the polynomial model. -In our case, it will assess which among "beta0 + beta1\*Path2vsPath1 + +models using `sc.t.fit()`. This function evaluates each term of the polynomial +model. In our case, it will assess which among "beta0 + beta1\*Path2vsPath1 + beta2\*scmp_binned_pseudotime + beta3*scmp_binned_pseudotimexPath2" -significantly contributes to the differences. To execute `sc.t.fit()`, we proceed as follows: +significantly contributes to the differences. To execute `sc.t.fit()`, +we proceed as follows: ```{r, "Model Refinement",eval=TRUE, echo=TRUE} # Model refinement -scmp.ob <- sc.t.fit(scmp.ob, verbose = FALSE) -scmp.ob +scmp_ob <- sc.t.fit(scmp_ob, verbose = FALSE) +scmp_ob ``` --- ## Selection of Genes -With our refined models in hand, we now focus on identifying genes showing significant differences with pseudotime, among paths, or both. For this purpose, we use the `sc.get.siggenes()` function. 
Our aim is to select models with a relatively high $R^2$, indicating simple linear relationships. The `vars` parameter in `sc.get.siggenes()` allows us to extract different sets of significant genes. Setting `vars = 'all'` retrieves all non-flat profiles identified in `sc.p.vector()` with $R^2>=$ the specified threshold. The option `vars = 'groups` fetches genes per path, resulting in two gene lists that demonstrate associative significance among paths, helping us identify genes associated with one path or the other along the pseudotime continuum. The `vars = 'each'` option finds significance for each term in the polynomial. In our case, we are interested in genes differentially expressed between paths and over pseudotime continum, so we will choose `vars = 'groups`. +With our refined models in hand, we now focus on identifying genes showing +significant differences with pseudotime, among paths, or both. For this purpose, +we use the `sc.get.siggenes()` function. Our aim is to select models with a +relatively high $R^2$, indicating simple linear relationships. The `vars` +parameter in `sc.get.siggenes()` allows us to extract different sets of +significant genes. Setting `vars = 'all'` retrieves all non-flat profiles +identified in `sc.p.vector()` with $R^2>=$ the specified threshold. The option +`vars = 'groups'` fetches genes per path, resulting in two gene lists that +demonstrate associative significance among paths, helping us identify genes +associated with one path or the other along the pseudotime continuum. The +`vars = 'each'` option finds significance for each term in the polynomial. +In our case, we are interested in genes differentially expressed between paths +and over the pseudotime continuum, so we will choose `vars = 'groups'`. 
```{r, "vars = groups",eval=TRUE, echo=TRUE} -scmp.ob <- sc.filter( - scmpObj = scmp.ob, +scmp_ob <- sc.filter( + scmpObj = scmp_ob, rsq = 0.7, vars = "groups", significant.intercept = "dummy", - includeInflu = T + includeInflu = TRUE ) ``` -By setting the vars parameter to "groups", the function will add genes with $R^2$ >= 0.7 to the object. To explore the number of genes per group, we will make an upset plot: +By setting the vars parameter to "groups", the function will add genes with +$R^2$ >= 0.7 to the object. To explore the number of genes per group, we will +make an upset plot: ```{r, "uspet",eval=TRUE, echo=TRUE} -plotIntersect(scmp.ob) +plotIntersect(scmp_ob) ``` Here, we observe that 23 genes belong to both Path2vsPath1 and Path1, indicating @@ -316,10 +328,10 @@ uniquely associated with Path2vsPath1. This implies that Path2 has 10 genes that are significantly differentially expressed over time, using Path1 genes as a reference. Let's explore a few of these genes: ```{r, "trend",eval=TRUE, echo=TRUE} -FigureA <- plotTrend(scmp.ob, "Gene9", logs = T, logType = "log") -FigureB <- plotTrend(scmp.ob, "Gene95", logs = T, logType = "log") -FigureC <- plotTrend(scmp.ob, "Gene10", logs = T, logType = "log") -FigureD <- plotTrend(scmp.ob, "Gene92", logs = T, logType = "log") +FigureA <- plotTrend(scmp_ob, "Gene9", logs = TRUE, logType = "log") +FigureB <- plotTrend(scmp_ob, "Gene95", logs = TRUE, logType = "log") +FigureC <- plotTrend(scmp_ob, "Gene10", logs = TRUE, logType = "log") +FigureD <- plotTrend(scmp_ob, "Gene92", logs = TRUE, logType = "log") (FigureA + FigureB) / (FigureC + FigureD) ``` @@ -332,14 +344,22 @@ groundTruth <- as.data.frame(splat.sim@rowRanges@elementMetadata) print(groundTruth[groundTruth$Gene %in% c("Gene9", "Gene95", "Gene10", "Gene92"), c(1, 2, 6, 8)]) ``` -The ground truth data reveals, for example, that Gene9 has a base gene mean of 0.5, a fold change of 1.5 in Path1, and remains at the same expression level for Path2. 
This trend is accurately captured in our analysis, as shown in Figure-A. Similarly, Gene92 does not show a significant difference between the paths but demonstrates a downtrend along pseudotime in both paths, a finding we successfully recapitulate in Figure-D. +The ground truth data reveals, for example, that Gene9 has a base gene mean of +0.5, a fold change of 1.5 in Path1, and remains at the same expression level for +Path2. This trend is accurately captured in our analysis, as shown in Figure-A. +Similarly, Gene92 does not show a significant difference between the paths but +demonstrates a downtrend along pseudotime in both paths, a finding we +successfully recapitulate in Figure-D. ### Cluster Trends -To discern general trends among the genes, we can cluster and visualize them together using the `sc.cluster.trend()` and `plotTrendCluster()`. By default, this function employs hierarchical clustering (hclust) and we will divide the genes into 4 clusters. +To discern general trends among the genes, we can cluster and visualize them +together using `sc.cluster.trend()` and `plotTrendCluster()`. By default, +`sc.cluster.trend()` employs hierarchical clustering (hclust), and we will +divide the genes into 4 clusters. ```{r, "Cluster Trends"} # Cluster Trend -scmp.ob <- sc.cluster.trend( - scmp.ob, +scmp_ob <- sc.cluster.trend( + scmp_ob, geneSet = "union", k = 4 ) @@ -348,7 +368,7 @@ scmp.ob <- sc.cluster.trend( ```{r, "plot"} # Plot plotTrendCluster( - scmpObj = scmp.ob, + scmpObj = scmp_ob, plot = "coeff", logs = TRUE, verbose = FALSE @@ -359,8 +379,8 @@ This visualization helps in understanding the collective behavior of genes within each cluster, highlighting patterns and trends that might be relevant for further biological interpretation. -This concludes the basic usage quick start guide of `scMaSigPro`. Please refer to -other vignettes for more in-depth analysis. +This concludes the basic usage quick start guide of `scMaSigPro`. 
Please refer +to other vignettes for more in-depth analysis. --- diff --git a/vignettes/scMaSigPro-maSigPro.Rmd b/vignettes/scMaSigPro-maSigPro.Rmd index 7e77a55..fe1ae22 100644 --- a/vignettes/scMaSigPro-maSigPro.Rmd +++ b/vignettes/scMaSigPro-maSigPro.Rmd @@ -160,7 +160,7 @@ directs the data to the dense slot of the scmpObject, allowing us to proceed wit the analysis without using `sc.squeeze()`, which is specifically designed for scRNA-Seq pseudo-bulking. ```{r, "create-scmpObject", echo=TRUE, eval=TRUE} -scmp.ob <- create_scmp( +scmp_ob <- create_scmp( counts = count, cell_data = metadata, ptime_col = "Time", @@ -169,14 +169,14 @@ scmp.ob <- create_scmp( ) # Print Console output -scmp.ob +scmp_ob ``` In the console output, we can observe that the data has been transferred to the dense slot. We can visualize this using `plotBinTile()`. ```{r, "plot-bin-tile"} -plotBinTile(scmp.ob) +plotBinTile(scmp_ob) ``` Next, we need to verify if `scMaSigPro` has created the appropriate count matrix. @@ -184,8 +184,8 @@ For this, we will use `testthat::expect_identical()`. This function highlights a mismatches; if everything is identical, it will not throw an error. The `eDense()` function retrieves the dense expression file from the `scMaSigPro` object. -```{r, "eDense(scmp.ob)"} -expect_identical(eDense(scmp.ob), expected = data.abiotic) +```{r, "eDense(scmp_ob)"} +expect_identical(eDense(scmp_ob), expected = data.abiotic) ``` No errors were thrown, indicating that our setup is correct and we can proceed @@ -204,13 +204,13 @@ specified in `edesign.abiotic`. 
design <- make.design.matrix(edesign.abiotic, degree = 2) # Using scMaSigPro -scmp.ob <- sc.set.poly(scmp.ob, poly_degree = 2) +scmp_ob <- sc.set.poly(scmp_ob, poly_degree = 2) # Comparing binarization results -expect_equal(bAlloc(scmp.ob), expected = edesign.abiotic) +expect_equal(bAlloc(scmp_ob), expected = edesign.abiotic) # Comparing regression matrices -expect_identical(scmp.ob@Design@predictor_matrix, expected = design$dis) +expect_identical(scmp_ob@Design@predictor_matrix, expected = design$dis) ``` No errors were thrown, so we can proceed with the analysis. @@ -228,7 +228,7 @@ gc <- capture_output(fit <- p.vector(data.abiotic, design, )) # Using ScMaSigPro -scmp.ob <- sc.p.vector(scmp.ob, +scmp_ob <- sc.p.vector(scmp_ob, min_na = 20, verbose = FALSE, offset = FALSE, @@ -244,14 +244,14 @@ results from `sc.p.vector()`: ```{r, "testthat pvector"} # Compare p-values expect_identical( - matrix(scmp.ob@Profile@p_values, - dimnames = list(names(scmp.ob@Profile@p_values), "p.value") + matrix(scmp_ob@Profile@p_values, + dimnames = list(names(scmp_ob@Profile@p_values), "p.value") ), expected = as.matrix(fit$p.vector[, 1, drop = FALSE]) ) # Compare adjusted p-values -pad <- scmp.ob@Profile@adj_p_values +pad <- scmp_ob@Profile@adj_p_values names(pad) <- NULL expect_identical(pad, expected = fit$p.adjusted) ``` @@ -264,7 +264,7 @@ terms for each gene, using `T.fit()` and `sc.t.fit()`. 
gc <- capture_output(tstep <- T.fit(fit, step.method = "backward", alfa = 0.05)) # Using scMaSigPro -scmp.ob <- sc.t.fit(scmp.ob, +scmp_ob <- sc.t.fit(scmp_ob, offset = FALSE, verbose = FALSE, epsilon = 0.00001, @@ -276,16 +276,16 @@ We will compare the s3 object `tstep` from `maSigPro::T.fit()` with the results ```{r, "comapte sc.t.fit"} # Solutions -expect_identical(showSol(scmp.ob), expected = tstep$sol) +expect_identical(showSol(scmp_ob), expected = tstep$sol) # Coefficients -expect_identical(showCoeff(scmp.ob), expected = tstep$coefficients) +expect_identical(showCoeff(scmp_ob), expected = tstep$coefficients) # Group Coefficients -expect_identical(showGroupCoeff(scmp.ob), expected = tstep$group.coeffs) +expect_identical(showGroupCoeff(scmp_ob), expected = tstep$group.coeffs) # tscore -expect_identical(as.data.frame(showTS(scmp.ob)), expected = tstep$t.score) +expect_identical(as.data.frame(showTS(scmp_ob)), expected = tstep$t.score) ``` ## R-Square Filter with `maSigPro::get.siggenes()` @@ -297,29 +297,29 @@ This is achieved using the `get.siggenes()` function in `maSigPro` and `sc.filte sigs <- get.siggenes(tstep, rsq = 0.6, vars = "groups") # scMaSigPro -scmp.ob <- sc.filter(scmp.ob, rsq = 0.6, vars = "groups") +scmp_ob <- sc.filter(scmp_ob, rsq = 0.6, vars = "groups") ``` We'll compare the s3 object (sigs) from `maSigPro::get.siggenes()' with the results from sc.filter()': ```{r} # Compare Cold vs Control -expect_identical(scmp.ob@Significant@genes$ColdvsControl, +expect_identical(scmp_ob@Significant@genes$ColdvsControl, expected = sigs$summary$ColdvsControl ) # Compare Heat vs Control -expect_identical(scmp.ob@Significant@genes$HeatvsControl, +expect_identical(scmp_ob@Significant@genes$HeatvsControl, expected = sigs$summary$HeatvsControl[sigs$summary$HeatvsControl != " "] ) # Compare Salt vs Control -expect_identical(scmp.ob@Significant@genes$SaltvsControl, +expect_identical(scmp_ob@Significant@genes$SaltvsControl, expected = 
sigs$summary$SaltvsControl[sigs$summary$SaltvsControl != " "] ) # Compare Control -expect_identical(scmp.ob@Significant@genes$Control, +expect_identical(scmp_ob@Significant@genes$Control, expected = sigs$summary$Control[sigs$summary$Control != " "] ) ``` @@ -339,7 +339,7 @@ suma2Venn(sigs$summary[, c(1:4)]) # Upset plot of scMaSigPro plotIntersect( - scmpObj = scmp.ob, + scmpObj = scmp_ob, min_intersection_size = 0 ) ``` @@ -363,7 +363,7 @@ PlotGroups(STMDE66, ) # Plotting the same gene with scMaSigPro -plotTrend(scmp.ob, "STMDE66", +plotTrend(scmp_ob, "STMDE66", logs = FALSE, pseudoCount = 0, smoothness = 0.01, significant = FALSE, summary_mode = "mean" @@ -375,13 +375,13 @@ plotTrend(scmp.ob, "STMDE66", # Plot clustered Trend gc <- capture_output( res <- see.genes(sigs$sig.genes$ColdvsControl, - show.fit = T, dis = design$dis, + show.fit = TRUE, dis = design$dis, cluster.method = "hclust", cluster.data = 1, k = 9 ) ) # Compute Clusters -scmp.ob <- sc.cluster.trend(scmp.ob, +scmp_ob <- sc.cluster.trend(scmp_ob, geneSet = "ColdvsControl", cluster_by = "counts" ) @@ -390,7 +390,7 @@ scmp.ob <- sc.cluster.trend(scmp.ob, ### Plot cluster Trends ```{r, "Plot Clustered Trends", fig.width=8, fig.height=8} plotTrendCluster( - scmpObj = scmp.ob, + scmpObj = scmp_ob, plot = "coeff", verbose = FALSE )