Merge pull request #26 from fmicompbio/v0.9.6
v0.9.6
csoneson authored Dec 31, 2024
2 parents d429fd6 + a131578 commit fb53942
Showing 50 changed files with 751 additions and 110 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/R-CMD-check.yaml
@@ -58,7 +58,7 @@ jobs:

- name: Cache R packages
if: runner.os != 'Windows' && matrix.config.image == null
uses: actions/cache@v1
uses: actions/cache@v4
with:
path: ${{ env.R_LIBS_USER }}
key: ${{ env.cache-version }}-${{ runner.os }}-bioc-${{ matrix.config.bioc }}-${{ hashFiles('depends.Rds') }}
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -1,7 +1,7 @@
Package: einprot
Type: Package
Title: A collection of proteomics analysis utilities and workflows
Version: 0.9.5
Version: 0.9.6
Authors@R: c(
person("Charlotte", "Soneson", email = "charlotte.soneson@fmi.ch",
role = c("aut", "cre"), comment = c(ORCID = "0000-0003-3833-2169")),
2 changes: 2 additions & 0 deletions NAMESPACE
@@ -11,6 +11,7 @@ export(doNormalization)
export(doPCA)
export(emptySampleText)
export(expDesignText)
export(featureCollectionText)
export(filterByModText)
export(filterFragPipe)
export(filterMaxQuant)
@@ -20,6 +21,7 @@ export(fixFeatureIds)
export(formatTableColumns)
export(getCalibrationFrompdAnalysis)
export(getColumnNames)
export(getComplexesToPlot)
export(getContaminantsDatabaseFrompdAnalysis)
export(getConvTable)
export(getFirstId)
12 changes: 12 additions & 0 deletions NEWS.md
@@ -1,3 +1,15 @@
# einprot 0.9.6

* Sort output tables from differential abundance analysis by t-statistics instead of p-value
* Allow ComplexHeatmap::Heatmap arguments to be passed on by plotMissingValuesHeatmap
* Add option to limit plotMissingValuesHeatmap to rows with at least one missing value
* Add option to make imputation plots as density plots instead of histograms
* Add more details regarding the source of feature collections to report
* Add center.median.shared and center.mean.shared normalization methods
* Add maxComplexSimilarity argument to plotVolcano
* Update PomBase and WormBase conversion tables
* Add contamination filtering to Spectronaut (presence of contam_ prefix)

# einprot 0.9.5

* Add filtering by score and number of peptides to Spectronaut workflow
7 changes: 5 additions & 2 deletions R/checkArgumentsDIANN.R
@@ -17,7 +17,8 @@
minNbrValidValues, minlFC, samSignificance, nperm, volcanoAdjPvalThr,
volcanoLog2FCThr, volcanoMaxFeatures, volcanoLabelSign, volcanoS0,
volcanoFeaturesToLabel, addInteractiveVolcanos, interactiveDisplayColumns,
interactiveGroupColumn, complexFDRThr, maxNbrComplexesToPlot, seed,
interactiveGroupColumn, complexFDRThr, maxNbrComplexesToPlot,
maxComplexSimilarity, seed,
includeFeatureCollections, minSizeToKeepSet, customComplexes,
complexSpecies, complexDbPath, stringVersion, stringDir, linkTableColumns,
customYml, doRender
@@ -144,7 +145,8 @@
.assertVector(x = assaysForExport, type = "character", allowNULL = TRUE)
.assertScalar(x = addHeatmaps, type = "logical")
.assertScalar(x = normMethod, type = "character",
validValues = c(MsCoreUtils::normalizeMethods(), "none"))
validValues = c(MsCoreUtils::normalizeMethods(), "none",
"center.mean.shared", "center.median.shared"))
.assertVector(x = spikeFeatures, type = "character", allowNULL = TRUE)
.assertScalar(x = stattest, type = "character",
validValues = c("limma", "ttest", "proDA", "none"))
@@ -163,6 +165,7 @@
.assertScalar(x = complexFDRThr, type = "numeric", rngIncl = c(0, 1))
.assertScalar(x = maxNbrComplexesToPlot, type = "numeric",
rngIncl = c(0, Inf))
.assertScalar(x = maxComplexSimilarity, type = "numeric")
.assertScalar(x = minSizeToKeepSet, type = "numeric", rngIncl = c(0, Inf))
.assertVector(x = volcanoFeaturesToLabel, type = "character")
.assertVector(x = mergeGroups, type = "list")
7 changes: 5 additions & 2 deletions R/checkArgumentsFragPipe.R
@@ -16,7 +16,8 @@
minNbrValidValues, minlFC, samSignificance, nperm, volcanoAdjPvalThr,
volcanoLog2FCThr, volcanoMaxFeatures, volcanoLabelSign, volcanoS0,
volcanoFeaturesToLabel, addInteractiveVolcanos, interactiveDisplayColumns,
interactiveGroupColumn, complexFDRThr, maxNbrComplexesToPlot, seed,
interactiveGroupColumn, complexFDRThr, maxNbrComplexesToPlot,
maxComplexSimilarity, seed,
includeFeatureCollections, minSizeToKeepSet, customComplexes,
complexSpecies, complexDbPath, stringVersion, stringDir, linkTableColumns,
customYml, doRender
@@ -149,7 +150,8 @@
.assertVector(x = assaysForExport, type = "character", allowNULL = TRUE)
.assertScalar(x = addHeatmaps, type = "logical")
.assertScalar(x = normMethod, type = "character",
validValues = c(MsCoreUtils::normalizeMethods(), "none"))
validValues = c(MsCoreUtils::normalizeMethods(), "none",
"center.mean.shared", "center.median.shared"))
.assertVector(x = spikeFeatures, type = "character", allowNULL = TRUE)
.assertScalar(x = stattest, type = "character",
validValues = c("limma", "ttest", "proDA", "none"))
@@ -168,6 +170,7 @@
.assertScalar(x = complexFDRThr, type = "numeric", rngIncl = c(0, 1))
.assertScalar(x = maxNbrComplexesToPlot, type = "numeric",
rngIncl = c(0, Inf))
.assertScalar(x = maxComplexSimilarity, type = "numeric")
.assertScalar(x = minSizeToKeepSet, type = "numeric", rngIncl = c(0, Inf))
.assertVector(x = volcanoFeaturesToLabel, type = "character")
.assertVector(x = mergeGroups, type = "list")
7 changes: 5 additions & 2 deletions R/checkArgumentsMaxQuant.R
@@ -16,7 +16,8 @@
minNbrValidValues, minlFC, samSignificance, nperm, volcanoAdjPvalThr,
volcanoLog2FCThr, volcanoMaxFeatures, volcanoLabelSign, volcanoS0,
volcanoFeaturesToLabel, addInteractiveVolcanos, interactiveDisplayColumns,
interactiveGroupColumn, complexFDRThr, maxNbrComplexesToPlot, seed,
interactiveGroupColumn, complexFDRThr, maxNbrComplexesToPlot,
maxComplexSimilarity, seed,
includeFeatureCollections, minSizeToKeepSet, customComplexes,
complexSpecies, complexDbPath, stringVersion, stringDir, linkTableColumns,
customYml, doRender
@@ -134,7 +135,8 @@
.assertVector(x = assaysForExport, type = "character", allowNULL = TRUE)
.assertScalar(x = addHeatmaps, type = "logical")
.assertScalar(x = normMethod, type = "character",
validValues = c(MsCoreUtils::normalizeMethods(), "none"))
validValues = c(MsCoreUtils::normalizeMethods(), "none",
"center.mean.shared", "center.median.shared"))
.assertVector(x = spikeFeatures, type = "character", allowNULL = TRUE)
.assertScalar(x = stattest, type = "character",
validValues = c("limma", "ttest", "proDA", "none"))
@@ -153,6 +155,7 @@
.assertScalar(x = complexFDRThr, type = "numeric", rngIncl = c(0, 1))
.assertScalar(x = maxNbrComplexesToPlot, type = "numeric",
rngIncl = c(0, Inf))
.assertScalar(x = maxComplexSimilarity, type = "numeric")
.assertScalar(x = minSizeToKeepSet, type = "numeric", rngIncl = c(0, Inf))
.assertVector(x = volcanoFeaturesToLabel, type = "character")
.assertVector(x = mergeGroups, type = "list")
7 changes: 5 additions & 2 deletions R/checkArgumentsPDTMT.R
@@ -19,7 +19,8 @@
minNbrValidValues, minlFC, samSignificance, nperm, volcanoAdjPvalThr,
volcanoLog2FCThr, volcanoMaxFeatures, volcanoLabelSign, volcanoS0,
volcanoFeaturesToLabel, addInteractiveVolcanos, interactiveDisplayColumns,
interactiveGroupColumn, complexFDRThr, maxNbrComplexesToPlot, seed,
interactiveGroupColumn, complexFDRThr, maxNbrComplexesToPlot,
maxComplexSimilarity, seed,
includeFeatureCollections, minSizeToKeepSet, customComplexes,
complexSpecies, complexDbPath, stringVersion, stringDir, linkTableColumns,
customYml, doRender
@@ -157,7 +158,8 @@
.assertVector(x = assaysForExport, type = "character", allowNULL = TRUE)
.assertScalar(x = addHeatmaps, type = "logical")
.assertScalar(x = normMethod, type = "character",
validValues = c(MsCoreUtils::normalizeMethods(), "none"))
validValues = c(MsCoreUtils::normalizeMethods(), "none",
"center.mean.shared", "center.median.shared"))
.assertVector(x = spikeFeatures, type = "character", allowNULL = TRUE)
.assertScalar(x = stattest, type = "character",
validValues = c("limma", "ttest", "proDA", "none"))
@@ -176,6 +178,7 @@
.assertScalar(x = complexFDRThr, type = "numeric", rngIncl = c(0, 1))
.assertScalar(x = maxNbrComplexesToPlot, type = "numeric",
rngIncl = c(0, Inf))
.assertScalar(x = maxComplexSimilarity, type = "numeric")
.assertScalar(x = minSizeToKeepSet, type = "numeric", rngIncl = c(0, Inf))
.assertVector(x = volcanoFeaturesToLabel, type = "character")
.assertVector(x = mergeGroups, type = "list")
7 changes: 5 additions & 2 deletions R/checkArgumentsSpectronaut.R
@@ -18,7 +18,8 @@
minNbrValidValues, minlFC, samSignificance, nperm, volcanoAdjPvalThr,
volcanoLog2FCThr, volcanoMaxFeatures, volcanoLabelSign, volcanoS0,
volcanoFeaturesToLabel, addInteractiveVolcanos, interactiveDisplayColumns,
interactiveGroupColumn, complexFDRThr, maxNbrComplexesToPlot, seed,
interactiveGroupColumn, complexFDRThr, maxNbrComplexesToPlot,
maxComplexSimilarity, seed,
includeFeatureCollections, minSizeToKeepSet, customComplexes,
complexSpecies, complexDbPath, stringVersion, stringDir, linkTableColumns,
customYml, doRender
@@ -147,7 +148,8 @@
.assertVector(x = assaysForExport, type = "character", allowNULL = TRUE)
.assertScalar(x = addHeatmaps, type = "logical")
.assertScalar(x = normMethod, type = "character",
validValues = c(MsCoreUtils::normalizeMethods(), "none"))
validValues = c(MsCoreUtils::normalizeMethods(), "none",
"center.mean.shared", "center.median.shared"))
.assertVector(x = spikeFeatures, type = "character", allowNULL = TRUE)
.assertScalar(x = stattest, type = "character",
validValues = c("limma", "ttest", "proDA", "none"))
@@ -166,6 +168,7 @@
.assertScalar(x = complexFDRThr, type = "numeric", rngIncl = c(0, 1))
.assertScalar(x = maxNbrComplexesToPlot, type = "numeric",
rngIncl = c(0, Inf))
.assertScalar(x = maxComplexSimilarity, type = "numeric")
.assertScalar(x = minSizeToKeepSet, type = "numeric", rngIncl = c(0, Inf))
.assertVector(x = volcanoFeaturesToLabel, type = "character")
.assertVector(x = mergeGroups, type = "list")
4 changes: 2 additions & 2 deletions R/constants.R
@@ -18,7 +18,7 @@ NULL
EINPROT_COMPLEXES_FILE <- "extdata/complexes/complexdb_einprot0.9.3_20240328_orthologs.rds"
#' @export
#' @rdname constants
EINPROT_WORMBASE_CONVTABLE <- "extdata/conversion_tables/WormBaseConv_einprot0.5.0_20220211.rds"
EINPROT_WORMBASE_CONVTABLE <- "extdata/conversion_tables/WormBaseConv_einprot0.9.6_20241018.rds"
#' @export
#' @rdname constants
EINPROT_POMBASE_CONVTABLE <- "extdata/conversion_tables/PomBaseConv_einprot0.5.0_20220211.rds"
EINPROT_POMBASE_CONVTABLE <- "extdata/conversion_tables/PomBaseConv_einprot0.9.6_20241018.rds"
21 changes: 15 additions & 6 deletions R/doFilter.R
@@ -550,6 +550,10 @@ filterFragPipe <- function(sce, minPeptides, plotUpset = TRUE,
#' expression) used to identify decoys (reverse hits). The pattern is
#' matched against the IDs in the Spectronaut \code{PG.ProteinGroups}
#' column.
#' @param contamPattern Character scalar providing the pattern (a regular
#' expression) used to identify contaminants. The pattern is
#' matched against the IDs in the Spectronaut \code{PG.ProteinGroups}
#' column.
#' @param exclFile Character scalar, the path to a text file where the
#' features that are filtered out are written. If \code{NULL} (default),
#' excluded features are not recorded.
@@ -562,22 +566,27 @@ filterFragPipe <- function(sce, minPeptides, plotUpset = TRUE,
#' @importFrom rlang .data
#'
filterSpectronaut <- function(sce, minScore, minPeptides, plotUpset = TRUE,
revPattern = "_Decoy$", exclFile = NULL) {
revPattern = "_Decoy$",
contamPattern = "^contam_", exclFile = NULL) {
.assertVector(x = sce, type = "SummarizedExperiment")
.assertScalar(x = minScore, type = "numeric", allowNULL = TRUE)
.assertScalar(x = minPeptides, type = "numeric", allowNULL = TRUE)
.assertScalar(x = plotUpset, type = "logical")
.assertScalar(x = revPattern, type = "character")
.assertScalar(x = contamPattern, type = "character")
.assertScalar(x = exclFile, type = "character", allowNULL = TRUE)

## Make sure that the columns used for filtering later are character vectors
rowData(sce)$Reverse <- ifelse(grepl(revPattern, rowData(sce)$PG.ProteinGroups),
"+", "")
rowData(sce)$Contaminant <- ifelse(grepl(contamPattern,
rowData(sce)$PG.ProteinGroups),
"+", "")

filtdf <- as.data.frame(SummarizedExperiment::rowData(sce)) %>%
dplyr::select(dplyr::any_of(c("Reverse", "PG.NrOfStrippedSequencesIdentified.Experiment.wide",
"PG.Cscore"))) %>%
dplyr::mutate(across(dplyr::any_of(c("Reverse")),
"PG.Cscore", "Contaminant"))) %>%
dplyr::mutate(across(dplyr::any_of(c("Reverse", "Contaminant")),
function(x) as.numeric(x == "+")))
if ("PG.NrOfStrippedSequencesIdentified.Experiment.wide" %in% colnames(filtdf) &&
!is.null(minPeptides)) {
@@ -601,9 +610,9 @@ filterSpectronaut <- function(sce, minScore, minPeptides, plotUpset = TRUE,
if ("Reverse" %in% colnames(rowData(sce))) {
keep <- intersect(keep, which(rowData(sce)$Reverse == ""))
}
# if ("Potential.contaminant" %in% colnames(rowData(sce))) {
# keep <- intersect(keep, which(rowData(sce)$Potential.contaminant == ""))
# }
if ("Contaminant" %in% colnames(rowData(sce))) {
keep <- intersect(keep, which(rowData(sce)$Contaminant == ""))
}
if ("PG.NrOfStrippedSequencesIdentified.Experiment.wide" %in% colnames(rowData(sce)) &&
!is.null(minPeptides)) {
keep <- intersect(
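In short, filterSpectronaut now derives a Contaminant column from the PG.ProteinGroups IDs using contamPattern (default "^contam_") and drops flagged features together with decoys. A minimal usage sketch, assuming sce is a SummarizedExperiment imported from a Spectronaut report; the score and peptide thresholds are illustrative values, not package defaults:

## Illustrative call; thresholds are examples only.
sce_filt <- filterSpectronaut(
    sce,
    minScore = 0.9,             # minimum PG.Cscore to keep a protein group
    minPeptides = 2,            # minimum number of stripped sequences
    revPattern = "_Decoy$",     # decoy identifier pattern (default)
    contamPattern = "^contam_"  # new contaminant identifier pattern (default)
)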
25 changes: 23 additions & 2 deletions R/doNormalization.R
@@ -5,7 +5,10 @@
#'
#' @param sce A \code{SummarizedExperiment} object (or a derivative).
#' @param method Character scalar giving the normalization method. Currently,
#' the methods from \code{MsCoreUtils::normalizeMethods()} are supported.
#' the methods from \code{MsCoreUtils::normalizeMethods()} are supported,
#' together with "center.mean.shared" and "center.median.shared",
#' subtracting the mean or median, respectively, across features that are
#' observed in all samples.
#' If \code{spikeFeatures} is not \code{NULL}, only
#' \code{"center.mean"}, \code{"center.median"}, \code{"div.mean"} and
#' \code{"div.median"} are supported.
@@ -56,7 +59,9 @@ doNormalization <- function(sce, method, assayName, normalizedAssayName,
spikeFeatures = NULL) {
.assertVector(x = sce, type = "SummarizedExperiment")
.assertScalar(x = method, type = "character",
validValues = MsCoreUtils::normalizeMethods())
validValues = c(MsCoreUtils::normalizeMethods(),
"center.mean.shared",
"center.median.shared"))
.assertScalar(x = assayName, type = "character",
validValues = SummarizedExperiment::assayNames(sce))
.assertScalar(x = normalizedAssayName, type = "character")
@@ -94,6 +99,22 @@
assayOut <-
MsCoreUtils::normalize_matrix(assayIn,
method = method)
} else if (method == "center.median.shared") {
idx <- which(rowSums(is.na(assayIn)) == 0)
if (length(idx) == 0) {
stop("No features observed in all samples")
}
assayOut <- sweep(assayIn, MARGIN = 2,
STATS = apply(assayIn[idx, , drop = FALSE], 2, stats::median),
FUN = "-")
} else if (method == "center.mean.shared") {
idx <- which(rowSums(is.na(assayIn)) == 0)
if (length(idx) == 0) {
stop("No features observed in all samples")
}
assayOut <- sweep(assayIn, MARGIN = 2,
STATS = apply(assayIn[idx, , drop = FALSE], 2, mean),
FUN = "-")
} else {
## Should never end up here as we check the validity of method above
#nocov start
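The shared-feature variants compute the per-sample centering statistic only from features quantified in every sample, which keeps the offsets comparable when missingness differs between samples. A minimal usage sketch, assuming an object with assays named as below (the assay names are hypothetical):

## Illustrative call; assay names are hypothetical.
sce <- doNormalization(sce,
                       method = "center.median.shared",
                       assayName = "log2_intensity",
                       normalizedAssayName = "log2_intensity_norm")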
35 changes: 27 additions & 8 deletions R/plotImputation.R
@@ -9,6 +9,8 @@
#' @param assayImputation Character scalar indicating the name of a
#' logical assay of \code{sce} to use for filling the distribution plots.
#' @param xlab Character scalar providing the x-axis label for the plot.
#' @param plotType Character scalar indicating the type of plot to make
#' (either "histogram" or "density").
#'
#' @export
#' @author Charlotte Soneson
@@ -30,13 +32,15 @@
#' @importFrom rlang .data
#'
plotImputationDistribution <- function(sce, assayToPlot, assayImputation,
xlab = "") {
xlab = "", plotType = "histogram") {
.assertVector(x = sce, type = "SummarizedExperiment")
.assertScalar(x = assayToPlot, type = "character",
validValues = SummarizedExperiment::assayNames(sce))
.assertScalar(x = assayImputation, type = "character",
validValues = SummarizedExperiment::assayNames(sce))
.assertScalar(x = xlab, type = "character")
.assertScalar(x = plotType, type = "character",
validValues = c("histogram", "density"))

plotdf <- as.data.frame(
SummarizedExperiment::assay(sce, assayToPlot)) %>%
@@ -49,11 +53,26 @@ plotImputationDistribution <- function(sce, assayToPlot, assayImputation,
tidyr::gather(key = "sample", value = "imputed", -"pid"),
by = c("pid", "sample")
)
ggplot2::ggplot(plotdf, ggplot2::aes(x = .data$log2intensity,
fill = .data$imputed)) +
ggplot2::geom_histogram(bins = 50) +
ggplot2::facet_wrap(~ sample) +
ggplot2::theme_bw() + ggplot2::labs(x = xlab) +
ggplot2::scale_fill_manual(values = c(`TRUE` = "grey",
`FALSE` = "firebrick1"))
if (plotType == "histogram") {
ggplot2::ggplot(plotdf, ggplot2::aes(x = .data$log2intensity,
fill = .data$imputed)) +
ggplot2::geom_histogram(bins = 50) +
ggplot2::facet_wrap(~ sample) +
ggplot2::theme_bw() + ggplot2::labs(x = xlab) +
ggplot2::scale_fill_manual(values = c(`TRUE` = "grey",
`FALSE` = "firebrick1"))
} else if (plotType == "density") {
ggplot2::ggplot(plotdf, ggplot2::aes(x = .data$log2intensity,
color = .data$imputed)) +
ggplot2::geom_density(linewidth = 1.5) +
ggplot2::facet_wrap(~ sample) +
ggplot2::theme_bw() + ggplot2::labs(x = xlab) +
ggplot2::scale_color_manual(values = c(`TRUE` = "grey",
`FALSE` = "firebrick1"))
} else {
## Should never end up here as the parameter is checked above
#nocov start
stop("Unknown value of the plotType parameter")
#nocov end
}
}
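With the new plotType argument the same data can be drawn either as per-sample histograms (the previous behaviour and the default) or as overlaid density curves. A minimal usage sketch; the assay names are hypothetical:

## Illustrative call; assay names are hypothetical.
plotImputationDistribution(sce,
                           assayToPlot = "log2_intensity_imputed",
                           assayImputation = "imputed",
                           xlab = "log2 intensity",
                           plotType = "density")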