Merge branch 'main' into targets-factories

ClavelLab · May 6, 2024 · 7f43051 · 7f43051
2 parents f94d897 + 62ecd77
commit 7f43051
Show file tree

Hide file tree

Showing 10 changed files with 97 additions and 8 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -22,7 +22,8 @@ Description: Convenient wrapper functions for the analysis of
     taxonomic identification MALDI-TOF data is made easy with functions to
     import inconsistently formatted reports.
 License: GPL (>= 3)
-URL: https://github.com/ClavelLab/maldipickr
+URL: https://github.com/ClavelLab/maldipickr,
+    https://clavellab.github.io/maldipickr/
 BugReports: https://github.com/ClavelLab/maldipickr/issues
 Depends: 
     R (>= 3.2.0)

diff --git a/R/import_biotyper_spectra.R b/R/import_biotyper_spectra.R
@@ -64,14 +64,14 @@ import_biotyper_spectra <- function(biotyper_directory, remove_calibration = c("
   })
   biotyper_list <- unname(biotyper_list)
   # Catch the arguments of which spectrum to remove
-  remove_calibration <- match.arg(remove_calibration, several.ok = T)
+  remove_calibration <- match.arg(remove_calibration, several.ok = TRUE)
   if (length(remove_calibration) != 0) {
     # Regex to remove the chosen spectrum
     to_catch <- c(
       "BTS" = "BTS|BTS_Validation",
       "Autocalibration" = "Autocalibration"
     )
-    regex_to_catch <- match.arg(remove_calibration, to_catch, several.ok = T) %>%
+    regex_to_catch <- match.arg(remove_calibration, to_catch, several.ok = TRUE) %>%
       paste(collapse = "|")
     to_remove <- vapply(biotyper_list,
                         function(x){grepl(regex_to_catch, MALDIquant::metaData(x)$file)},

diff --git a/R/merge_processed_spectra.R b/R/merge_processed_spectra.R
@@ -5,13 +5,18 @@
 #' Aggregate multiple processed spectra, their associated peaks and metadata into a feature matrix and a concatenated metadata table.
 #'
 #' @param processed_spectra A [list] of the processed spectra and associated peaks and metadata in two possible formats:
-#' * A list of **in-memory objects** (named `spectra`, `peaks`, `metadata`) produced by [process_spectra].
+#' * A list of **in-memory objects** (named `spectra`, `peaks`, `metadata`) produced by [process_spectra]. Named lists will have names dropped, see Note.
 #' * `r lifecycle::badge('deprecated')` A list of **paths** to RDS files produced by [process_spectra] when using the `rds_prefix` option.
 #' @param remove_peakless_spectra A logical indicating whether to discard the spectra without detected peaks.
 #' @param interpolate_missing A logical indicating if intensity values for missing peaks should be interpolated from the processed spectra signal or left NA which would then be converted to 0.
 #'
 #' @return A *n*×*p* matrix, with *n* spectra as rows and *p* features as columns that are the peaks found in all the processed spectra.
 #'
+#' @note When aggregating multiple runs of processed spectra, the names of a
+#' named list are dropped. Otherwise, these names would be appended to the
+#' rownames of the feature matrix, which prevents merging with the metadata
+#' downstream.
+#'
 #' @seealso [process_spectra], the "Value" section in [`MALDIquant::intensityMatrix`](https://rdrr.io/cran/MALDIquant/man/intensityMatrix-functions.html)
 #' @export
 #' @examples
@@ -43,6 +48,14 @@
 #' # The feature matrix has 3×6=18 spectra as rows and
 #' #  35 peaks as columns
 #' dim(fm_all)
+#'
+#' # If using a named list, names will be dropped and are not propagated to the matrix.
+#' \dontrun{
+#' fm_all <- merge_processed_spectra(
+#'  list("A" = processed, "B" = processed, "C" = processed))
+#' any(grepl("A|B|C", rownames(fm_all))) # FALSE
+#'  }
+#' 
 merge_processed_spectra <- function(processed_spectra, remove_peakless_spectra = TRUE, interpolate_missing = TRUE) {
   if (any(
     is.null(processed_spectra),
@@ -68,6 +81,12 @@ merge_processed_spectra <- function(processed_spectra, remove_peakless_spectra =
     processed <- processed_spectra
   }
 
+  # Names at the upper level cause problems when aggregating multiple runs:
+  #  they are appended to the rownames of the matrix, thus preventing the
+  #  downstream metadata merge.
+  if(!is.null(names(processed))){
+    processed <- unname(processed)
+  }
   stopifnot(is_a_processed_spectra_list(processed))
 
   peakless <- list()

diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -2,6 +2,10 @@
 # src: https://pkgdown.r-lib.org/articles/metadata.html
 url: https://clavellab.github.io/maldipickr
 
+home:
+  title: "Reduce the redundancy of bacterial isolates"
+  description: "An R package to dereplicate and cherry-pick mass spectrometry spectra obtained from bacterial isolates."
+
 template:
   bootswatch: litera
   bootstrap: 5
@@ -10,6 +14,10 @@ template:
     code_font: "Hack"
     primary: "#cf5c36"
   theme: "tango"
+  opengraph:
+    image:
+      src: https://repository-images.githubusercontent.com/613291736/be5379b5-ce72-430d-bb4c-70155ba6a0ad
+      alt: "maldipickr logo with the legend Dereplicate and cherry-pick mass spectrometry spectra"
 
 reference:
   - title: "Import"
@@ -56,3 +64,6 @@ news:
     href: "https://clavellab.github.io/maldipickr/news/index.html#maldipickr-120"
   - text: "Version 1.1.1"
     href: "https://clavellab.github.io/maldipickr/news/index.html#maldipickr-111"
+
+development:
+  mode: auto
diff --git a/dev/dereplicate-spectra.Rmd b/dev/dereplicate-spectra.Rmd
@@ -217,13 +217,18 @@ The current function enables the analyst to decide whether to interpolate the va
 #' Aggregate multiple processed spectra, their associated peaks and metadata into a feature matrix and a concatenated metadata table.
 #'
 #' @param processed_spectra A [list] of the processed spectra and associated peaks and metadata in two possible formats:
-#' * A list of **in-memory objects** (named `spectra`, `peaks`, `metadata`) produced by [process_spectra].
+#' * A list of **in-memory objects** (named `spectra`, `peaks`, `metadata`) produced by [process_spectra]. Named lists will have names dropped, see Note.
 #' * `r lifecycle::badge('deprecated')` A list of **paths** to RDS files produced by [process_spectra] when using the `rds_prefix` option.
 #' @param remove_peakless_spectra A logical indicating whether to discard the spectra without detected peaks.
 #' @param interpolate_missing A logical indicating if intensity values for missing peaks should be interpolated from the processed spectra signal or left NA which would then be converted to 0.
 #'
 #' @return A *n*×*p* matrix, with *n* spectra as rows and *p* features as columns that are the peaks found in all the processed spectra.
 #'
+#' @note When aggregating multiple runs of processed spectra, the names of a
+#' named list are dropped. Otherwise, these names would be appended to the
+#' rownames of the feature matrix, which prevents merging with the metadata
+#' downstream.
+#'
 #' @seealso [process_spectra], the "Value" section in [`MALDIquant::intensityMatrix`](https://rdrr.io/cran/MALDIquant/man/intensityMatrix-functions.html)
 #' @export
 merge_processed_spectra <- function(processed_spectra, remove_peakless_spectra = TRUE, interpolate_missing = TRUE) {
@@ -251,6 +256,12 @@ merge_processed_spectra <- function(processed_spectra, remove_peakless_spectra =
     processed <- processed_spectra
   }
 
+  # Names at the upper level cause problems when aggregating multiple runs:
+  #  they are appended to the rownames of the matrix, thus preventing the
+  #  downstream metadata merge.
+  if(!is.null(names(processed))){
+    processed <- unname(processed)
+  }
   stopifnot(is_a_processed_spectra_list(processed))
 
   peakless <- list()
@@ -335,6 +346,14 @@ fm_all <- merge_processed_spectra(list(processed, processed, processed))
 # The feature matrix has 3×6=18 spectra as rows and
 #  35 peaks as columns
 dim(fm_all)
+
+# If using a named list, names will be dropped and are not propagated to the matrix.
+#' \dontrun{
+#' fm_all <- merge_processed_spectra(
+#'  list("A" = processed, "B" = processed, "C" = processed))
+#' any(grepl("A|B|C", rownames(fm_all))) # FALSE
+#'  }
+#' 
 ```
 
 ```{r tests-merge_processed_spectra}
@@ -351,6 +370,14 @@ test_that("merge_processed_spectra works", {
   expect_identical(
     sum(fm == 0), 0L
   )
+  expect_no_error(
+    fm_multiple <- merge_processed_spectra(
+      list("with_name_bar" = processed_test, "with_name_foo" = processed_test)
+    )
+  )
+  expect_equal(
+    dim(fm_multiple), c(4, 26)
+  )
 })
 test_that("merge_processed_spectra works without interpolation", {
   expect_no_error(

diff --git a/dev/import-data.Rmd b/dev/import-data.Rmd
@@ -516,14 +516,14 @@ import_biotyper_spectra <- function(biotyper_directory, remove_calibration = c("
   })
   biotyper_list <- unname(biotyper_list)
   # Catch the arguments of which spectrum to remove
-  remove_calibration <- match.arg(remove_calibration, several.ok = T)
+  remove_calibration <- match.arg(remove_calibration, several.ok = TRUE)
   if (length(remove_calibration) != 0) {
     # Regex to remove the chosen spectrum
     to_catch <- c(
       "BTS" = "BTS|BTS_Validation",
       "Autocalibration" = "Autocalibration"
     )
-    regex_to_catch <- match.arg(remove_calibration, to_catch, several.ok = T) %>%
+    regex_to_catch <- match.arg(remove_calibration, to_catch, several.ok = TRUE) %>%
       paste(collapse = "|")
     to_remove <- vapply(biotyper_list,
                         function(x){grepl(regex_to_catch, MALDIquant::metaData(x)$file)},

diff --git a/man/maldipickr-package.Rd b/man/maldipickr-package.Rd
diff --git a/man/merge_processed_spectra.Rd b/man/merge_processed_spectra.Rd
diff --git a/tests/testthat/test-merge_processed_spectra.R b/tests/testthat/test-merge_processed_spectra.R
@@ -13,6 +13,14 @@ test_that("merge_processed_spectra works", {
   expect_identical(
     sum(fm == 0), 0L
   )
+  expect_no_error(
+    fm_multiple <- merge_processed_spectra(
+      list("with_name_bar" = processed_test, "with_name_foo" = processed_test)
+    )
+  )
+  expect_equal(
+    dim(fm_multiple), c(4, 26)
+  )
 })
 test_that("merge_processed_spectra works without interpolation", {
   expect_no_error(

diff --git a/vignettes/dereplicate-bruker-maldi-biotyper-spectra.Rmd b/vignettes/dereplicate-bruker-maldi-biotyper-spectra.Rmd
@@ -109,6 +109,14 @@ fm_all <- merge_processed_spectra(list(processed, processed, processed))
 # The feature matrix has 3×6=18 spectra as rows and
 #  35 peaks as columns
 dim(fm_all)
+
+# If using a named list, names will be dropped and are not propagated to the matrix.
+#' \dontrun{
+#' fm_all <- merge_processed_spectra(
+#'  list("A" = processed, "B" = processed, "C" = processed))
+#' any(grepl("A|B|C", rownames(fm_all))) # FALSE
+#'  }
+#' 
 ```