Merge pull request #29 from AlexsLemonade/allyhawkins/add-processing-…

…info Add processing information to QC report
AlexsLemonade · Sep 2, 2021 · fc23948 · fc23948
2 parents a8e7f41 + 41ea5e3
commit fc23948
Show file tree

Hide file tree

Showing 5 changed files with 72 additions and 8 deletions.
diff --git a/R/import_quant_data.R b/R/import_quant_data.R
@@ -15,6 +15,7 @@
 #'   Filtering is performed using DropletUtils::emptyDrops and cannot be performed with Cellranger.
 #' @param fdr_cutoff FDR cutoff to use for DropletUtils::emptyDrops.
 #'   Default is 0.01.
+#' @param tech_version Technology or kit used to process library (e.g. 10Xv3, 10Xv3.1).
 #' @param ... Any arguments to be passed into DropletUtils::emptyDrops.
 #'
 #' @return SingleCellExperiment of unfiltered gene x cell counts matrix
@@ -59,6 +60,7 @@ import_quant_data <- function(quant_dir,
                               usa_mode = FALSE,
                               filter = FALSE,
                               fdr_cutoff = 0.01,
+                              tech_version = NULL,
                               ...) {
 
   which_counts <- match.arg(which_counts)
@@ -103,7 +105,7 @@ import_quant_data <- function(quant_dir,
   }
 
   if (tool %in% c("alevin-fry", "alevin")){
-    sce <- read_alevin(quant_dir, intron_mode, usa_mode, which_counts)
+    sce <- read_alevin(quant_dir, intron_mode, usa_mode, which_counts, tech_version)
   } else if (tool == "kallisto") {
     sce <- read_kallisto(quant_dir, intron_mode, which_counts)
   } else if (tool == "cellranger") {

diff --git a/R/read_alevin.R b/R/read_alevin.R
@@ -12,6 +12,7 @@
 #'   only counts aligned to spliced cDNA ("spliced") or all spliced and unspliced cDNA ("unspliced").
 #'   Applies if `intron_mode` or `usa_mode` is TRUE.
 #'   Default is "spliced".
+#' @param tech_version Technology or kit used to process library (e.g. 10Xv3, 10Xv3.1).
 #'
 #' @return SingleCellExperiment of unfiltered gene x cell counts matrix.
 #' @export
@@ -43,7 +44,8 @@ read_alevin <- function(quant_dir,
                         mtx_format = FALSE,
                         intron_mode = FALSE,
                         usa_mode = FALSE,
-                        which_counts = c("spliced", "unspliced")){
+                        which_counts = c("spliced", "unspliced"),
+                        tech_version = NULL){
 
   which_counts <- match.arg(which_counts)
 
@@ -69,7 +71,7 @@ read_alevin <- function(quant_dir,
   }
 
   # read metadata
-  meta <- read_alevin_metadata(quant_dir)
+  meta <- read_alevin_metadata(quant_dir, tech_version)
 
   # Read the count data
   if(mtx_format | usa_mode) {
@@ -151,28 +153,36 @@ read_tximport <- function(quant_dir){
 #' Read alevin metadata from json files
 #'
 #' @param quant_dir Path to alevin output directory.
+#' @param tech_version Technology or kit used to process library (e.g. 10Xv3, 10Xv3.1).
 #'
 #' @return A list containing alevin run metadata,
 #'   with NULL values for missing elements.
 #'
 #' @noRd
-read_alevin_metadata <- function(quant_dir){
+read_alevin_metadata <- function(quant_dir, tech_version){
   cmd_info_path <- file.path(quant_dir, "cmd_info.json")
   permit_json_path <- file.path(quant_dir, "generate_permit_list.json")
   # Unused file, but leaving for future reference
   # collate_json_path <- file.path(quant_dir, "collate.json")
   quant_json_path <- file.path(quant_dir, "quant.json")
+  aux_meta_path <- file.path(quant_dir, "aux_info", "meta_info.json")
+
   if(!file.exists(quant_json_path)){
     # file for alevin-fry < 0.4.1
     quant_json_path <- file.path(quant_dir, "meta_info.json")
   }
 
-  # get cmd_info, which should always be present
+  # get cmd_info and aux_info/meta_info.json, which should always be present
   if (file.exists(cmd_info_path)){
     cmd_info <- jsonlite::read_json(cmd_info_path)
   } else {
     stop("cmd_info.json is missing")
   }
+  if (file.exists(aux_meta_path)){
+    aux_meta <- jsonlite::read_json(aux_meta_path)
+  } else {
+    stop("meta_info.json in aux_info folder is missing")
+  }
 
   # Read other info files if they exist. Otherwise, create dummy values
   if (file.exists(permit_json_path)){
@@ -188,7 +198,9 @@ read_alevin_metadata <- function(quant_dir){
 
   # Create a metadata list
   meta <- list(salmon_version = cmd_info$salmon_version,
-               reference_index = cmd_info[['index']])
+               reference_index = cmd_info[['index']],
+               total_reads = aux_meta[['num_processed']],
+               mapped_reads = aux_meta[['num_mapped']])
   # using $ notation  for `salmon_version` to get partial matching due to salmon 1.5.2 bug
   # see https://github.com/COMBINE-lab/salmon/issues/691
 
@@ -206,6 +218,9 @@ read_alevin_metadata <- function(quant_dir){
   meta$af_resolution <- quant_info[['resolution_strategy']]
   meta$af_tx2gene <- cmd_info[['tgMap']]
   meta$usa_mode <- quant_info[['usa_mode']]
+  meta$af_num_cells <- quant_info[['num_quantified_cells']]
+  meta$tech_version <- tech_version
+
 
   return(meta)
 }

diff --git a/inst/rmd/qc_report.rmd b/inst/rmd/qc_report.rmd
@@ -20,7 +20,48 @@ output:
 library(SingleCellExperiment)
 ```
 
-# `r params$sample` Experiment Information
+# Processing Information for `r params$sample`
+
+## Sample Metrics
+
+```{r echo=FALSE}
+# Extract the SCE metadata before first use: chunks are evaluated in document
+# order, so `metadata_list` must exist by the time this table is built.
+metadata_list <- metadata(params$sce)
+
+# One-row tibble of sample-level metrics, transposed so each metric is a row.
+# Read counts are formatted with thousands separators and no scientific notation.
+sample_information <- tibble::tibble(
+  "Sample id" = params$sample,
+  "Tech version" = metadata_list$tech_version,
+  "Number of reads sequenced" = format(metadata_list$total_reads, big.mark = ',', scientific = FALSE),
+  "Number of mapped reads" = format(metadata_list$mapped_reads, big.mark = ',', scientific = FALSE),
+  "Number of cells reported by alevin-fry" = format(metadata_list$af_num_cells, big.mark = ',', scientific = FALSE)
+) %>%
+  t()
+
+# make table with sample information
+knitr::kable(sample_information, "simple")
+```
+
+## Pre-Processing
+
+```{r echo=FALSE}
+# extract sce metadata containing processing information as table
+metadata_list <- metadata(params$sce) 
+
+processing_info <- tibble::tibble(
+  "Salmon version" = metadata_list$salmon_version,
+  "Alevin-fry version" = metadata_list$alevinfry_version,
+  "Transcriptome index" = metadata_list$reference_index,
+  "Filtering method" = metadata_list$af_permit_type,
+  "Resolution" = metadata_list$af_resolution, 
+  "Transcripts included" = dplyr::case_when(
+      metadata_list$transcript_type == "spliced" ~ "Spliced only",
+      metadata_list$transcript_type == "unspliced" ~ "Spliced and unspliced" )
+  ) %>%
+  t()
+
+
+# make table with processing information
+knitr::kable(processing_info, "simple")
+```
+
+# `r params$sample` Experiment Summary 
 
 This sample has `r ncol(params$sce)` cells, assayed for `r nrow(params$sce)` genes.
 

diff --git a/man/import_quant_data.Rd b/man/import_quant_data.Rd
diff --git a/man/read_alevin.Rd b/man/read_alevin.Rd