Skip to content

Commit

Permalink
Merge pull request #29 from AlexsLemonade/allyhawkins/add-processing-…
Browse files Browse the repository at this point in the history
…info

Add processing information to QC report
  • Loading branch information
allyhawkins committed Sep 2, 2021
2 parents a8e7f41 + 41ea5e3 commit fc23948
Show file tree
Hide file tree
Showing 5 changed files with 72 additions and 8 deletions.
4 changes: 3 additions & 1 deletion R/import_quant_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#' Filtering is performed using DropletUtils::emptyDrops and cannot be performed with Cellranger.
#' @param fdr_cutoff FDR cutoff to use for DropletUtils::emptyDrops.
#' Default is 0.01.
#' @param tech_version Technology or kit used to process library (e.g., 10Xv3, 10Xv3.1).
#' @param ... Any arguments to be passed into DropletUtils::emptyDrops.
#'
#' @return SingleCellExperiment of unfiltered gene x cell counts matrix
Expand Down Expand Up @@ -59,6 +60,7 @@ import_quant_data <- function(quant_dir,
usa_mode = FALSE,
filter = FALSE,
fdr_cutoff = 0.01,
tech_version = NULL,
...) {

which_counts <- match.arg(which_counts)
Expand Down Expand Up @@ -103,7 +105,7 @@ import_quant_data <- function(quant_dir,
}

if (tool %in% c("alevin-fry", "alevin")){
sce <- read_alevin(quant_dir, intron_mode, usa_mode, which_counts)
sce <- read_alevin(quant_dir, intron_mode, usa_mode, which_counts, tech_version)
} else if (tool == "kallisto") {
sce <- read_kallisto(quant_dir, intron_mode, which_counts)
} else if (tool == "cellranger") {
Expand Down
25 changes: 20 additions & 5 deletions R/read_alevin.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#' only counts aligned to spliced cDNA ("spliced") or all spliced and unspliced cDNA ("unspliced").
#' Applies if `intron_mode` or `usa_mode` is TRUE.
#' Default is "spliced".
#' @param tech_version Technology or kit used to process library (e.g., 10Xv3, 10Xv3.1).
#'
#' @return SingleCellExperiment of unfiltered gene x cell counts matrix.
#' @export
Expand Down Expand Up @@ -43,7 +44,8 @@ read_alevin <- function(quant_dir,
mtx_format = FALSE,
intron_mode = FALSE,
usa_mode = FALSE,
which_counts = c("spliced", "unspliced")){
which_counts = c("spliced", "unspliced"),
tech_version = NULL){

which_counts <- match.arg(which_counts)

Expand All @@ -69,7 +71,7 @@ read_alevin <- function(quant_dir,
}

# read metadata
meta <- read_alevin_metadata(quant_dir)
meta <- read_alevin_metadata(quant_dir, tech_version)

# Read the count data
if(mtx_format | usa_mode) {
Expand Down Expand Up @@ -151,28 +153,36 @@ read_tximport <- function(quant_dir){
#' Read alevin metadata from json files
#'
#' @param quant_dir Path to alevin output directory.
#' @param tech_version Technology or kit used to process library (e.g., 10Xv3, 10Xv3.1).
#'
#' @return A list containing alevin run metadata,
#' with NULL values for missing elements.
#'
#' @noRd
read_alevin_metadata <- function(quant_dir){
read_alevin_metadata <- function(quant_dir, tech_version){
cmd_info_path <- file.path(quant_dir, "cmd_info.json")
permit_json_path <- file.path(quant_dir, "generate_permit_list.json")
# Unused file, but leaving for future reference
# collate_json_path <- file.path(quant_dir, "collate.json")
quant_json_path <- file.path(quant_dir, "quant.json")
aux_meta_path <- file.path(quant_dir, "aux_info", "meta_info.json")

if(!file.exists(quant_json_path)){
# file for alevin-fry < 0.4.1
quant_json_path <- file.path(quant_dir, "meta_info.json")
}

# get cmd_info, which should always be present
# get cmd_info and aux_info/meta_info.json, which should always be present
if (file.exists(cmd_info_path)){
cmd_info <- jsonlite::read_json(cmd_info_path)
} else {
stop("cmd_info.json is missing")
}
if (file.exists(aux_meta_path)){
aux_meta <- jsonlite::read_json(aux_meta_path)
} else {
stop("meta_info.json in aux_info folder is missing")
}

# Read other info files if they exist. Otherwise, create dummy values
if (file.exists(permit_json_path)){
Expand All @@ -188,7 +198,9 @@ read_alevin_metadata <- function(quant_dir){

# Create a metadata list
meta <- list(salmon_version = cmd_info$salmon_version,
reference_index = cmd_info[['index']])
reference_index = cmd_info[['index']],
total_reads = aux_meta[['num_processed']],
mapped_reads = aux_meta[['num_mapped']])
# using $ notation for `salmon_version` to get partial matching due to salmon 1.5.2 bug
# see https://github.com/COMBINE-lab/salmon/issues/691

Expand All @@ -206,6 +218,9 @@ read_alevin_metadata <- function(quant_dir){
meta$af_resolution <- quant_info[['resolution_strategy']]
meta$af_tx2gene <- cmd_info[['tgMap']]
meta$usa_mode <- quant_info[['usa_mode']]
meta$af_num_cells <- quant_info[['num_quantified_cells']]
meta$tech_version <- tech_version


return(meta)
}
Expand Down
43 changes: 42 additions & 1 deletion inst/rmd/qc_report.rmd
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,48 @@ output:
library(SingleCellExperiment)
```

# `r params$sample` Experiment Information
# Processing Information for `r params$sample`

## Sample Metrics

```{r echo=FALSE}
# Extract the sce metadata inside this chunk: `metadata_list` is read below,
# and chunks are evaluated top to bottom, so it must exist before first use.
metadata_list <- metadata(params$sce)

# One-row tibble of sample-level metrics, transposed so each metric is a row.
# Counts are formatted with thousands separators and without scientific
# notation for readability.
sample_information <- tibble::tibble(
  "Sample id" = params$sample,
  "Tech version" = metadata_list$tech_version,
  "Number of reads sequenced" = format(metadata_list$total_reads, big.mark = ',', scientific = FALSE),
  "Number of mapped reads" = format(metadata_list$mapped_reads, big.mark = ',', scientific = FALSE),
  "Number of cells reported by alevin-fry" = format(metadata_list$af_num_cells, big.mark = ',', scientific = FALSE)
) %>%
  t()

# make table with sample information
knitr::kable(sample_information, "simple")
```

## Pre-Processing

```{r echo=FALSE}
# Pull the processing metadata stored on the sce object.
metadata_list <- metadata(params$sce)

# Assemble the processing details as a one-row tibble and transpose it so
# that every field is displayed on its own row.
processing_info <- t(
  tibble::tibble(
    "Salmon version" = metadata_list$salmon_version,
    "Alevin-fry version" = metadata_list$alevinfry_version,
    "Transcriptome index" = metadata_list$reference_index,
    "Filtering method" = metadata_list$af_permit_type,
    "Resolution" = metadata_list$af_resolution,
    # map the stored transcript type onto a display label
    "Transcripts included" = dplyr::case_when(
      metadata_list$transcript_type == "spliced" ~ "Spliced only",
      metadata_list$transcript_type == "unspliced" ~ "Spliced and unspliced"
    )
  )
)

# Render the processing information as a simple table.
knitr::kable(processing_info, "simple")
```

# `r params$sample` Experiment Summary

This sample has `r ncol(params$sce)` cells, assayed for `r nrow(params$sce)` genes.

Expand Down
3 changes: 3 additions & 0 deletions man/import_quant_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion man/read_alevin.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit fc23948

Please sign in to comment.