Skip to content

Commit

Permalink
Merge pull request #106 from omnideconv/custom_signature_and_sth_else
Browse files Browse the repository at this point in the history
Custom signature and sth else
  • Loading branch information
LorenzoMerotto authored May 30, 2022
2 parents bb686a2 + 6e1d688 commit 3c24859
Show file tree
Hide file tree
Showing 33 changed files with 725 additions and 106 deletions.
2 changes: 2 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,5 @@ CONTRIBUTING\.md$
Makefile$
_pkgdown.yml$
\.conda
^doc$
^Meta$
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ inst/doc/*
vignettes/*.html
vignettes/*.pdf

# ignore cibersort-related tests and files
CIBERSORT.R
LM22.txt
test_cibersort.R
CIBERSORT-Results.txt

# ignore OS X Finder files
*.DS_Store

Expand All @@ -28,3 +34,5 @@ datasets

# docs are generated by travis and deployed on the gh-pages branch
docs
/doc/
/Meta/
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,5 @@ Roxygen: list(markdown = TRUE)
LazyData: true
URL: https://omnideconv.org/immunedeconv, https://github.com/omnideconv/immunedeconv
BugReports: https://github.com/omnideconv/immunedeconv/issues
RoxygenNote: 7.1.2
RoxygenNote: 7.2.0

5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,18 @@
export(available_datasets)
export(cell_type_map)
export(cell_type_tree)
export(custom_deconvolution_methods)
export(deconvolute)
export(deconvolute_abis)
export(deconvolute_base_algorithm)
export(deconvolute_base_custom)
export(deconvolute_cibersort)
export(deconvolute_cibersort_custom)
export(deconvolute_consensus_tme)
export(deconvolute_consensus_tme_custom)
export(deconvolute_dcq)
export(deconvolute_epic)
export(deconvolute_epic_custom)
export(deconvolute_mcp_counter)
export(deconvolute_mmcp_counter)
export(deconvolute_mouse)
Expand Down
60 changes: 60 additions & 0 deletions R/BASE.R
Original file line number Diff line number Diff line change
Expand Up @@ -121,3 +121,63 @@ base_algorithm <- function(data, reg, perm=100, median.norm=T)

return(CLP.scores)
}



#' Build the cell-type compendium used by the BASE algorithm
#'
#' Source code to create the compendium used in the BASE algorithm, containing
#' up- and down-regulated weight sets that specify the
#' specificity by which each gene is expressed by a given cell.
#' This code is adapted from Varn et al., DOI: 10.1158/0008-5472.CAN-16-2490
#'
#' @param signature_matrix numeric matrix; the signature matrix from which the
#'   compendium will be built. Must contain genes on rows and cell types on
#'   columns.
#' @return an object with the same rows as `signature_matrix` and two columns
#'   per cell type: the up-regulated weights (suffix `_UP`) followed by the
#'   down-regulated weights (suffix `_DN`). Column names are upper-cased with
#'   spaces removed, and all values are min-max rescaled over the whole
#'   compendium.
#'
create_base_compendium <- function(signature_matrix) {
  # Converts lower-tail z-scores (all <= 0) into -log10 of a doubled normal
  # tail probability, capped at 10 so that extreme genes do not dominate.
  tail_weight <- function(z) {
    weight <- -log10(pnorm(z) * 2)
    weight[weight > 10] <- 10
    weight
  }

  # Center every gene (row) on its median across cell types. `gene_median` has
  # one entry per row, so the subtraction recycles down each column.
  gene_median <- apply(signature_matrix, 1, median)
  z_scores <- signature_matrix - gene_median

  # Standardize every cell type (column) to zero mean and unit variance.
  col_mean <- apply(z_scores, 2, mean)
  col_sd <- apply(z_scores, 2, sd)
  for (k in seq_len(ncol(z_scores))) {
    z_scores[, k] <- (z_scores[, k] - col_mean[k]) / col_sd[k]
  }

  # Up-regulated weights: only positive z-scores contribute; negative ones are
  # clamped to 0 and therefore get a weight of 0.
  up_weights <- z_scores
  for (k in seq_len(ncol(up_weights))) {
    z <- z_scores[, k]
    z[z < 0] <- 0
    up_weights[, k] <- tail_weight(-z)
  }
  colnames(up_weights) <- paste0(colnames(z_scores), "_up")

  # Down-regulated weights: the mirror image, using only negative z-scores.
  down_weights <- z_scores
  for (k in seq_len(ncol(down_weights))) {
    z <- z_scores[, k]
    z[z > 0] <- 0
    down_weights[, k] <- tail_weight(z)
  }
  colnames(down_weights) <- paste0(colnames(z_scores), "_dn")

  compendium <- cbind(up_weights, down_weights)

  # Min-max rescale over the whole compendium (not per column).
  lowest <- min(compendium)
  highest <- max(compendium)
  compendium <- (compendium - lowest) / (highest - lowest)

  # Normalize the column labels: strip spaces, then upper-case.
  colnames(compendium) <- toupper(gsub(" ", "", colnames(compendium)))

  compendium
}
173 changes: 173 additions & 0 deletions R/custom_deconvolution_methods.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
#' Collection of deconvolution methods that allow custom signature matrices.
#'
#' @import methods
#' @import dplyr
#' @importFrom testit assert
#' @import readr
#' @importFrom tibble as_tibble
#' @importFrom EPIC EPIC
#' @importFrom rlang dots_list
#' @importFrom stats aggregate lm lsfit median qqplot
#' @importFrom utils capture.output read.csv read.table tail write.table
#'
#' @name custom_deconvolution
#' @docType package
NULL



#' List of methods that support the use of a custom signature
#'
#' The available methods are
#' `epic`, `cibersort`, `consensus_tme`, `base`
#'
#' CIBERSORT absolute mode has no entry of its own in this vector; it is
#' selected through the `absolute` argument of `deconvolute_cibersort_custom()`.
#'
#' The object is a named vector. The names correspond to the display name of the method,
#' the values to the internal name.
#'
#' @export
custom_deconvolution_methods <- c(
  "EPIC" = "epic",
  "CIBERSORT" = "cibersort",
  "ConsensusTME" = "consensus_tme",
  "BASE" = "base"
)


###########################################################################
# Deconvolution with custom signature matrix
#
# These functions let the users specify a custom signature matrix for the analysis
###########################################################################



#' Deconvolute using CIBERSORT or CIBERSORT abs and a custom signature matrix.
#'
#' @param gene_expression_matrix a m x n matrix with m genes and n samples
#' @param signature_matrix a m x l matrix with m genes and l cell types. The
#'   matrix should contain only a subset of the genes useful for the analysis.
#' @param QN boolean. Whether to quantile normalize the data. Data should be normalized
#'   when the signature matrix is derived from different studies/sample batches
#' @param absolute Set to TRUE for CIBERSORT absolute mode.
#' @param abs_method Choose method to compute absolute score (only if `absolute=TRUE`).
#' @param ... passed through to the original CIBERSORT function. An argument given
#'   here takes precedence over the same-named argument set by this function
#'   (e.g. a `perm` supplied in `...` replaces the default `perm = 0`). Documentation
#'   is not publicly available. Log in to the CIBERSORT website for details.
#' @return the transposed CIBERSORT result, without the `RMSE`, `P-value` and
#'   `Correlation` columns.
#'
#' @note the gene expression and the signature matrix should be provided in the same normalization
#' @export
deconvolute_cibersort_custom <- function(gene_expression_matrix, signature_matrix, QN = FALSE,
                                         absolute = FALSE, abs_method = "sig.score",
                                         ...) {
  # The CIBERSORT source is not shipped with the package; the user has to
  # register the path to CIBERSORT.R in `config_env` beforehand.
  assert("CIBERSORT.R is provided", exists("cibersort_binary", envir = config_env))
  source(get("cibersort_binary", envir = config_env))

  # CIBERSORT reads its inputs from disk, so both matrices are written to
  # temporary tab-separated files that are removed again when this function exits.
  temp.expression.file <- tempfile()
  temp.signature.file <- tempfile()
  on.exit(unlink(c(temp.expression.file, temp.signature.file)), add = TRUE)

  # The output file is passed positionally: readr renamed this argument from
  # `path` to `file`, and the positional form works with both spellings.
  write_tsv(as_tibble(gene_expression_matrix, rownames = "gene_symbol"), temp.expression.file)
  write_tsv(as_tibble(signature_matrix, rownames = "gene_symbol"), temp.signature.file)

  # `.homonyms = "last"` keeps the last occurrence of a duplicated argument
  # name, i.e. values coming from `...` win over the defaults listed before.
  arguments <- dots_list(temp.signature.file, temp.expression.file,
    perm = 0,
    QN = QN, absolute = absolute, abs_method = abs_method, ..., .homonyms = "last"
  )

  call <- rlang::call2(CIBERSORT, !!!arguments)
  results <- eval(call)

  # Drop the per-sample fit statistics and keep only the cell-type columns.
  # `drop = FALSE` keeps a two-dimensional result even if a single row or
  # column remains, so the transpose below always flips rows and columns.
  fit_statistics <- c("RMSE", "P-value", "Correlation")
  results <- results[, !colnames(results) %in% fit_statistics, drop = FALSE]

  t(results)
}

#' Deconvolute using EPIC and a custom signature matrix.
#'
#' @param gene_expression_matrix a m x n matrix with m genes and n samples
#' @param signature_matrix a m x l matrix with m genes and l cell types. This matrix
#'   should contain the whole set of genes
#' @param signature_genes a character vector of the gene names to use as signature
#'   needs to be smaller than the genes in the signature matrix
#' @param genes_var (optional) a m x l matrix with m genes and l cell types, with
#'   the variability of each gene expression for each cell type.
#'   This will be used in the optimization
#' @param mrna_quantities (optional) A named numeric vector with
#'   the amount of mRNA in arbitrary units for each of the
#'   reference cells and of the other uncharacterized cells.
#'   If `NULL`, `c("default" = 1)` is passed to EPIC.
#' @param ... passed through to EPIC. A native argument takes precedence
#'   over an immunedeconv argument.
#'   See [EPIC](https://rdrr.io/github/GfellerLab/EPIC/man/EPIC.html)
#' @return the transposed `cellFractions` element of the EPIC result.
#' @export
deconvolute_epic_custom <- function(gene_expression_matrix, signature_matrix,
                                    signature_genes, genes_var = NULL, mrna_quantities = NULL,
                                    ...) {
  # Assemble the custom reference in the list layout handed to EPIC's
  # `reference` argument.
  ref <- list()
  ref$refProfiles <- signature_matrix
  ref$sigGenes <- signature_genes
  if (!is.null(genes_var)) {
    ref$refProfiles.var <- genes_var
  }

  # Use the user-supplied mRNA quantities when given, otherwise fall back to a
  # single "default" entry. (The value has to end up in `mRNA_cell`, the exact
  # name used in the call below.)
  mRNA_cell <- if (is.null(mrna_quantities)) {
    c("default" = 1.)
  } else {
    mrna_quantities
  }

  # `.homonyms = "last"` keeps the last occurrence of a duplicated argument
  # name, so anything supplied through `...` overrides the values set here.
  arguments <- dots_list(
    bulk = gene_expression_matrix,
    reference = ref, mRNA_cell = mRNA_cell, ..., .homonyms = "last"
  )

  call <- rlang::call2(EPIC::EPIC, !!!arguments)
  epic_res_raw <- eval(call)

  t(epic_res_raw$cellFractions)
}




#' Deconvolute using ConsensusTME and a custom signature matrix
#'
#' @param gene_expression_matrix a m x n matrix with m genes and n samples. Data
#'   should be TPM normalized and log10 scaled.
#' @param signature_genes a list with each element containing genes to represent a cell type. The cell types
#'   should be the names of each element of the list.
#' @param stat_method Choose statistical framework to generate the enrichment scores.
#'   Default: 'ssgsea'. Available methods: 'ssgsea', 'gsva', 'plage', 'zscore', 'singScore'.
#'   These mirror the parameter options of \code{GSVA::gsva()} with the exception of \code{singScore}
#'   which leverages \code{singscore::multiScore()}
#' @return the value returned by \code{ConsensusTME::geneSetEnrichment()}, unchanged.
#' @note ConsensusTME uses tumor-specific consensus built gene signatures. In this case
#'   only the user-provided signature will be used
#' @export
#'
deconvolute_consensus_tme_custom <- function(gene_expression_matrix, signature_genes, stat_method = "ssgsea") {
  # Thin wrapper: the three arguments are forwarded positionally, in this order.
  ConsensusTME::geneSetEnrichment(
    gene_expression_matrix,
    signature_genes,
    stat_method
  )
}



#' Deconvolute using BASE and a custom signature matrix
#'
#' @param gene_expression_matrix a m x n matrix with m genes and n samples. Data
#'   should be TPM normalized and log10 scaled.
#' @param signature_matrix a m x l matrix with m genes and l cell types. Data
#'   should be non normalized, as the normalization will be done in the construction
#'   of the compendium (internal structure)
#' @param n_permutations the number of permutations of each sample expression
#'   to generate. These are used to normalize the results.
#' @param log10 logical. Intended to control a log10 transformation of the
#'   expression matrix. NOTE(review): the function body currently never reads
#'   this argument, so it has no effect - confirm the intended behavior.
#' @return the transposed result of `base_algorithm()`.
#' @export
#'
deconvolute_base_custom <- function(gene_expression_matrix,
                                    signature_matrix,
                                    n_permutations = 100,
                                    log10 = TRUE) {
  # Turn the raw signature matrix into the up/down weight compendium, then
  # score the expression matrix against it.
  compendium <- create_base_compendium(signature_matrix)
  scores <- base_algorithm(gene_expression_matrix, compendium, perm = n_permutations)

  t(scores)
}
9 changes: 6 additions & 3 deletions R/immune_deconvolution_methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -276,13 +276,15 @@ deconvolute_abis = function(gene_expression_matrix,
#'
#' @param gene_expression_matrix a m x n matrix with m genes and n samples
#' @param indications a n-vector giving an indication string (e.g. 'brca') for each sample.
#' Different cancer types should be analyzed separately.
#' The method requires at least 2 samples of a certain cancer type.
#' Accepted indications are 'kich', 'blca', 'brca', 'cesc', 'gbm', 'hnsc', 'kirp', 'lgg',
#' 'lihc', 'luad', 'lusc', 'prad', 'sarc', 'pcpg', 'paad', 'tgct',
#' 'ucec', 'ov', 'skcm', 'dlbc', 'kirc', 'acc', 'meso', 'thca',
#' 'uvm', 'ucs', 'thym', 'esca', 'stad', 'read', 'coad', 'chol'
#' @param stat_method Choose statistical framework to generate the enrichment scores.
#' Default: 'ssgsea'
#' Default: 'ssgsea'. Available methods: 'ssgsea', 'gsva', 'plage', 'zscore', 'singScore'.
#' These mirror the parameter options of \code{GSVA::gsva()} with the exception of \code{singScore}
#' which leverages \code{singscore::multiScore()}
#' @param ... passed through to the original ConsensusTME function. A native argument takes precedence
#' over an immunedeconv argument. Documentation can be found at http://consensusTME.org
#'
Expand All @@ -305,7 +307,8 @@ deconvolute_consensus_tme = function(gene_expression_matrix,
list.results <- list()
for(t in tumor.types){
cur.samples <- indications == t
cur.results <- ConsensusTME::consensusTMEAnalysis(as.matrix(gene_expression_matrix[, cur.samples]), t, method)
cur.results <- ConsensusTME::consensusTMEAnalysis(as.matrix(gene_expression_matrix[, cur.samples]),
t, method)

list.results[[t]] <- cur.results
}
Expand Down
2 changes: 2 additions & 0 deletions R/mouse_cell_type_mapping.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
#' @import magrittr
#' @import stringr
#'
#' @name mouse_cell_type_mapping
#' @docType package
NULL

#' Since DCQ and BASE provide estimates for several cell types, this function
Expand Down
Loading

0 comments on commit 3c24859

Please sign in to comment.