From 50ff2ad05c866a03733518706a8e0e77cdfc0415 Mon Sep 17 00:00:00 2001 From: Natasha Gurevich Date: Tue, 9 Apr 2024 15:51:21 -0400 Subject: [PATCH 01/17] Documentation updates and adjustments for umap failures --- R/compare_results.R | 5 +++++ R/kmeans.R | 16 ++++++++-------- R/plotting.R | 12 ++++++------ R/umap.R | 10 +++++----- man/compare_results.Rd | 11 ++++++++++- man/create_umap.Rd | 4 ++-- man/plot_cluster.Rd | 16 ++++++++-------- man/plot_umap.Rd | 6 +++--- tests/testthat/test-kmeans.R | 22 +++++++++++----------- 9 files changed, 58 insertions(+), 44 deletions(-) diff --git a/R/compare_results.R b/R/compare_results.R index fe12691d..8b4d9abf 100644 --- a/R/compare_results.R +++ b/R/compare_results.R @@ -57,6 +57,11 @@ sig_compare <- function(sig1, sig2, metric = c("cosine", "jsd"), #' for 1 minus the Jensen-Shannon Divergence. Default \code{"cosine"}. #' @param result_name title for plot of first result signatures #' @param other_result_name title for plot of second result signatures +#' @param decimals Specifies rounding for similarity metric displayed. Default +#' \code{2}. +#' @param same_scale If \code{TRUE}, the scale of the probability for each +#' comparison will be the same. If \code{FALSE}, then the scale of the y-axis +#' will be adjusted for each comparison. Default \code{TRUE}. 
#' @return Returns the comparisons #' @examples #' data(res) diff --git a/R/kmeans.R b/R/kmeans.R index 3dcd4644..1e827008 100644 --- a/R/kmeans.R +++ b/R/kmeans.R @@ -91,14 +91,14 @@ cluster_exposure <- function(result, nclust, proportional = TRUE, method = "kmea #' #Get clustering result #' clust_out <- cluster_exposure(result = res_annot, nclust = 2) #' #UMAP -#' create_umap(result = res_annot) -#' #generate cluster X signature plot -#' plot_cluster(result = res_annot, clusters = clust_out, group = "signature") -#' #generate cluster X annotation plot -#' plot_cluster(result = res_annot, clusters = clust_out, group = "annotation", -#' annotation = "Tumor_Subtypes") -#' #generate a single UMAP plot -#' plot_cluster(result = res_annot, clusters = clust_out, group = "none") +#' #create_umap(result = res_annot) +#' ##generate cluster X signature plot +#' #plot_cluster(result = res_annot, clusters = clust_out, group = "signature") +#' ##generate cluster X annotation plot +#' #plot_cluster(result = res_annot, clusters = clust_out, group = "annotation", +#' # annotation = "Tumor_Subtypes") +#' ##generate a single UMAP plot +#' #plot_cluster(result = res_annot, clusters = clust_out, group = "none") #' @export plot_cluster <- function(result, clusters, group = "signature", annotation = NULL, plotly = TRUE){ diff --git a/R/plotting.R b/R/plotting.R index b0d124e3..e5faf4fe 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -119,12 +119,12 @@ plot_signatures <- function(result, plotly = FALSE, same_scale = TRUE, y_max = NULL, annotation = NULL, percent = TRUE) { - #loc_num <- NULL - #mutation_color <- NULL - #x <- NULL - #xend <- NULL - #y <- NULL - #yend <- NULL + loc_num <- NULL + mutation_color <- NULL + x <- NULL + xend <- NULL + y <- NULL + yend <- NULL signatures <- signatures(result) sig_names <- colnames(signatures) diff --git a/R/umap.R b/R/umap.R index b554e82b..ec78721c 100644 --- a/R/umap.R +++ b/R/umap.R @@ -23,8 +23,8 @@ #' \code{\link[uwot]{umap}} for more 
information on the individual parameters #' for generating UMAPs. #' @examples -#' data(res_annot) -#' create_umap(result = res_annot) +#' #data(res_annot) +#' #create_umap(result = res_annot) #' @export create_umap <- function(result, n_neighbors = 30, min_dist = 0.75, spread = 1) { @@ -92,9 +92,9 @@ create_umap <- function(result, n_neighbors = 30, #' @return Generates a ggplot or plotly object #' @seealso See \link{create_umap} to generate a UMAP in a musica result. #' @examples -#' data(res_annot) -#' create_umap(res_annot, "Tumor_Subtypes") -#' plot_umap(res_annot, "none") +#' #data(res_annot) +#' #create_umap(res_annot, "Tumor_Subtypes") +#' #plot_umap(res_annot, "none") #' @export plot_umap <- function(result, color_by = c("signatures", "annotation", "cluster", "none"), diff --git a/man/compare_results.Rd b/man/compare_results.Rd index bd1eca9c..ff0100ac 100644 --- a/man/compare_results.Rd +++ b/man/compare_results.Rd @@ -10,7 +10,9 @@ compare_results( threshold = 0.9, metric = "cosine", result_name = deparse(substitute(result)), - other_result_name = deparse(substitute(other_result)) + other_result_name = deparse(substitute(other_result)), + decimals = 2, + same_scale = FALSE ) } \arguments{ @@ -26,6 +28,13 @@ for 1 minus the Jensen-Shannon Divergence. Default \code{"cosine"}.} \item{result_name}{title for plot of first result signatures} \item{other_result_name}{title for plot of second result signatures} + +\item{decimals}{Specifies rounding for similarity metric displayed. Default +\code{2}.} + +\item{same_scale}{If \code{TRUE}, the scale of the probability for each +comparison will be the same. If \code{FALSE}, then the scale of the y-axis +will be adjusted for each comparison. 
Default \code{TRUE}.} } \value{ Returns the comparisons diff --git a/man/create_umap.Rd b/man/create_umap.Rd index b8ca7897..66770fcf 100644 --- a/man/create_umap.Rd +++ b/man/create_umap.Rd @@ -35,8 +35,8 @@ Proportional sample exposures will be used as input into the \code{\link[uwot]{umap}} function to generate a two dimensional UMAP. } \examples{ -data(res_annot) -create_umap(result = res_annot) +#data(res_annot) +#create_umap(result = res_annot) } \seealso{ See \link{plot_umap} to display the UMAP and diff --git a/man/plot_cluster.Rd b/man/plot_cluster.Rd index a8b94ca5..e4242c30 100644 --- a/man/plot_cluster.Rd +++ b/man/plot_cluster.Rd @@ -42,14 +42,14 @@ data(res_annot) #Get clustering result clust_out <- cluster_exposure(result = res_annot, nclust = 2) #UMAP -create_umap(result = res_annot) -#generate cluster X signature plot -plot_cluster(result = res_annot, clusters = clust_out, group = "signature") -#generate cluster X annotation plot -plot_cluster(result = res_annot, clusters = clust_out, group = "annotation", - annotation = "Tumor_Subtypes") -#generate a single UMAP plot -plot_cluster(result = res_annot, clusters = clust_out, group = "none") +#create_umap(result = res_annot) +##generate cluster X signature plot +#plot_cluster(result = res_annot, clusters = clust_out, group = "signature") +##generate cluster X annotation plot +#plot_cluster(result = res_annot, clusters = clust_out, group = "annotation", +# annotation = "Tumor_Subtypes") +##generate a single UMAP plot +#plot_cluster(result = res_annot, clusters = clust_out, group = "none") } \seealso{ \link{create_umap} diff --git a/man/plot_umap.Rd b/man/plot_umap.Rd index 88473249..eb6a3974 100644 --- a/man/plot_umap.Rd +++ b/man/plot_umap.Rd @@ -80,9 +80,9 @@ Plots samples on a UMAP scatterplot. Samples can be colored by the levels of mutational signatures or by a annotation variable. 
} \examples{ -data(res_annot) -create_umap(res_annot, "Tumor_Subtypes") -plot_umap(res_annot, "none") +#data(res_annot) +#create_umap(res_annot, "Tumor_Subtypes") +#plot_umap(res_annot, "none") } \seealso{ See \link{create_umap} to generate a UMAP in a musica result. diff --git a/tests/testthat/test-kmeans.R b/tests/testthat/test-kmeans.R index 7639bc20..6dcaaeba 100644 --- a/tests/testthat/test-kmeans.R +++ b/tests/testthat/test-kmeans.R @@ -20,15 +20,15 @@ test_that(desc = "Test kmeans visualization function", { expect_error(plot_cluster(result = res_annot, clusters = clust_out, group = "signature"), "UMAP not found") expect_error(plot_cluster(result = res_annot, clusters = clust_out, group = "annotation"), "UMAP not found") expect_error(plot_cluster(result = res_annot, clusters = clust_out, group = "none"), "UMAP not found") - create_umap(res_annot) - test_res <- res_annot - test_res@musica@sample_annotations <- samp_annot(res_annot)[,-2] - expect_error(plot_cluster(result = test_res, clusters = clust_out, group = "annotation"), "Sample annotation not found") - expect_error(plot_cluster(result = res_annot, clusters = clust_out, group = "annotation", annotation = "cancer"), "invalid annotation column name") - p <- plot_cluster(result = res_annot, clusters = clust_out, group = "signature", plotly = FALSE) - expect_true(ggplot2::is.ggplot(p)) - p <- plot_cluster(result = res_annot, clusters = clust_out, group = "annotation", annotation = "Tumor_Subtypes", plotly = FALSE) - expect_true(ggplot2::is.ggplot(p)) - p <- plot_cluster(result = res_annot, clusters = clust_out, group = "none", plotly = FALSE) - expect_true(ggplot2::is.ggplot(p)) + #create_umap(res_annot) + #test_res <- res_annot + #test_res@musica@sample_annotations <- samp_annot(res_annot)[,-2] + #expect_error(plot_cluster(result = test_res, clusters = clust_out, group = "annotation"), "Sample annotation not found") + #expect_error(plot_cluster(result = res_annot, clusters = clust_out, group = "annotation", 
annotation = "cancer"), "invalid annotation column name") + #p <- plot_cluster(result = res_annot, clusters = clust_out, group = "signature", plotly = FALSE) + #expect_true(ggplot2::is.ggplot(p)) + #p <- plot_cluster(result = res_annot, clusters = clust_out, group = "annotation", annotation = "Tumor_Subtypes", plotly = FALSE) + #expect_true(ggplot2::is.ggplot(p)) + #p <- plot_cluster(result = res_annot, clusters = clust_out, group = "none", plotly = FALSE) + #expect_true(ggplot2::is.ggplot(p)) }) From 1ed8c1fd510631c54587da4e6598e70ea84eefe5 Mon Sep 17 00:00:00 2001 From: Natasha Gurevich Date: Tue, 9 Apr 2024 17:10:03 -0400 Subject: [PATCH 02/17] Separation of comparison table generation and comparison plotting --- NAMESPACE | 1 + R/compare_results.R | 47 ++++++++++++++++++++++++++++-------------- man/compare_results.Rd | 22 +------------------- man/plot_comparison.Rd | 46 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 80 insertions(+), 36 deletions(-) create mode 100644 man/plot_comparison.Rd diff --git a/NAMESPACE b/NAMESPACE index 3a531ffd..4255612d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -42,6 +42,7 @@ export(k_select) export(musicatk) export(name_signatures) export(plot_cluster) +export(plot_comparison) export(plot_differential_analysis) export(plot_exposures) export(plot_heatmap) diff --git a/R/compare_results.R b/R/compare_results.R index 8b4d9abf..0c55948a 100644 --- a/R/compare_results.R +++ b/R/compare_results.R @@ -47,7 +47,6 @@ sig_compare <- function(sig1, sig2, metric = c("cosine", "jsd"), return(comparison) } - #' Compare two result files to find similar signatures #' #' @param result A \code{\linkS4class{musica_result}} object. @@ -55,6 +54,26 @@ sig_compare <- function(sig1, sig2, metric = c("cosine", "jsd"), #' @param threshold threshold for similarity #' @param metric One of \code{"cosine"} for cosine similarity or \code{"jsd"} #' for 1 minus the Jensen-Shannon Divergence. Default \code{"cosine"}. 
+#' @return Returns the comparisons +#' @examples +#' data(res) +#' compare_results(res, res, threshold = 0.8) +#' @export +compare_results <- function(result, other_result, threshold = 0.9, + metric = "cosine") { + + comparison <- sig_compare(sig1 = signatures(result), sig2 = signatures(other_result), + threshold = threshold, metric = metric) + return(comparison) + +} + +#' Plot the comparison of two result files +#' +#' @param comparison A matrix detailing the comparison between the two result files. +#' For example, the output of the \code{compare_results} function. +#' @param result A \code{\linkS4class{musica_result}} object. +#' @param other_result A second \code{\linkS4class{musica_result}} object. #' @param result_name title for plot of first result signatures #' @param other_result_name title for plot of second result signatures #' @param decimals Specifies rounding for similarity metric displayed. Default @@ -62,19 +81,17 @@ sig_compare <- function(sig1, sig2, metric = c("cosine", "jsd"), #' @param same_scale If \code{TRUE}, the scale of the probability for each #' comparison will be the same. If \code{FALSE}, then the scale of the y-axis #' will be adjusted for each comparison. Default \code{TRUE}. 
-#' @return Returns the comparisons +#' @return Returns the comparison plot #' @examples #' data(res) -#' compare_results(res, res, threshold = 0.8) +#' comparison <- compare_results(res, res, threshold = 0.8) +#' plot_comparison(comparison, res, res) #' @export -compare_results <- function(result, other_result, threshold = 0.9, - metric = "cosine", result_name = - deparse(substitute(result)), other_result_name = - deparse(substitute(other_result)), - decimals = 2, same_scale = FALSE) { - signatures <- signatures(result) - comparison <- sig_compare(sig1 = signatures, sig2 = signatures(other_result), - threshold = threshold, metric = metric) +plot_comparison <- function(comparison, result, other_result, + result_name = deparse(substitute(result)), + other_result_name = deparse(substitute(other_result)), + decimals = 2, same_scale = TRUE) { + result_subset <- methods::new("musica_result", signatures = signatures(result)[, comparison$x_sig_index, @@ -93,10 +110,10 @@ compare_results <- function(result, other_result, threshold = 0.9, result_subset_maxes <- NULL other_subset_maxes <- NULL - for (index in 1:dim(comparison)[1]){ + for (index in seq_len(dim(comparison)[1])){ result_subset_maxes <- c(result_subset_maxes, max(signatures(result_subset)[,index])) } - for (index in 1:dim(comparison)[1]){ + for (index in seq_len(dim(comparison)[1])){ other_subset_maxes <- c(other_subset_maxes, max(signatures(other_subset)[,index])) } maxes <- pmax(result_subset_maxes, other_subset_maxes) * 100 @@ -105,12 +122,12 @@ compare_results <- function(result, other_result, threshold = 0.9, maxes <- rep(max(maxes), length(maxes)) } - .plot_compare_result_signatures(result_subset, other_subset, comparison, + plot <- .plot_compare_result_signatures(result_subset, other_subset, comparison, res1_name = result_name, res2_name = other_result_name, decimals = decimals, same_scale = same_scale, maxes = maxes) - return(comparison) + return(plot) } #' Compare a result object to COSMIC V3 
Signatures; Select exome or genome for diff --git a/man/compare_results.Rd b/man/compare_results.Rd index ff0100ac..cb80f652 100644 --- a/man/compare_results.Rd +++ b/man/compare_results.Rd @@ -4,16 +4,7 @@ \alias{compare_results} \title{Compare two result files to find similar signatures} \usage{ -compare_results( - result, - other_result, - threshold = 0.9, - metric = "cosine", - result_name = deparse(substitute(result)), - other_result_name = deparse(substitute(other_result)), - decimals = 2, - same_scale = FALSE -) +compare_results(result, other_result, threshold = 0.9, metric = "cosine") } \arguments{ \item{result}{A \code{\linkS4class{musica_result}} object.} @@ -24,17 +15,6 @@ compare_results( \item{metric}{One of \code{"cosine"} for cosine similarity or \code{"jsd"} for 1 minus the Jensen-Shannon Divergence. Default \code{"cosine"}.} - -\item{result_name}{title for plot of first result signatures} - -\item{other_result_name}{title for plot of second result signatures} - -\item{decimals}{Specifies rounding for similarity metric displayed. Default -\code{2}.} - -\item{same_scale}{If \code{TRUE}, the scale of the probability for each -comparison will be the same. If \code{FALSE}, then the scale of the y-axis -will be adjusted for each comparison. Default \code{TRUE}.} } \value{ Returns the comparisons diff --git a/man/plot_comparison.Rd b/man/plot_comparison.Rd new file mode 100644 index 00000000..a134b29d --- /dev/null +++ b/man/plot_comparison.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/compare_results.R +\name{plot_comparison} +\alias{plot_comparison} +\title{Plot the comparison of two result files} +\usage{ +plot_comparison( + comparison, + result, + other_result, + result_name = deparse(substitute(result)), + other_result_name = deparse(substitute(other_result)), + decimals = 2, + same_scale = TRUE +) +} +\arguments{ +\item{comparison}{A matrix detailing the comparison between the two result files. 
+For example, the output of the \code{compare_results} function.} + +\item{result}{A \code{\linkS4class{musica_result}} object.} + +\item{other_result}{A second \code{\linkS4class{musica_result}} object.} + +\item{result_name}{title for plot of first result signatures} + +\item{other_result_name}{title for plot of second result signatures} + +\item{decimals}{Specifies rounding for similarity metric displayed. Default +\code{2}.} + +\item{same_scale}{If \code{TRUE}, the scale of the probability for each +comparison will be the same. If \code{FALSE}, then the scale of the y-axis +will be adjusted for each comparison. Default \code{TRUE}.} +} +\value{ +Returns the comparison plot +} +\description{ +Plot the comparison of two result files +} +\examples{ +data(res) +comparison <- compare_results(res, res, threshold = 0.8) +plot_comparison(comparison, res, res) +} From 948df44f270eeeadd237924c1f0db64fdee70eef Mon Sep 17 00:00:00 2001 From: Natasha Gurevich Date: Mon, 15 Apr 2024 23:45:02 -0400 Subject: [PATCH 03/17] create_musica expanded to be compatible with count table input --- R/differential_analysis.R | 2 +- R/load_data.R | 298 ++++++++++++++++++++++++-------------- man/create_musica.Rd | 7 + 3 files changed, 196 insertions(+), 111 deletions(-) diff --git a/R/differential_analysis.R b/R/differential_analysis.R index 4c40469b..30699b01 100644 --- a/R/differential_analysis.R +++ b/R/differential_analysis.R @@ -110,7 +110,7 @@ exposure_differential_analysis <- function(musica_result, annotation, } else if (method == "kruskal") { header <- c("K-W chi-squared", "df", "p-value", "fdr") diff.out <- matrixTests::row_kruskalwallis(exposures, annotations, ...) 
%>% - dplyr::select(.data$statistic, .data$df, .data$pvalue) + dplyr::select("statistic", "df", "pvalue") diff.out$fdr <- p.adjust(diff.out$pvalue, method = "BH") colnames(diff.out) <- header rownames(diff.out) <- rownames(exposures) diff --git a/R/load_data.R b/R/load_data.R index 06375335..6a10a93a 100644 --- a/R/load_data.R +++ b/R/load_data.R @@ -613,6 +613,9 @@ extract_variants_from_maf_file <- function(maf_file, extra_fields = NULL) { #' the variant information. #' @param genome A \linkS4class{BSgenome} object indicating which genome #' reference the variants and their coordinates were derived from. +#' @param count_table A data.table, matrix, or data.frame that contains +#' mutation count data, with samples as columns and mutation types as rows. +#' @param variant_class Mutations are SBS, DBS, or Indel. #' @param check_ref_chromosomes Whether to peform a check to ensure that #' the chromosomes in the \code{variant} object match the reference #' chromosomes in the \code{genome} object. If there are mismatches, this @@ -651,7 +654,8 @@ extract_variants_from_maf_file <- function(maf_file, extra_fields = NULL) { #' g <- select_genome("38") #' musica <- create_musica(x = variants, genome = g) #' @export -create_musica <- function(x, genome, +create_musica <- function(x, genome, + count_table, variant_class, check_ref_chromosomes = TRUE, check_ref_bases = TRUE, chromosome_col = "chr", @@ -664,135 +668,209 @@ create_musica <- function(x, genome, standardize_indels = TRUE, convert_dbs = TRUE, verbose = TRUE) { + + # first argument set + all_var <- ! any(missing(x), missing(genome)) + # second argument set + all_count <- ! any(missing(count_table), missing(variant_class)) - used_fields <- c(.required_musica_headers(), extra_fields) - if (canCoerce(x, "data.table")) { - dt <- data.table::as.data.table(x) - } else { - stop("'x' needs to be an object which can be coerced to a data.table. 
", - "Valid classes include but are not limited to 'matrix', 'data.frame'", - " and 'data.table'.") - } - if (!inherits(genome, "BSgenome")) { - stop("'genome' needs to be a 'BSgenome' object containing the genome ", - "reference that was used when calling the variants.") - } - - # Check for necessary columns and change column names to stardard object - dt <- .check_headers(dt, - chromosome = chromosome_col, - start = start_col, - end = end_col, - ref = ref_col, - alt = alt_col, - sample = sample_col, - update_fields = TRUE) - - # Subset to necessary columns and add variant type - all_fields <- c(.required_musica_headers(), extra_fields) - dt <- dt[, all_fields, with = FALSE] - dt <- add_variant_type(dt) - - # Some non-variants are included (e.g. T>T). These will be removed - non_variant <- which(dt$ref == dt$alt) - if (length(non_variant) > 0) { - warning(length(non_variant), " variants has the same reference and ", - "alternate allele. These variants were excluded.") - dt <- dt[-non_variant, ] + if (!(xor(all_var, all_count))){ + stop("Provide either 'x' and 'genome' or 'count_table' and 'variant_type'.", call. = FALSE) } - - if (isTRUE(check_ref_chromosomes)) { - # Check for genome style and attempt to convert variants to reference - # genome if they don't match - if (isTRUE(verbose)) { - message("Checking that chromosomes in the 'variant' object match ", - "chromosomes in the 'genome' object.") + + if (all_count){ + + if (canCoerce(count_table, "matrix")) { + count_table <- as.matrix(count_table) + } else { + stop("'count_table' needs to be an object which can be coerced to a matrix. 
") } - dt <- .check_variant_genome(dt = dt, genome = genome) - } - - if (isTRUE(check_ref_bases)) { - if (isTRUE(verbose)) { - message("Checking that the reference bases in the 'variant' object ", - "match the reference bases in the 'genome' object.") + + # create empty musica object + musica <- new("musica") + + if (variant_class %in% c("snv", "SNV", "SNV96", "SBS", "SBS96")) { + + if (nrow(count_table) != 96){ + stop("SBS96 'count_table' must have 96 rows.") + } + + # create SBS mutation type list + forward_change <- c("C>A", "C>G", "C>T", "T>A", "T>C", "T>G") + b1 <- rep(rep(c("A", "C", "G", "T"), each = 4), 6) + b2 <- rep(c("C", "T"), each = 48) + b3 <- rep(c("A", "C", "G", "T"), 24) + mut_trinuc <- apply(cbind(b1, b2, b3), 1, paste, collapse = "") + mut <- rep(forward_change, each = 16) + annotation <- data.frame("motif" = paste0(mut, "_", mut_trinuc), + "mutation" = mut, + "context" = mut_trinuc) + rownames(annotation) <- annotation$motif + + # color mapping for mutation types + color_mapping <- c("C>A" = "#5ABCEBFF", + "C>G" = "#050708FF", + "C>T" = "#D33C32FF", + "T>A" = "#CBCACBFF", + "T>C" = "#ABCD72FF", + "T>G" = "#E7C9C6FF") + + # update count table rownames with SBS96 standard naming + rownames(count_table) <- annotation$motif + + # create count table object + tab <- new("count_table", name = "SBS96", count_table = count_table, + annotation = annotation, features = as.data.frame(annotation$motif[1]), + type = S4Vectors::Rle("SBS"), color_variable = "mutation", + color_mapping = color_mapping, description = paste0("Single Base Substitution table with", + " one base upstream and downstream")) + + # add count table to musica object + tables(musica)[["SBS96"]] <- tab + + } else if (variant_class %in% c("DBS", "dbs", "doublet")) { + stop("Not yet supproted.") + } else if (variant_class %in% c("INDEL", "Indel", "indel", "ind", "IND", + "ID")) { + stop("Not yet supported.") + } else { + stop("Only SBS, DBS, and Indel classes are supported") } - 
.check_variant_ref_in_genome(dt = dt, genome = genome) } - if (isTRUE(standardize_indels)) { - if (isTRUE(verbose)) { - message("Standardizing INS/DEL style") - } - comp_ins <- which(dt$Variant_Type == "INS" & !dt$ref %in% - c("A", "T", "G", "C", "-")) - if (length(comp_ins > 0)) { - message("Removing ", length(comp_ins), " compound insertions") - dt <- dt[-comp_ins, ] - } + if (all_var){ - comp_del <- which(dt$Variant_Type == "DEL" & !dt$alt %in% - c("A", "T", "G", "C", "-")) - if (length(comp_del > 0)) { - message("Removing ", length(comp_del), " compound deletions") - dt <- dt[-comp_del, ] + used_fields <- c(.required_musica_headers(), extra_fields) + if (canCoerce(x, "data.table")) { + dt <- data.table::as.data.table(x) + } else { + stop("'x' needs to be an object which can be coerced to a data.table. ", + "Valid classes include but are not limited to 'matrix', 'data.frame'", + " and 'data.table'.") } - - ins <- which(dt$Variant_Type == "INS" & dt$ref %in% - c("A", "T", "G", "C")) - if (length(ins)) { - message("Converting ", length(ins), " insertions") - dt$ref[ins] <- "-" - ins_alt <- dt$alt[ins] - dt$alt[ins] <- substr(ins_alt, 2, nchar(ins_alt)) + if (!inherits(genome, "BSgenome")) { + stop("'genome' needs to be a 'BSgenome' object containing the genome ", + "reference that was used when calling the variants.") } - - ins <- which(dt$Variant_Type == "INS" & dt$ref %in% - c("A", "T", "G", "C")) - if (length(ins)) { - message("Converting ", length(ins), " insertions") - dt$ref[ins] <- "-" - ins_alt <- dt$alt[ins] - dt$alt[ins] <- substr(ins_alt, 2, nchar(ins_alt)) + + # Check for necessary columns and change column names to stardard object + dt <- .check_headers(dt, + chromosome = chromosome_col, + start = start_col, + end = end_col, + ref = ref_col, + alt = alt_col, + sample = sample_col, + update_fields = TRUE) + + # Subset to necessary columns and add variant type + all_fields <- c(.required_musica_headers(), extra_fields) + dt <- dt[, all_fields, with = 
FALSE] + dt <- add_variant_type(dt) + + # Some non-variants are included (e.g. T>T). These will be removed + non_variant <- which(dt$ref == dt$alt) + if (length(non_variant) > 0) { + warning(length(non_variant), " variants has the same reference and ", + "alternate allele. These variants were excluded.") + dt <- dt[-non_variant, ] } - - del <- which(dt$Variant_Type == "DEL" & dt$alt %in% - c("A", "T", "G", "C")) - if (length(del)) { - message("Converting ", length(del), " deletions") - dt$alt[del] <- "-" - del_ref <- dt$ref[del] - dt$ref[del] <- substr(del_ref, 2, nchar(del_ref)) + + if (isTRUE(check_ref_chromosomes)) { + # Check for genome style and attempt to convert variants to reference + # genome if they don't match + if (isTRUE(verbose)) { + message("Checking that chromosomes in the 'variant' object match ", + "chromosomes in the 'genome' object.") + } + dt <- .check_variant_genome(dt = dt, genome = genome) } - } - if (isTRUE(convert_dbs)) { - if (isTRUE(verbose)) { - message("Converting adjacent SBS into DBS") + if (isTRUE(check_ref_bases)) { + if (isTRUE(verbose)) { + message("Checking that the reference bases in the 'variant' object ", + "match the reference bases in the 'genome' object.") + } + .check_variant_ref_in_genome(dt = dt, genome = genome) + } + + if (isTRUE(standardize_indels)) { + if (isTRUE(verbose)) { + message("Standardizing INS/DEL style") + } + comp_ins <- which(dt$Variant_Type == "INS" & !dt$ref %in% + c("A", "T", "G", "C", "-")) + if (length(comp_ins > 0)) { + message("Removing ", length(comp_ins), " compound insertions") + dt <- dt[-comp_ins, ] + } + + comp_del <- which(dt$Variant_Type == "DEL" & !dt$alt %in% + c("A", "T", "G", "C", "-")) + if (length(comp_del > 0)) { + message("Removing ", length(comp_del), " compound deletions") + dt <- dt[-comp_del, ] + } + + ins <- which(dt$Variant_Type == "INS" & dt$ref %in% + c("A", "T", "G", "C")) + if (length(ins)) { + message("Converting ", length(ins), " insertions") + dt$ref[ins] <- "-" + 
ins_alt <- dt$alt[ins] + dt$alt[ins] <- substr(ins_alt, 2, nchar(ins_alt)) + } + + ins <- which(dt$Variant_Type == "INS" & dt$ref %in% + c("A", "T", "G", "C")) + if (length(ins)) { + message("Converting ", length(ins), " insertions") + dt$ref[ins] <- "-" + ins_alt <- dt$alt[ins] + dt$alt[ins] <- substr(ins_alt, 2, nchar(ins_alt)) + } + + del <- which(dt$Variant_Type == "DEL" & dt$alt %in% + c("A", "T", "G", "C")) + if (length(del)) { + message("Converting ", length(del), " deletions") + dt$alt[del] <- "-" + del_ref <- dt$ref[del] + dt$ref[del] <- substr(del_ref, 2, nchar(del_ref)) + } } - sbs <- which(dt$Variant_Type == "SBS") - adjacent <- which(diff(dt$start) == 1) - dbs_ind <- adjacent[which(adjacent %in% sbs & adjacent+1 %in% sbs & - dt$chr[adjacent] == dt$chr[adjacent+1])] - if (length(dbs_ind) > 0) { - message(length(dbs_ind), " SBS converted to DBS") - dt$end[dbs_ind] <- dt$end[dbs_ind] + 1 - dt$ref[dbs_ind] <- paste0(dt$ref[dbs_ind], dt$ref[dbs_ind + 1]) - dt$alt[dbs_ind] <- paste0(dt$alt[dbs_ind], dt$alt[dbs_ind + 1]) - dt$Variant_Type[dbs_ind] <- "DBS" - dt <- dt[-(dbs_ind + 1), ] + + if (isTRUE(convert_dbs)) { + if (isTRUE(verbose)) { + message("Converting adjacent SBS into DBS") + } + sbs <- which(dt$Variant_Type == "SBS") + adjacent <- which(diff(dt$start) == 1) + dbs_ind <- adjacent[which(adjacent %in% sbs & adjacent+1 %in% sbs & + dt$chr[adjacent] == dt$chr[adjacent+1])] + if (length(dbs_ind) > 0) { + message(length(dbs_ind), " SBS converted to DBS") + dt$end[dbs_ind] <- dt$end[dbs_ind] + 1 + dt$ref[dbs_ind] <- paste0(dt$ref[dbs_ind], dt$ref[dbs_ind + 1]) + dt$alt[dbs_ind] <- paste0(dt$alt[dbs_ind], dt$alt[dbs_ind + 1]) + dt$Variant_Type[dbs_ind] <- "DBS" + dt <- dt[-(dbs_ind + 1), ] + } } + + # Create and return a musica object + s <- gtools::mixedsort(unique(dt$sample)) + annot <- data.frame(Samples = factor(s, levels = s)) + dt$sample <- factor(dt$sample, levels = s) + + musica <- new("musica", variants = dt, sample_annotations = annot) } - # 
Create and return a musica object - s <- gtools::mixedsort(unique(dt$sample)) - annot <- data.frame(Samples = factor(s, levels = s)) - dt$sample <- factor(dt$sample, levels = s) - - musica <- new("musica", variants = dt, sample_annotations = annot) return(musica) } + .check_variant_genome <- function(dt, genome) { chr_header <- .required_musica_headers()["chromosome"] diff --git a/man/create_musica.Rd b/man/create_musica.Rd index 152a81f7..ebc7d3ee 100644 --- a/man/create_musica.Rd +++ b/man/create_musica.Rd @@ -7,6 +7,8 @@ create_musica( x, genome, + count_table, + variant_class, check_ref_chromosomes = TRUE, check_ref_bases = TRUE, chromosome_col = "chr", @@ -28,6 +30,11 @@ the variant information.} \item{genome}{A \linkS4class{BSgenome} object indicating which genome reference the variants and their coordinates were derived from.} +\item{count_table}{A data.table, matrix, or data.frame that contains +mutation count data, with samples as columns and mutation types as rows.} + +\item{variant_class}{Mutations are SBS, DBS, or Indel.} + \item{check_ref_chromosomes}{Whether to peform a check to ensure that the chromosomes in the \code{variant} object match the reference chromosomes in the \code{genome} object. If there are mismatches, this From 766705a6e3203e3734f33e3f17de88ce0ed8634d Mon Sep 17 00:00:00 2001 From: Natasha Gurevich Date: Thu, 18 Apr 2024 16:30:47 -0400 Subject: [PATCH 04/17] create_musica comments --- R/load_data.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/load_data.R b/R/load_data.R index 6a10a93a..7026b805 100644 --- a/R/load_data.R +++ b/R/load_data.R @@ -669,15 +669,15 @@ create_musica <- function(x, genome, convert_dbs = TRUE, verbose = TRUE) { - # first argument set all_var <- ! any(missing(x), missing(genome)) - # second argument set all_count <- ! 
any(missing(count_table), missing(variant_class)) + # ensure one of two possible parameter options are met (variants or counts) if (!(xor(all_var, all_count))){ stop("Provide either 'x' and 'genome' or 'count_table' and 'variant_type'.", call. = FALSE) } + # if inputs are for count table if (all_count){ if (canCoerce(count_table, "matrix")) { @@ -738,6 +738,7 @@ create_musica <- function(x, genome, } } + # if inputs are for variants if (all_var){ used_fields <- c(.required_musica_headers(), extra_fields) From 25abbae957db95b38c10489c4ea43565b1ef1905 Mon Sep 17 00:00:00 2001 From: Natasha Gurevich Date: Mon, 6 May 2024 18:02:30 -0400 Subject: [PATCH 05/17] Separate create_musica into two functions --- NAMESPACE | 3 +- R/load_data.R | 393 +++++++++--------- man/create_musica_from_counts.Rd | 32 ++ ...sica.Rd => create_musica_from_variants.Rd} | 15 +- 4 files changed, 237 insertions(+), 206 deletions(-) create mode 100644 man/create_musica_from_counts.Rd rename man/{create_musica.Rd => create_musica_from_variants.Rd} (90%) diff --git a/NAMESPACE b/NAMESPACE index 4255612d..462b642e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -23,7 +23,8 @@ export(compare_cosmic_v2) export(compare_cosmic_v3) export(compare_results) export(cosmic_v2_subtype_map) -export(create_musica) +export(create_musica_from_counts) +export(create_musica_from_variants) export(create_umap) export(discover_signatures) export(drop_annotation) diff --git a/R/load_data.R b/R/load_data.R index 7026b805..a3d52b78 100644 --- a/R/load_data.R +++ b/R/load_data.R @@ -613,9 +613,6 @@ extract_variants_from_maf_file <- function(maf_file, extra_fields = NULL) { #' the variant information. #' @param genome A \linkS4class{BSgenome} object indicating which genome #' reference the variants and their coordinates were derived from. -#' @param count_table A data.table, matrix, or data.frame that contains -#' mutation count data, with samples as columns and mutation types as rows. 
-#' @param variant_class Mutations are SBS, DBS, or Indel. #' @param check_ref_chromosomes Whether to peform a check to ensure that #' the chromosomes in the \code{variant} object match the reference #' chromosomes in the \code{genome} object. If there are mismatches, this @@ -652,219 +649,226 @@ extract_variants_from_maf_file <- function(maf_file, extra_fields = NULL) { #' package = "musicatk") #' variants <- extract_variants_from_maf_file(maf_file) #' g <- select_genome("38") -#' musica <- create_musica(x = variants, genome = g) +#' musica <- create_musica_from_variants(x = variants, genome = g) #' @export -create_musica <- function(x, genome, - count_table, variant_class, - check_ref_chromosomes = TRUE, - check_ref_bases = TRUE, - chromosome_col = "chr", - start_col = "start", - end_col = "end", - ref_col = "ref", - alt_col = "alt", - sample_col = "sample", - extra_fields = NULL, - standardize_indels = TRUE, - convert_dbs = TRUE, - verbose = TRUE) { +create_musica_from_variants <- function(x, genome, + check_ref_chromosomes = TRUE, + check_ref_bases = TRUE, + chromosome_col = "chr", + start_col = "start", + end_col = "end", + ref_col = "ref", + alt_col = "alt", + sample_col = "sample", + extra_fields = NULL, + standardize_indels = TRUE, + convert_dbs = TRUE, + verbose = TRUE) { + - all_var <- ! any(missing(x), missing(genome)) - all_count <- ! any(missing(count_table), missing(variant_class)) + + used_fields <- c(.required_musica_headers(), extra_fields) + if (canCoerce(x, "data.table")) { + dt <- data.table::as.data.table(x) + } else { + stop("'x' needs to be an object which can be coerced to a data.table. 
", + "Valid classes include but are not limited to 'matrix', 'data.frame'", + " and 'data.table'.") + } + if (!inherits(genome, "BSgenome")) { + stop("'genome' needs to be a 'BSgenome' object containing the genome ", + "reference that was used when calling the variants.") + } + + # Check for necessary columns and change column names to stardard object + dt <- .check_headers(dt, + chromosome = chromosome_col, + start = start_col, + end = end_col, + ref = ref_col, + alt = alt_col, + sample = sample_col, + update_fields = TRUE) + + # Subset to necessary columns and add variant type + all_fields <- c(.required_musica_headers(), extra_fields) + dt <- dt[, all_fields, with = FALSE] + dt <- add_variant_type(dt) - # ensure one of two possible parameter options are met (variants or counts) - if (!(xor(all_var, all_count))){ - stop("Provide either 'x' and 'genome' or 'count_table' and 'variant_type'.", call. = FALSE) + # Some non-variants are included (e.g. T>T). These will be removed + non_variant <- which(dt$ref == dt$alt) + if (length(non_variant) > 0) { + warning(length(non_variant), " variants has the same reference and ", + "alternate allele. 
These variants were excluded.") + dt <- dt[-non_variant, ] } - - # if inputs are for count table - if (all_count){ + + if (isTRUE(check_ref_chromosomes)) { + # Check for genome style and attempt to convert variants to reference + # genome if they don't match + if (isTRUE(verbose)) { + message("Checking that chromosomes in the 'variant' object match ", + "chromosomes in the 'genome' object.") + } + dt <- .check_variant_genome(dt = dt, genome = genome) + } + + if (isTRUE(check_ref_bases)) { + if (isTRUE(verbose)) { + message("Checking that the reference bases in the 'variant' object ", + "match the reference bases in the 'genome' object.") + } + .check_variant_ref_in_genome(dt = dt, genome = genome) + } + + if (isTRUE(standardize_indels)) { + if (isTRUE(verbose)) { + message("Standardizing INS/DEL style") + } + comp_ins <- which(dt$Variant_Type == "INS" & !dt$ref %in% + c("A", "T", "G", "C", "-")) + if (length(comp_ins > 0)) { + message("Removing ", length(comp_ins), " compound insertions") + dt <- dt[-comp_ins, ] + } - if (canCoerce(count_table, "matrix")) { - count_table <- as.matrix(count_table) - } else { - stop("'count_table' needs to be an object which can be coerced to a matrix. 
") + comp_del <- which(dt$Variant_Type == "DEL" & !dt$alt %in% + c("A", "T", "G", "C", "-")) + if (length(comp_del > 0)) { + message("Removing ", length(comp_del), " compound deletions") + dt <- dt[-comp_del, ] } - # create empty musica object - musica <- new("musica") + ins <- which(dt$Variant_Type == "INS" & dt$ref %in% + c("A", "T", "G", "C")) + if (length(ins)) { + message("Converting ", length(ins), " insertions") + dt$ref[ins] <- "-" + ins_alt <- dt$alt[ins] + dt$alt[ins] <- substr(ins_alt, 2, nchar(ins_alt)) + } - if (variant_class %in% c("snv", "SNV", "SNV96", "SBS", "SBS96")) { - - if (nrow(count_table) != 96){ - stop("SBS96 'count_table' must have 96 rows.") - } - - # create SBS mutation type list - forward_change <- c("C>A", "C>G", "C>T", "T>A", "T>C", "T>G") - b1 <- rep(rep(c("A", "C", "G", "T"), each = 4), 6) - b2 <- rep(c("C", "T"), each = 48) - b3 <- rep(c("A", "C", "G", "T"), 24) - mut_trinuc <- apply(cbind(b1, b2, b3), 1, paste, collapse = "") - mut <- rep(forward_change, each = 16) - annotation <- data.frame("motif" = paste0(mut, "_", mut_trinuc), - "mutation" = mut, - "context" = mut_trinuc) - rownames(annotation) <- annotation$motif - - # color mapping for mutation types - color_mapping <- c("C>A" = "#5ABCEBFF", - "C>G" = "#050708FF", - "C>T" = "#D33C32FF", - "T>A" = "#CBCACBFF", - "T>C" = "#ABCD72FF", - "T>G" = "#E7C9C6FF") - - # update count table rownames with SBS96 standard naming - rownames(count_table) <- annotation$motif - - # create count table object - tab <- new("count_table", name = "SBS96", count_table = count_table, - annotation = annotation, features = as.data.frame(annotation$motif[1]), - type = S4Vectors::Rle("SBS"), color_variable = "mutation", - color_mapping = color_mapping, description = paste0("Single Base Substitution table with", - " one base upstream and downstream")) - - # add count table to musica object - tables(musica)[["SBS96"]] <- tab - - } else if (variant_class %in% c("DBS", "dbs", "doublet")) { - stop("Not yet 
supproted.") - } else if (variant_class %in% c("INDEL", "Indel", "indel", "ind", "IND", - "ID")) { - stop("Not yet supported.") - } else { - stop("Only SBS, DBS, and Indel classes are supported") + ins <- which(dt$Variant_Type == "INS" & dt$ref %in% + c("A", "T", "G", "C")) + if (length(ins)) { + message("Converting ", length(ins), " insertions") + dt$ref[ins] <- "-" + ins_alt <- dt$alt[ins] + dt$alt[ins] <- substr(ins_alt, 2, nchar(ins_alt)) } - } - - # if inputs are for variants - if (all_var){ - used_fields <- c(.required_musica_headers(), extra_fields) - if (canCoerce(x, "data.table")) { - dt <- data.table::as.data.table(x) - } else { - stop("'x' needs to be an object which can be coerced to a data.table. ", - "Valid classes include but are not limited to 'matrix', 'data.frame'", - " and 'data.table'.") + del <- which(dt$Variant_Type == "DEL" & dt$alt %in% + c("A", "T", "G", "C")) + if (length(del)) { + message("Converting ", length(del), " deletions") + dt$alt[del] <- "-" + del_ref <- dt$ref[del] + dt$ref[del] <- substr(del_ref, 2, nchar(del_ref)) } - if (!inherits(genome, "BSgenome")) { - stop("'genome' needs to be a 'BSgenome' object containing the genome ", - "reference that was used when calling the variants.") + } + + if (isTRUE(convert_dbs)) { + if (isTRUE(verbose)) { + message("Converting adjacent SBS into DBS") + } + sbs <- which(dt$Variant_Type == "SBS") + adjacent <- which(diff(dt$start) == 1) + dbs_ind <- adjacent[which(adjacent %in% sbs & adjacent+1 %in% sbs & + dt$chr[adjacent] == dt$chr[adjacent+1])] + if (length(dbs_ind) > 0) { + message(length(dbs_ind), " SBS converted to DBS") + dt$end[dbs_ind] <- dt$end[dbs_ind] + 1 + dt$ref[dbs_ind] <- paste0(dt$ref[dbs_ind], dt$ref[dbs_ind + 1]) + dt$alt[dbs_ind] <- paste0(dt$alt[dbs_ind], dt$alt[dbs_ind + 1]) + dt$Variant_Type[dbs_ind] <- "DBS" + dt <- dt[-(dbs_ind + 1), ] } + } - # Check for necessary columns and change column names to stardard object - dt <- .check_headers(dt, - chromosome = 
chromosome_col, - start = start_col, - end = end_col, - ref = ref_col, - alt = alt_col, - sample = sample_col, - update_fields = TRUE) + # Create and return a musica object + s <- gtools::mixedsort(unique(dt$sample)) + annot <- data.frame(Samples = factor(s, levels = s)) + dt$sample <- factor(dt$sample, levels = s) - # Subset to necessary columns and add variant type - all_fields <- c(.required_musica_headers(), extra_fields) - dt <- dt[, all_fields, with = FALSE] - dt <- add_variant_type(dt) + musica <- new("musica", variants = dt, sample_annotations = annot) - # Some non-variants are included (e.g. T>T). These will be removed - non_variant <- which(dt$ref == dt$alt) - if (length(non_variant) > 0) { - warning(length(non_variant), " variants has the same reference and ", - "alternate allele. These variants were excluded.") - dt <- dt[-non_variant, ] - } + return(musica) +} + +#' Creates a musica object from a mutation count table +#' +#' This function creates a \linkS4class{musica} object from a mutation count +#' table or matrix. The \linkS4class{musica} class stores variants information, +#' variant-level annotations, sample-level annotations, and count tables and +#' is used as input to the mutational signature discovery and prediction +#' algorithms. +#' +#' @param x A data.table, matrix, or data.frame that contains counts of mutation +#' types for each sample, with samples as columns. +#' @param variant_class Mutations are SBS, DBS, or Indel. 
+#' @return Returns a musica object +#' @examples +#' maf_file <- system.file("extdata", "public_TCGA.LUSC.maf", +#' package = "musicatk") +#' musica <- create_musica_from_counts(x = count_table, variant_class = "SBS96") +#' @export +create_musica_from_counts <- function(x, variant_class) { - if (isTRUE(check_ref_chromosomes)) { - # Check for genome style and attempt to convert variants to reference - # genome if they don't match - if (isTRUE(verbose)) { - message("Checking that chromosomes in the 'variant' object match ", - "chromosomes in the 'genome' object.") - } - dt <- .check_variant_genome(dt = dt, genome = genome) - } + if (canCoerce(count_table, "matrix")) { + count_table <- as.matrix(count_table) + } else { + stop("'count_table' needs to be an object which can be coerced to a matrix. ") + } - if (isTRUE(check_ref_bases)) { - if (isTRUE(verbose)) { - message("Checking that the reference bases in the 'variant' object ", - "match the reference bases in the 'genome' object.") - } - .check_variant_ref_in_genome(dt = dt, genome = genome) - } + # create empty musica object + musica <- new("musica") - if (isTRUE(standardize_indels)) { - if (isTRUE(verbose)) { - message("Standardizing INS/DEL style") - } - comp_ins <- which(dt$Variant_Type == "INS" & !dt$ref %in% - c("A", "T", "G", "C", "-")) - if (length(comp_ins > 0)) { - message("Removing ", length(comp_ins), " compound insertions") - dt <- dt[-comp_ins, ] - } - - comp_del <- which(dt$Variant_Type == "DEL" & !dt$alt %in% - c("A", "T", "G", "C", "-")) - if (length(comp_del > 0)) { - message("Removing ", length(comp_del), " compound deletions") - dt <- dt[-comp_del, ] - } - - ins <- which(dt$Variant_Type == "INS" & dt$ref %in% - c("A", "T", "G", "C")) - if (length(ins)) { - message("Converting ", length(ins), " insertions") - dt$ref[ins] <- "-" - ins_alt <- dt$alt[ins] - dt$alt[ins] <- substr(ins_alt, 2, nchar(ins_alt)) - } - - ins <- which(dt$Variant_Type == "INS" & dt$ref %in% - c("A", "T", "G", "C")) - if 
(length(ins)) { - message("Converting ", length(ins), " insertions") - dt$ref[ins] <- "-" - ins_alt <- dt$alt[ins] - dt$alt[ins] <- substr(ins_alt, 2, nchar(ins_alt)) - } - - del <- which(dt$Variant_Type == "DEL" & dt$alt %in% - c("A", "T", "G", "C")) - if (length(del)) { - message("Converting ", length(del), " deletions") - dt$alt[del] <- "-" - del_ref <- dt$ref[del] - dt$ref[del] <- substr(del_ref, 2, nchar(del_ref)) - } - } + if (variant_class %in% c("snv", "SNV", "SNV96", "SBS", "SBS96")) { - if (isTRUE(convert_dbs)) { - if (isTRUE(verbose)) { - message("Converting adjacent SBS into DBS") - } - sbs <- which(dt$Variant_Type == "SBS") - adjacent <- which(diff(dt$start) == 1) - dbs_ind <- adjacent[which(adjacent %in% sbs & adjacent+1 %in% sbs & - dt$chr[adjacent] == dt$chr[adjacent+1])] - if (length(dbs_ind) > 0) { - message(length(dbs_ind), " SBS converted to DBS") - dt$end[dbs_ind] <- dt$end[dbs_ind] + 1 - dt$ref[dbs_ind] <- paste0(dt$ref[dbs_ind], dt$ref[dbs_ind + 1]) - dt$alt[dbs_ind] <- paste0(dt$alt[dbs_ind], dt$alt[dbs_ind + 1]) - dt$Variant_Type[dbs_ind] <- "DBS" - dt <- dt[-(dbs_ind + 1), ] - } + if (nrow(count_table) != 96){ + stop("SBS96 'count_table' must have 96 rows.") } - # Create and return a musica object - s <- gtools::mixedsort(unique(dt$sample)) - annot <- data.frame(Samples = factor(s, levels = s)) - dt$sample <- factor(dt$sample, levels = s) + # create SBS mutation type list + forward_change <- c("C>A", "C>G", "C>T", "T>A", "T>C", "T>G") + b1 <- rep(rep(c("A", "C", "G", "T"), each = 4), 6) + b2 <- rep(c("C", "T"), each = 48) + b3 <- rep(c("A", "C", "G", "T"), 24) + mut_trinuc <- apply(cbind(b1, b2, b3), 1, paste, collapse = "") + mut <- rep(forward_change, each = 16) + annotation <- data.frame("motif" = paste0(mut, "_", mut_trinuc), + "mutation" = mut, + "context" = mut_trinuc) + rownames(annotation) <- annotation$motif + + # color mapping for mutation types + color_mapping <- c("C>A" = "#5ABCEBFF", + "C>G" = "#050708FF", + "C>T" = 
"#D33C32FF", + "T>A" = "#CBCACBFF", + "T>C" = "#ABCD72FF", + "T>G" = "#E7C9C6FF") + + # update count table rownames with SBS96 standard naming + rownames(count_table) <- annotation$motif - musica <- new("musica", variants = dt, sample_annotations = annot) + # create count table object + tab <- new("count_table", name = "SBS96", count_table = count_table, + annotation = annotation, features = as.data.frame(annotation$motif[1]), + type = S4Vectors::Rle("SBS"), color_variable = "mutation", + color_mapping = color_mapping, description = paste0("Single Base Substitution table with", + " one base upstream and downstream")) + + # add count table to musica object + tables(musica)[["SBS96"]] <- tab + + } else if (variant_class %in% c("DBS", "dbs", "doublet")) { + stop("Not yet supproted.") + } else if (variant_class %in% c("INDEL", "Indel", "indel", "ind", "IND", + "ID")) { + stop("Not yet supported.") + } else { + stop("Only SBS, DBS, and Indel classes are supported") } return(musica) @@ -872,6 +876,7 @@ create_musica <- function(x, genome, + .check_variant_genome <- function(dt, genome) { chr_header <- .required_musica_headers()["chromosome"] diff --git a/man/create_musica_from_counts.Rd b/man/create_musica_from_counts.Rd new file mode 100644 index 00000000..82d3dbcc --- /dev/null +++ b/man/create_musica_from_counts.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/load_data.R +\name{create_musica_from_counts} +\alias{create_musica_from_counts} +\title{Creates a musica object from a mutation count table} +\usage{ +create_musica_from_counts(x, variant_class) +} +\arguments{ +\item{x}{A data.table, matrix, or data.frame that contains counts of mutation +types for each sample, with samples as columns.} + +\item{variant_class}{Mutations are SBS, DBS, or Indel.} + +\item{verbose}{Whether to print status messages during error checking. 
+Default \code{TRUE}.} +} +\value{ +Returns a musica object +} +\description{ +This function creates a \linkS4class{musica} object from a mutation count +table or matrix. The \linkS4class{musica} class stores variants information, +variant-level annotations, sample-level annotations, and count tables and +is used as input to the mutational signature discovery and prediction +algorithms. +} +\examples{ +maf_file <- system.file("extdata", "public_TCGA.LUSC.maf", +package = "musicatk") +musica <- create_musica_from_counts(x = count_table, variant_class = "SBS96") +} diff --git a/man/create_musica.Rd b/man/create_musica_from_variants.Rd similarity index 90% rename from man/create_musica.Rd rename to man/create_musica_from_variants.Rd index ebc7d3ee..19a59aca 100644 --- a/man/create_musica.Rd +++ b/man/create_musica_from_variants.Rd @@ -1,14 +1,12 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/load_data.R -\name{create_musica} -\alias{create_musica} +\name{create_musica_from_variants} +\alias{create_musica_from_variants} \title{Creates a musica object from a variant table} \usage{ -create_musica( +create_musica_from_variants( x, genome, - count_table, - variant_class, check_ref_chromosomes = TRUE, check_ref_bases = TRUE, chromosome_col = "chr", @@ -30,11 +28,6 @@ the variant information.} \item{genome}{A \linkS4class{BSgenome} object indicating which genome reference the variants and their coordinates were derived from.} -\item{count_table}{A data.table, matrix, or data.frame that contains -mutation count data, with samples as columns and mutation types as rows.} - -\item{variant_class}{Mutations are SBS, DBS, or Indel.} - \item{check_ref_chromosomes}{Whether to peform a check to ensure that the chromosomes in the \code{variant} object match the reference chromosomes in the \code{genome} object. 
If there are mismatches, this @@ -97,5 +90,5 @@ maf_file <- system.file("extdata", "public_TCGA.LUSC.maf", package = "musicatk") variants <- extract_variants_from_maf_file(maf_file) g <- select_genome("38") -musica <- create_musica(x = variants, genome = g) +musica <- create_musica_from_variants(x = variants, genome = g) } From ebe021cbcee7df5b5c59a631284ae1629d14a0fd Mon Sep 17 00:00:00 2001 From: Natasha Gurevich Date: Mon, 6 May 2024 19:02:10 -0400 Subject: [PATCH 06/17] Documentation updates for create_musica function separation --- R/discovery_prediction.R | 5 +++-- R/methods.R | 24 ++++++++++++++++-------- R/plotting.R | 2 ++ R/umap.R | 4 ++++ man/built_tables.Rd | 3 ++- man/create_musica_from_counts.Rd | 9 +++------ man/discover_signatures.Rd | 5 +++-- man/samp_annot.Rd | 3 ++- man/sample_names.Rd | 7 +++++-- man/tables.Rd | 6 ++++-- man/variants.Rd | 5 +++-- 11 files changed, 47 insertions(+), 26 deletions(-) diff --git a/R/discovery_prediction.R b/R/discovery_prediction.R index 4a958f0c..1ca7fc5d 100644 --- a/R/discovery_prediction.R +++ b/R/discovery_prediction.R @@ -10,9 +10,10 @@ NULL #' 2) an "exposure" matrix containing the estimated counts for each signature #' in each sample. Before mutational discovery can be performed, #' variants from samples first need to be stored in a -#' \code{\linkS4class{musica}} object using the \link{create_musica} function +#' \code{\linkS4class{musica}} object using the \link{create_musica_from_variants} +#' or \link{create_musica_from_counts} function #' and mutation count tables need to be created using functions such as -#' \link{build_standard_table}. +#' \link{build_standard_table} if \link{create_musica_from_counts} was not used. #' @param musica A \code{\linkS4class{musica}} object. #' @param table_name Name of the table to use for signature discovery. 
Needs #' to be the same name supplied to the table building functions such as diff --git a/R/methods.R b/R/methods.R index 6d42f6b6..c1838b1f 100644 --- a/R/methods.R +++ b/R/methods.R @@ -225,7 +225,8 @@ setReplaceMethod( #' @description The \code{variants} \code{data.table} contains the variants #' and variant-level annotations #' @param object A \code{\linkS4class{musica}} object generated by -#' the \link{create_musica} function or a \code{\linkS4class{musica_result}} +#' the \link{create_musica_from_variants} or \link {create_musica_from_counts} function, +#' or a \code{\linkS4class{musica_result}} #' object generated by a mutational discovery or prediction tool. #' @rdname variants #' @return A data.table of variants @@ -261,7 +262,7 @@ setMethod( #' @rdname variants #' @param musica A \code{\linkS4class{musica}} object generated by -#' the \link{create_musica} function +#' the \link{create_musica_from_variants} or \link{create_musica_from_counts} function #' @param value A \code{\linkS4class{data.table}} of mutational variants and #' variant-level annotations #' @export @@ -292,7 +293,8 @@ setReplaceMethod( #' @description The \code{count_tables} contains standard and/or custom #' count tables created from variants #' @param object A \code{\linkS4class{musica}} object generated by -#' the \link{create_musica} function or a \code{\linkS4class{musica_result}} +#' the \link{create_musica_from_variants} or \link{create_musica_from_counts} function, +#' or a \code{\linkS4class{musica_result}} #' object generated by a mutational discovery or prediction tool. 
#' @rdname tables #' @return A list of count_tables @@ -328,7 +330,8 @@ setMethod( #' @rdname tables #' @param musica A \code{\linkS4class{musica}} object generated by -#' the \link{create_musica} function or a \code{\linkS4class{musica_result}} +#' the \link{create_musica_from_variants} or \link{create_musica_from_counts} function, +#' or a \code{\linkS4class{musica_result}} #' object generated by a mutational discovery or prediction tool. #' @param value A list of \code{\linkS4class{count_table}} objects representing #' counts of motifs in samples @@ -366,7 +369,8 @@ setReplaceMethod( #' downstream plotting functions such as \code{\link{plot_exposures}} or #' \code{\link{plot_umap}} to group or color samples by a particular annotation. #' @param object A \code{\linkS4class{musica}} object generated by -#' the \link{create_musica} function or a \code{\linkS4class{musica_result}} +#' the \link{create_musica_from_variants} or \link{create_musica_from_counts} function, +#' or a \code{\linkS4class{musica_result}} #' object generated by a mutational discovery or prediction tool. #' @param name The name of the new annotation to add. #' @param value A vector containing the new sample annotations. Needs to be @@ -465,11 +469,14 @@ setReplaceMethod( #' @title Retrieve sample names from a musica or musica_result object #' @description Sample names were included in the \code{sample} column -#' in the variant object passed to \code{\link{create_musica}}. This returns +#' in the variant object passed to \code{\link{create_musica_from_variants}}, or in +#' the colnames of the count table object passed to +#' \code{\link{create_musica_from_counts}}. This returns #' a unique list of samples names in the order they are inside the #' \code{\linkS4class{musica}} object. 
#' @param object A \code{\linkS4class{musica}} object generated by -#' the \link{create_musica} function or a \code{\linkS4class{musica_result}} +#' the \link{create_musica_from_variants} or \link{create_musica_from_counts} function, +#' or a \code{\linkS4class{musica_result}} #' object generated by a mutational discovery or prediction tool. #' @rdname sample_names #' @return A character vector of sample names @@ -518,7 +525,8 @@ setMethod( #' @description The \code{count_tables} contains standard and/or custom #' count tables created from variants #' @param object A \code{\linkS4class{musica}} object generated by -#' the \link{create_musica} function or a \code{\linkS4class{musica_result}} +#' the \link{create_musica_from_variants} or \link{create_musica_from_counts} function, +#' or a \code{\linkS4class{musica_result}} #' object generated by a mutational discovery or prediction tool. #' @rdname built_tables #' @return The names of created count_tables diff --git a/R/plotting.R b/R/plotting.R index e5faf4fe..19f1f3c4 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -125,6 +125,8 @@ plot_signatures <- function(result, plotly = FALSE, xend <- NULL y <- NULL yend <- NULL + label <- NULL + ymax <- NULL signatures <- signatures(result) sig_names <- colnames(signatures) diff --git a/R/umap.R b/R/umap.R index ec78721c..ebb56074 100644 --- a/R/umap.R +++ b/R/umap.R @@ -28,6 +28,10 @@ #' @export create_umap <- function(result, n_neighbors = 30, min_dist = 0.75, spread = 1) { + + # dummy call to Matrix + Matrix::Matrix() + samples <- exposures(result) samples <- sweep(samples, 2, colSums(samples), FUN = "/") diff --git a/man/built_tables.Rd b/man/built_tables.Rd index ad166707..8e9689f3 100644 --- a/man/built_tables.Rd +++ b/man/built_tables.Rd @@ -15,7 +15,8 @@ built_tables(object) } \arguments{ \item{object}{A \code{\linkS4class{musica}} object generated by -the \link{create_musica} function or a \code{\linkS4class{musica_result}} +the \link{create_musica_from_variants} or 
\link{create_musica_from_counts} function, +or a \code{\linkS4class{musica_result}} object generated by a mutational discovery or prediction tool.} } \value{ diff --git a/man/create_musica_from_counts.Rd b/man/create_musica_from_counts.Rd index 82d3dbcc..b04aee64 100644 --- a/man/create_musica_from_counts.Rd +++ b/man/create_musica_from_counts.Rd @@ -11,9 +11,6 @@ create_musica_from_counts(x, variant_class) types for each sample, with samples as columns.} \item{variant_class}{Mutations are SBS, DBS, or Indel.} - -\item{verbose}{Whether to print status messages during error checking. -Default \code{TRUE}.} } \value{ Returns a musica object @@ -26,7 +23,7 @@ is used as input to the mutational signature discovery and prediction algorithms. } \examples{ -maf_file <- system.file("extdata", "public_TCGA.LUSC.maf", -package = "musicatk") -musica <- create_musica_from_counts(x = count_table, variant_class = "SBS96") +#maf_file <- system.file("extdata", "public_TCGA.LUSC.maf", +#package = "musicatk") +#musica <- create_musica_from_counts(x = count_table, variant_class = "SBS96") } diff --git a/man/discover_signatures.Rd b/man/discover_signatures.Rd index ad70d299..148a4aa0 100644 --- a/man/discover_signatures.Rd +++ b/man/discover_signatures.Rd @@ -48,9 +48,10 @@ matrix containing the probability of each mutation type in each sample and 2) an "exposure" matrix containing the estimated counts for each signature in each sample. Before mutational discovery can be performed, variants from samples first need to be stored in a -\code{\linkS4class{musica}} object using the \link{create_musica} function +\code{\linkS4class{musica}} object using the \link{create_musica_from_variants} +or \link{create_musica_from_counts} function and mutation count tables need to be created using functions such as -\link{build_standard_table}. +\link{build_standard_table} if \link{create_musica_from_counts} was not used. 
} \examples{ data(musica) diff --git a/man/samp_annot.Rd b/man/samp_annot.Rd index 4082b4ea..08b1810d 100644 --- a/man/samp_annot.Rd +++ b/man/samp_annot.Rd @@ -23,7 +23,8 @@ samp_annot(object, name) <- value } \arguments{ \item{object}{A \code{\linkS4class{musica}} object generated by -the \link{create_musica} function or a \code{\linkS4class{musica_result}} +the \link{create_musica_from_variants} or \link{create_musica_from_counts} function, +or a \code{\linkS4class{musica_result}} object generated by a mutational discovery or prediction tool.} \item{name}{The name of the new annotation to add.} diff --git a/man/sample_names.Rd b/man/sample_names.Rd index 9f535eb0..8ec754db 100644 --- a/man/sample_names.Rd +++ b/man/sample_names.Rd @@ -14,7 +14,8 @@ sample_names(object) } \arguments{ \item{object}{A \code{\linkS4class{musica}} object generated by -the \link{create_musica} function or a \code{\linkS4class{musica_result}} +the \link{create_musica_from_variants} or \link{create_musica_from_counts} function, +or a \code{\linkS4class{musica_result}} object generated by a mutational discovery or prediction tool.} } \value{ @@ -22,7 +23,9 @@ A character vector of sample names } \description{ Sample names were included in the \code{sample} column -in the variant object passed to \code{\link{create_musica}}. This returns +in the variant object passed to \code{\link{create_musica_from_variants}}, or in +the colnames of the count table object passed to +\code{\link{create_musica_from_counts}}. This returns a unique list of samples names in the order they are inside the \code{\linkS4class{musica}} object. 
} diff --git a/man/tables.Rd b/man/tables.Rd index daa45487..4b8dab75 100644 --- a/man/tables.Rd +++ b/man/tables.Rd @@ -21,11 +21,13 @@ tables(musica) <- value } \arguments{ \item{object}{A \code{\linkS4class{musica}} object generated by -the \link{create_musica} function or a \code{\linkS4class{musica_result}} +the \link{create_musica_from_variants} or \link{create_musica_from_counts} function, +or a \code{\linkS4class{musica_result}} object generated by a mutational discovery or prediction tool.} \item{musica}{A \code{\linkS4class{musica}} object generated by -the \link{create_musica} function or a \code{\linkS4class{musica_result}} +the \link{create_musica_from_variants} or \link{create_musica_from_counts} function, +or a \code{\linkS4class{musica_result}} object generated by a mutational discovery or prediction tool.} \item{value}{A list of \code{\linkS4class{count_table}} objects representing diff --git a/man/variants.Rd b/man/variants.Rd index 3767a3a1..e29e4ef8 100644 --- a/man/variants.Rd +++ b/man/variants.Rd @@ -20,11 +20,12 @@ variants(musica) <- value } \arguments{ \item{object}{A \code{\linkS4class{musica}} object generated by -the \link{create_musica} function or a \code{\linkS4class{musica_result}} +the \link{create_musica_from_variants} or \link {create_musica_from_counts} function, +or a \code{\linkS4class{musica_result}} object generated by a mutational discovery or prediction tool.} \item{musica}{A \code{\linkS4class{musica}} object generated by -the \link{create_musica} function} +the \link{create_musica_from_variants} or \link{create_musica_from_counts} function} \item{value}{A \code{\linkS4class{data.table}} of mutational variants and variant-level annotations} From 786133c9093fd34ecc8a3fd15b73db727d70dbce Mon Sep 17 00:00:00 2001 From: Natasha Gurevich Date: Tue, 14 May 2024 12:57:06 -0400 Subject: [PATCH 07/17] Adding synthetic breast cancer data for benchmarking --- data/synthetic_breast_counts.rda | Bin 0 -> 86322 bytes 
data/synthetic_breast_true_exposures.rda | Bin 0 -> 14050 bytes 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 data/synthetic_breast_counts.rda create mode 100644 data/synthetic_breast_true_exposures.rda diff --git a/data/synthetic_breast_counts.rda b/data/synthetic_breast_counts.rda new file mode 100644 index 0000000000000000000000000000000000000000..75d26e8f86020502fbc95cdc5983c60ea9213006 GIT binary patch literal 86322 zcmZ782h`?wd7pVbGaB^@DhPq}Oz&M~Ms<3VMr~%)%d0M-UXhT5B-H6eRMWwf7_b31 z2*riP#14*o;>fHc*gw31bMK~NSl zE4IPb!sg%hKq_T@78Fz1KH z{8-aUTnB{v=H9=u4u9ABzlIooGk!b19sK?<9*p~fpBLgM;&Y)F{&W0VTpx?$JMqD| zKgRJ}anl6$UW{Lkk49RJu4?>#82=(Z6_3SKyf?lTACDX3nNXY0$GhX9cxPN2d*Y%{ zBXco49$$@>u`#X>aa|g>#`gGnd?u#i9r1&>AlSVk*tj`%#$}=QQ}IM>inoRu(Ubpe z;rm+^{GA_jV;t?cK z?}u3A<8K_pH}1Xfa8daF`CcBr3o%-I&p&@+amE*|hFHCKkH71J*6Q);Q0Emv1ARU_ z>>1w_OF|Ey8JEY)K?}MJqj=dj(r?IHOk$v;7@@!BhuXIrcad0!N2A`iNY zU7l=P$TDPK6xl;fzoA_!)E(H{brmMtikjUtiG48+o(C zKA%Hf`Ia+#-m&MNHQ%&U*IR=Y^FqFx!v5^w^Sq$bsWFcAu^{-C40aAEUv!`&6JGw7pM7sSdCFWcpHz_KL2y7?QE^4`YV zLoI3jL_8VugU^qKn$nN|ZB0Lwsn69yo18qaF~=6F6H$@ZScZ2R}q(e6;=x5pKc zy>pA}&|l5nw=3K4w#PpId@c{Uk3-DrDy~6WBmL~tny#0GeKE>c4_MR4o{>#D&I|t3 zhCX_f{xgF;w&^N&@4Rv5F6^rvt*!ORPzU?&`;)U6ti|uHwVv|lja&!+3!6K0*B?LT z{P547(Rvi@ekkZ>?>B;8;@=y5LC>xaerO@bGvoNoytz@W^v=HwcK8(sP1Jjc!{|G; z=bqT?=_&i}%imjCo7)@o-|K`g_$ScW?I=H=+# zidY&8C+l^Mb3(6qOOta#E#$N@)KqNjI~TW{*wYhYb#5)sW#JoVZ)@zFtp7{nqF6Z@ zA8r)S;!u;9!tcZl;oe=bBm5qzcQu-}&9^wzSKM@!>(GDhipTyqP7QYF#i|hh?C@Uh z;6rVOK4G69d$d#s_RZ-)8-C<3Z+mR0pIXtx`4HPwbK1ate)IqzYbDa66{ zDdCNM_r=AwHGAxc&z^I7{BC&HosqcJTJC#7P36ts@>mkHgT`y3SZ1itTVksB>cGDI zHLml|z3(n+DFMf>P zxGO$3rsB4^Jlwe^E{+Sr{mbI)@V&5oYV3^jC+lk)oi7PCH^+h)hg?^OwR(zY9BiEx z?B5yu%k^EsE+32HlGq)af*mo<32)SHSE${}us=V%)q~!s+i5{Vy)uraA=Xu4|D14F z&Gp8O!M7M^hdnmth2Kbijcl;#o;)E>d8m(moEwWnEoj4*7@ZvwVix02hsT@K+qt}- zi7P{#Gt-LTp#ze*1P^${U`BY{6F!*SP}GP`}B|>zhXKsZ<=11b?f8Jvuv=kF~SWWX-3T z{CWRk(AxZK;ky^(SA$VDX%w$nEXZzI*Q?x*bw^K{UwvRzTkg;10BB?OAGgHjaLON9*G@sV|aIE 
zTo_{EgYLaQU~@6>lkXY-rP15j;rDNS%!ySY=CwiJarox*IbGKUjq>n&v?0cmaa|)j z?s+#a_%_zTRSYD*T4A$=_+gw>xUSDV(nkF`N@>z&?9m zNB`(~HrVFZ9q~9D{15TDV^0o#4`}2qdxH&HxkDE=A&A9S(jt`W|R z><)F17yoSd{ga#8SlibFgKo~O^_D$z{)ZUY6r))Exg(}wZN%_9B`?1*wEudr%LnZ* z3q5FUd@%Ibg5Y;=h?j3RX~vGY)q`zzoQXkP?l|M$J9pSri=hYTHN1C!cqbQn48EM1 z^T)n%Rt!3`KWIPH%pG(0o%1u)L9dC!dwr&dox3Ye{@C;9t$6LZPam;}(b_w45B`Q& zy>o8OCg1+}V#D5rK@(?~6GPs7o*wR>67CH(8~pP*`1AgiYjZI=mzVi)?l)BaMt;=D zdw2PxiM71MIn+f?&gDg03_7r_Chqzhw6X7RIA<4AF%B_^Pw&Z*Z*S=Uu~~~3^yX)A z@Z~Mr?hQV~<*wM^Pj2qh&bfYOgS{tWPVh$${Vo=eq ztoa%=k+Zoy_pS9I|Gp)@&G}`+_r#Zbek0U@{`UE?FL&_ACY@*?u0c2F-ti4*)`N|y z=6;9ybypqqo4nbyJ~#OFes-*$jBL@$J(^n&)` z2OZcM1%JaEYj>RUIh>ojrj$Ivz6a~!+qTb+ocR5G&`4dysK!QXTodxXI>xa*P7S{Jmmj^wEe5gJ=a(;Ycf~Tq zOdCFFAqMqRA8YT`k`~rtH@7$V;z!$!ANTln?hNMk%-O|M%$wkBd5lB;?#+qW z!MAg9&W~~Mu`+06E>HQq)`(kbIuQzWA_kJUM z57)>2!5{e6PxPaukso#8S3d61oo4pf8;3pL%93EmyP+;@%HO{4Vz4VOeMDEV=gc2J z=Fa&a&iNhY?)c3c>TqxKOG2D?$DTMp)Ou&o>a@vvOCx>UaejN~)0H8Y8{*u_THMyF zV@1%NpAW|sA*O$fn?wDdj2*E(=y73)V|Vax?fl<|8eA6Mc+3B?$z0vm1ix2?T8ovKi!tPdotDwI=UL1VL^Vkrho)?Rn z%V}KA%rr)C=~ljcl-QEnhwl33WI34Tw$u^Wn^TM(9)a*>tYPG!ftb z4t`G!TE0J=siB-cA1CCN-<|Pdd?DDDyRq}>JlHq4cTngtdhqYee8`t?evK!Ge)0`K z{PLBzxqEUvGUUmYyoOw9;Lkaq;oeCFQqO+WkMvFBV*tqt#fKg2R8#Gu#b20hf_p-`XgacW!_8-lj{ z@WGC_jP~U%FMG7-TW*6M=CqXG&|l8nV}nli2i@u6%v#F=?(nlJhFtkE2VLA3D}Ubk6Q6HU{QO-T^MbvP z1lx4y^Y&1e*`YtK3g>i8sfk!bszfByFnx4a7R4il`ntpo6DDW=I$CX)K#3c z_FLzT{a5NLZ)-WjoDPsX+x)_u4SG$5-eb?1{O1JU?5NSwa93 zZRAr8?uv8b(zrR6#flKaS(CLpc*zz0azIav!Ux&w3tPXvpXXgh$Vzw5W82F?+KkH&q zIHw=|_2I#xcdQQ!=e{TV*8cQ0TjF%SC1_^<_d_oH30r>nyq{i~1J zI636WuRU*_(}_M}qlq=FhZ@R>b~Iu~ooIDt&~I+g-I+Z3u`fTl91-%=zkGr%y7}YR zxqcMO;D4yY*V=n)u=7-WF+LRDJR6V1-e7-Uye;kw-_I|`y+JR2^z3D^G3fn{SQm2i z{;v2~JQHg7m+{8X-(QNG;_}J*`o>3Mdu)yeVq5&j_(1UU3!xVl2OGPBu9wEvxHjmv zEcBZ@YPc!v-5zoguiPGw4dI=d?1`PB7V@|x&Wej;N$@9cTAUtgN7s2FE^+IRlY%z$ z4+Q$0#204s}@Ed~UcaUwyD3#6qLjhnVPE zOrw_?^|Ty+Gw4EJ_w~f8VBh_9AunVf^32oV9UwpLCH)0aE 
zIP6(NOz=)D?Axck{MAq%Y`EvnkR$tQZBM=I54wxNoLf^E>1T^g`wZd4!V;$c^8;?U3X^;_Vs{+k`cTX7DW%Y$Cl>gB!~ z+h>PuKG>#{7_8Y+1KN1wt~iZDUk-J+tvxwAlk2l_MzG`TuAr577sr)xVX*hH7zclJ zm51Eqz`q>D>vvXs;yE?c-MKoPA9LbQvakBrZ#`ng3!R8C0Ha727QL;K?T z;ESeeb_!p;mKc{p*AWxeHj2T0zUNLD>cFqsu{HF7xYgC@ zp4!nY?WXVWH{562ncB0ZPYw?881;+(@aMh06`MaA%889HPDZxbxG?nIIB0Nni2sz3 zgWjMYKVpO!tRX(}&zj8bz4F|ec6^CXTCWIQOGv+1tf=q7jYGjoER*&lTC z)_u6=++0m2br`YxUxUs6mi&DERv5n)za7s7?S3Yg`?_;M^Tj`Mzu%XeUlcU^_4q*C z5MurLxHvWh-`P3%>E7wHsZ#w$<6lQMr~f(g?MD8ti@oue@p4=kx5k}?G4B2E7DIjx zyraFl!*_98yg%4_ARY?wtO+$cCHjV@|1-Pq+4{5b?${PL#}DG1cyow5f7AcGIknM! zz2N-(&X!w~;P< z@&91RvA67w-rxA4cq0CLY!AA4qZV|(I4%o%)-Tik*EQ1StYB|NJQ-@aBkqh3hqK2+ z+?U3+ar$Jvr_s6lOG6!wh=W6Y*rcNzH-@>L834d&UFYXEcJ{iY`8hOtS8%Kw|aflP_y}E1Oe?5w4|F<{Fk$v?T2VJZ$ij5)f zl_94^eFJ?nqYp+tX8vt^G*Dx?9vTNu)_mGKEM~<)llf7N-arn^C!?O7AI`;3uX^Rj zImuIQ?s?~oeR3n-gFkC=io+Xc>G9(_$Z<)C)p$lM2>F~IX9xY+AM{as z_Ad*u$k}^y$i;hk@h>m2s_#4FlyLsTSQYYnS1gRvg1sw491zz{F+2S?H|i~Y`YRz9 z_aROro%qs6{K(ztjE>@?oBJz*e{=rXlY?C6L_VkOsmn>>y_!JZu`9PB4s*8o5ChF= z<-NOV0Wq?r9#cUhag4&*&}6o>UN+M|Ir+hTO?-JGDoIOrf2akJ&l z;!r<+#3puktleQtE=D?;8|CbsKXY%yVD65X*uYeXL(jRRCzge}v!z$$EOs@m{xd&U z$#yjFo0vhP(}GSkJ}b6_c=@B9-jbKPmeY~r-hH#(}DSiIBgr^dYS)_1os#9~cTx-STM z?hLu8JD=+3t{4Y<@`oOf-|k@7{c167-~Ic7-MPWW*}*q$z1tq;Kf1c{hESUy1dZfF z53#W8K5fP8j0Vp2hq%RdP{@~0K4?3P^3Z#1(ApbwcJ0HSkuBQLfFBy$8~hFD?uykN zZ@n2}pK3lk#N#~;#Hu&E7nd{l=_pqB#b)iky&*UHnbVc_gI_WF(*w@oy?mDh{eCal zofmB06XW1-P23%e;+j|*zL_&azO<*SxQ#RugF47VZuV#~+&7AgO>xeTAs_w+9r%}* zJA(~#w#Cigp!ML(o*dbiC(ZSPkuJvJ-2PO|33?B)4t~8?|8dA^6nwio4(F>vY;@sGvDls#W>_W z^r1V0CVqQpKj_M@oQJ+=V@1##;_zNx-ttR(xfto-Z}7#ByyY*JK?`d-4RKF3mkXPN zE@I(-sEZiAcg_~w#o>G${1374JIwhPk9*#NCPR&yPTL(fivPxtBY*s#9dx&+A1|BC z$Bp7$9_r|=`$qeGn2VV${=_5>XG5P1_vkyk)o1R@1Ktn62YkSLur-8o( zAy0mv2ywBm&Sy>5dXPSwChMPWJTuOX-7(bN$e+062=Uk#1E2Q9AU1pEeDN<=d5T{g zY`L%Z+;cYEwPx3wL1!B9@&8}9HND4{*qj^Lf93bUk8j^O4fNR1@8*1o%eh$QgnG!u zwnSd4s3 z#RGA3sL!QwMO+ecuzz2i9%8*b=yO%D^Fo{!3xd5hF+b*pIO!x$`ibq-I6qd0`|pl7 
z#PZ7G-Okt^r^Gkng5b+<)E`D?)Bjz_P0c?Se-rfcO^w5Ma$@*a7KV4UQ(JM&n=o$F zLmNVli-Ygollj`l^eR*XN!Uk zQ}M2#r?b1_#dtWJ8+XOlm=)K=7vuRjFg_A=R43oiwQ+Hr5puUaEtZB@&P&HnG=4s^ zKf^sfR>n(lUtAyVdwW@^yS#6Ybe`ciN{rQFL>a6axP)D&G8MN1fdgG+fr|xfw zp$?Zfmy5b>jO`)b{ew>$FQ2T%PNxmQ{sS?NSBG!##ITQ<;Wvd{ark!a%guVIBOTIn z^v1>=aZ#*}-w(PD@%(Y~>M@g@@5cY?%{LpxbaK#xE^LcqPOJ>yr5>XX?ZzQ4_tZ%4 zi-Z1~gI4_8TNL)$VUMo9tJlZu;PdFvqiP{P^FzZuJ`M=Ez9yWDhbG5`J3}3AZ+=aP z^{TioHiq}Ng_^9ItmS!S(1tJC=}Gajujb2QNwAl0(|I4;$ew$fLVd;kVAR*sdH;3e z1;HO}-x=q{g|Rzc2>w4G=f^{FZ|E;Mje|cLiG#*$(c+kJkG9Z{C&$)cUtDsMzZ!Ud z`eeSZ(YZV~1iSj44Rw(>?T?+Hf8{>Z>A>a(1;64{3-O4Pub~e7vFrZ)7__64eZTp7 zNq>90EyTjt1)ULmTIyv+*u#;E8b~4`k8K{ zU?1l4l`H%F*cXR=v096lK0_V!m|Wdo5a-0app*RdjWr#|aa*XJoURFP_+Z<2AP@G% z=dCk6z+U=}y!Bg{ext87Zi;byI@ZJ~F(;-%UF`9HPb}@t&c?lQSG+Uma(wW)ENC+h zT6O=&nmBg`_vx`R)O{TMtP8%!lab#cKDAj9dSo0lc6X?;JoN;fXh8>eM&WyTRq#m{ zy*c#CPzQGPk^6EQhd8~Ji+*R%pL=2-2Vde~Z-|9WbH2pGpZWgb&qzc0&4}kipNaqJ z5W6!r&I_8UrG5H~>r+9qEpbO28e)|r|K|GLT5Q&QiI-NdjLBL~zFl_YDPD6ixa*F$ z*6isi=gz!ye-v`F$CtG?uUxY;#4_BqH`HNqbKj;jnk)%>e5{WJA%ExUv^4aPbKj9V ziH(1Hu?w;2LGf6t3%|4->duxPbnjPUe(*gf?uv!Mm$)8_xiJ+R;>tK9)Q~=OS2HzW z4}8&rR$`#D`+C!!K4Zf>vB-xny(tE{?GAot$J+45oUM5=V*>kpS<6|j{Lp^LU5s=z zI-{$1Q?VlWan~6w#9+;zyY}g79O6(1HL@0yHGPM8hWFypOa2CJ=)<18mxsLH7K?)R z&M%tG>BGm3;hs9_hxI{+LxT_ci&amki#XV!3C-p1o>-0IbM>G_E8?p7Scq-#Nmq0J zhVMcg`jqa@cfgl-bQe2c;A5zRedn|?(uG$1urFWV0H5wUx8~0XvAEBd zc&6gd<4Ylj_r|}8$Aa&##E0UZ@b-!LLf8}gZ^vh2L&)PZ@k~s`ym(LSjCJAtlZC8y z-c;++Pc`znD_#zEABnT$*7#)H9T&tkad~e(+W1o37XNo_iT0=eJ?_rN4e@+j8tl!V zz{b9CUky)-x5W!_amej+A(ks+YJ&Rjjn#2pIQQOKjC7FO=FnGqXi;1g?j9Zb`L0+W z3u8&ph2NnL-)){g(>)@l|0Cq=Tpk~eTS9#wj(3H7YW=6-Y-#XC4}HwOcy5YyaY~5w z)VL$$c1V09o{rt|+woMW`ML4#cw^igH2iq9KU&$S)^z(}Y>KmjuGfcL)j*!^&5PBs zEZp4?X9TUp{?4HLTjCRw`2&qthxZ?e+d>WA8kfZF!KXarc|@>JhXZ47sKdu5Yj@~! 
zX1q3NMGJn1I?#YlhlN^govddyZV#IHonn7Q?1_!xTzn_RYhq^1iKByU=gZ^B@C}bb zJP!ps?yJFtadP}q&`>_?&|*i3P0XJOXTK5he=zP3d3i5JeRgQb^MsI(wRnz+Uw7#!XM6H>e@*ZwSN?|_^}5)^>8%?3llvL5IM~+@YOOAR8}y(%T^|km zt77S7&G#K4=Bd~eY|})JuMhs@Xm8L#tiv5Kh|l*eH#xYsAZRYfpcuu6{%B4Uak#%I+;u+q7nilV zIa7b%lC?Na4>qlDh@qG7YA(ldi0`5p`og#)=Jo6^8gGxA;(rGp?8;4i=HByR6c5Df zY>3yMIjz0(9q@0@yP@Z2H)l)zb5s73~{^XpW!dW z&qcM~Z$q3OY+V-T#VPUeQ1kQS`S{6T^Ubj@yXz5Zj_SAa=+92=%NdNB1|<`-~9B<>AiKpw*Qj9(l1Xj=zqZL*7rt!jPvov|Sv_ zLQHCOY>a~qafpW&`kWU0%?&!=5j5Hnr-m4ogmd@Lp3M2-V_qB)2gU4g_mB|J>ew01 z4iB;OtG~UI^Eu&cD(LuV47%Ri{Mt~53qmZbf{tRnA>85bp->wdh{rpzic?)~3~%K> zE7r$nVk+4GkCQoVZV8&4AG8%W-@C#&yX85$yYYdb@t+3!?#pe^*xL=UC;T>4^B>p3 zeawyRp%%--_i}ZJRUGu9qqQE_*ZH5WlbTwuh*!s?=@mo=TBijCpxHZJ|v+;ZkefaauX-N9vd+@KwMdPN=14RPr$ z^*k}y+8AoNDCFclU8drO@cpYPU;MK}FFy4)M#0W?p@(J!{mhqzx{E`vEr`M1?B>VC ztZ@F*u{(~;Bpcqd|M+B1H#K$Fy{$2hi^F&P7oktL#LIDV%n5#$$7_R)_2Is__@5u* zJSi53JvKob_wSy}#j`GGZN4zZu{Pvq@Aa`d4vLu}2EGpnK4@@MsLO#NC+DvYF})_- z8S1dN`K96hc_Bysz1qLA0|lev84EQaf29Ip%UmB$P=)S`H1 z{7$3y;=U&~hFQ=_IzHSeHa$t#rEx-tc}>u=?{0ce-t5i} zn!PI2i(PuMvoF}78J|O6&1*g&R&}ALyL6Ow0T>6ydAK6~s8b=cIL z&;3LEY7OgC!(HE%wf8GS?0nN{9CGDf9O@y*S+7_h+I$pZ7mIs(h~_(kkAEM_Lk#?= z3my5B?~CExdqZvRk1K=zM}>UkN(1rQXP0le(?TumA2V6gj^;+Sl(#y0Z%-UX`q&en z^rkk%UL})z2|$Ax3xU!C|14X4V^cH zSPz`QFJI!}TP)7#;Lm(f*cab9L3j3_47Pkb>a{n-E?2cXKg4-ouyIAG2VZ=O*E@4@ zSVLZ7mH(iFeS7?>iGB6aL*{D6hBq|j1LksYM-I;A!H3bBUAY_GGxx`y8o6gr3`0y) z&E>Nx_?Fj0K_7lM#v|do@OE4LMC=PWyg!!5oS^lZP`hz>zaaRhrMTwC_v6ASj?wJK zH^#!|r!{Vm8{&-kdhCkbp-+E4Hh1su8b2IAXwIkF=^r%{$FZTu=Y{zBhZ-#lcbw5( zUTiN4zfBv0E$d?X@we!%yuA0;$ewSbJf`cwubR?HT;ic8?cLE!>HoJ( zZ*8vbkHyn*RnUix+v3iU&$c+XH_tad8*23Lz!u_p5$2%tDwnp(h8oeJmyFG4>4~Kl@I3V7IK@)c6j6*{oh*f;~pVpeTVm>!E zgg1PQLoLKppG?PoW+U5+LSA&C1ID4gvw|;ih=~sTtKHFYWE>H%3Vsidso+@($(bL1M?V^8isws_kD33oaYuOf_Q`m0qj%Q_ z4NeO&?TIv)^}|f5*YEXS+;Y_0gU)i5BcJN*Tx?>J6J4OMoR7mBxq>aZIg^Xn27AtF z`U-KGfk0&7TXl-yffgw@lWbYJ78C6Kp&jzY*^U z@%(Mv7PO+ttuYllL(T4wbA$F`x!^~i#q>W{%5TP3+WUN@`RHFYJ|EA-uGX}FIi83c 
zTgzjrH$T;Kpqe5{H4LeE|q7suT(KTe4yG2HcAvpV>DPh2{gf2{GsxIO${ z-4o(*z9Vi3xm_FQggeK^!NK0DpzF$5A8W%oE(toA&kN^!VyFWR7ls#p3?b!qq>)X4qg;^@#9&fVw7d}fG2EPBlSi{kXqdo*Qt*<`)3adD`J-xC^+ zgLd+zLv@{g%ip2lu2CFf)*A;-5C{KykUyi?`4rDdleJvzdnw|5v@awKQE!9RpiF>GpxZIa2!%!1G#7&1;!8aS`7-D97D!v!bg&f}&--vfl*6(V( zD=rUy9*S=TEot}7cseeOwei`wFU}AB^wHQ7+r#@?3-iw#zaMEd;|+};h^_HRJQ%l4 z)_WV-el#wLJ#ks@Ki>GhxFP;m@O^rm5%x}tskk!U8MDLQDdGFMG;&GqG7stH#=b+QJ zSQL-Pd9ga=_?gN4nMN8v7<8Hw@*2nD(7*i54_eVltn#pkI~eX)yk zZ!C$EVpp(j&DV;c3%uiN|B&y3keA<}S;4Q~JtCabYR55t{#!LB?u2mQsSj?VOrp2^Rw zZ)U@O^*i9dG`cV@sS8+aetfxTp8x`MFH~cH=nQ_np&A-|822@ZGA5 zGun?Qqcgg(>)Uc>l(+iuZI4}f9vS1XJ~q_BXsuu6>fP}%GiYzkj~ck6M+W`XM{kIC zsKZ^&_k=sw#x;{sZTAL!)IuHi#UpW3IDabKSr%frCG?p5__{sPZaPQt=)LDct-V)= zhod*63mbREm9aa1A>6w;u8&`jv*N$U2jgesBSAAh)SCW-rgYs9$4?kHx@eA{z^Uo|ji|L~X! z+Flw5gnm$yi-P^_Aue@SXE}SjG2C}ojhyp$MmU!j`_6oiddm6PLDQ|l{>!26>iEsr z5Nycxq1Y1VPuA-1-si&|n)zPj&;Fo4fBY|w)p1;?xzSy98Ba!b`3tABdlfCxV~1#uY(t zy5AKai_ZjmUx{Cc3t}8!3~%+Ae7_u*h40|7pyl7kd*d~I1NSz5IMn~^aev$rm&A?n zrg%2?#?5g){v{S<&#mIzF%*}bkAh%dzHWQvx26yxi!uUXKda-nP1g7J9Y&7?m}GjULUWI z?eW?;Bxnn|@TX?#p+*PBg)yGsTM+}_Lmj@_oVLe=IDRu83-&)0&i4l0)X4Yp(a;C~ z5O0d}L(8O$6@261`6C{_gfUz?2ltHFb@ESwu(3U6sk^DDw{je4u8 zo@9qFJu*NS{U|Q}j}EyX9O9sjQNODx4QS=P`O3+hZTaYBuxFGvZTRPFX4nd%F! 
zT!#fctlf8SsKc$zFAQ;A9qN5fh<$s|fL`qFjJx8hkoz!S-kiPL;-a9FbN3d6da}2t z9{zaa(~%~lKWMx*Xzg3@mVfX0+85`=Y0=y1UjF;Wd*b}~QqYfm@I{|>6ZSUJj~;3( zPP)5C`(5GAsj(w`1HKdf>}`%SV`0z$=6o9Yp*`(}JV(vN;#*i6bWqc+!RM<&jBN2k zx2gDG+!^fALT-15zIiHsHa5i(@l3oeo(s1ARh$+4vVUpNRE!4&9ckcPALuvw(ON$| z8y|`8%{ZX(4e@APR*hCQ-W(78Xm507;}!8s@o?w+H-0GOLl1qcb~F-`^^VZn@>dTv zuuq@kVq?5H-1nV57#re}5c_#S6LD_{eWErrT_0P5og-r$Ck0*S#_BjFs8I8_g7pKNlsGE4yfZlYNia&^N zgns_j_GStR>z4Nc){n7cTVfx>B#YgLxgO(4(@ZQ;uI56_-e*<^=|1acg z`UaLa|5{ua-wN*sT`q6_-ncFX-Lo~4<8zb!pK0{lM{_ow4d2_XaZ`LL=%b(YfEr#K zG-mSy@lsq6;{JS`7IJoeN1Pip+#K8EtYBkaOhtK2Yps`PdqgaV%VJj?8z+P}`bGW6 zAqMdbbrqR8FV*fZe+%?V$zil(ao}&4F#d%oh zmDk0F$lpjGj6)2I;$!htYz+OXA9uy7n2J5&9xdm`Ik7#~$Cg+g55|+>?)|}^ zGyd4JFCVd;9h*Zgd@hRfLfvS!CfHPur6DHY;Zbo?_#X7o$#G(MOPj-ED(J$WzBxa{ zBL?TzcLcjLCTs6^hWk&&vY@$G)k;! zJxYUHLO$Nmd~a+BcaIG*ER0tNZS1orF0tq{-@@Eb8~bMk+hX>1Yj`UsvGPY>F?na? zN4(DF1l{H4o8m)WVxWQX;P8#G$GZ;NyUB<}RDF!h5lZ*?qln zL=3*fH~8MaIXlkjAQp3)x#N881mF6%f?bGL{a1u|)a^~d_YJW;u8IvY)WAp!KG-+X zj&FTHBl|~(GyC3%LA~TEpYa57c<=ifhg#_ibrJ_#-pfNBXn+~PAARNF%pG;-o38Fz z`&NcJys!CN;#F~bygeQWcYPa|#9qBUt??W#(-V%D|sn{DAggWcN&jjt*{!*yPnQ=nM>57o2+P@{lzdpp;+2}tt zelCv58p}?rtt7b)&Bs_Qol3W9$oW7lmG4 z9h-x`(C_N}Xo%1FRH&1F@c#A?2it0*7o6)2_SAuHa$6e9C-c`fo*rxCxS-WlA(j(k zS%^VRhdTUy^WTVL;@dIkB=&E`3n8CF<8AS^_-4@gFXPvOMtbI#gN}NO@1Kk7CU{?s zM|At0sAoq1*vQ8v@%Ql)@qC;YYWj(IDd_rC+|rw^jbDj7;$XIfgpCx4Ha%Tk0hi=YIFRrNf4BepS3VrsBv@!zD304hp{2=Y%*k zoLRpnW=!VG8*xC;tsb`?iSeR1J=VqV2mS4d=R2_>o(bQ?PX~=&8^;BotAaoOteP9% zITNcG^uwZX*B(9TJtz1-H@uV2ig4c^UB|H?oYRqCwV=zapzo{Vz{z~b+nP3Fa);j| zLVvhF)ZwR^-xSX2{I_vKER4s~hh7^)zyC~fb~ZY(`P#;RZq$eR!1tmL=_tD2_WCX_dd}-;D=bpWpoc%%=U!?#}qr-p_20 zuCEJP@=b4RTGICD5Q`YtTpZ)DUp&*_uY1Kc9WTAu)I;jJZnD zy?@A=M*2;i)Ikm1;rGyRmybyuMt@z7-)mjXN52ri7`L~sR->uLe~f>K^cnSTbWw<> zTt~kiW5+aJd}n(NJP$H(JmThrwWjcWe8jkh(j^-{bTe-VEWUx|l1e?#N{ zjt|AZZq44C;=hHM|7E;A*ts!28_qu&m&NDel%VZ<ekb~drgQU#Z{u@u zZM+XGSxHmdckxF`08H+pPcd^LU|j)+f&_i{Qh-V<+( z3qzfr4e`-|zdd~eFE^_FN&UXC&FA)bUpyGM#u;&WJRW-I%(yP9+ejTh8Tpw0-cD_P 
zTBy&aSQWI~8Z;Y+<=6$eb_hc;sW zPz&{S#@=Be4zXI#3qD^P?~2!l9B^%jYfjj|F~+egoU4=iu%#!Cj@Ja=;^q5w!4I7d znc%)}RZPz1pAP=yERId#OdV#& zW#LROs^!kmLqmSF9&C$O3}U5`*z|yNF@a`c<=b1f=f+S+`)aF4*z(pMn}Z+k+4ra3 z?D}JyP1py!?wX6onfq)^#W~@-w!bLWOh)=G4t3FwbhBO&Ys38=;SK%VXGhP>pRC=L zhxpZDVff8BCB*t*Tpg=BcFwoo0q1Xz4Y4`SjrWJM&&KX}D4q$v^(yVz=W9F}mp9U% zj`oiYcj+rH_qWEJkP|KB!;eueVpV)kpnvg? z?r+=|<9Hyp#hD=o`dDv>Rg<~8{#9Hb>%;p6K@U0UgK?}38lDw$m$P~;i@8B#-yGfL z$mfO-KYQ|Zcc_hczg0f@qp$B#yHrdBVYWFL;YzXb~dc_f;eb9sl&*dKZ--* zFXP|FcZ06K5&u1&2yefUc4s$!GSXqRtMRgs3t#5yL=Umu6Z8`Mj5zN{!}P!7rOk{_ zW@EhH^nZUbTH2WJ(We^U&{)hPda5=1{~Wj1dVN1L(rNVlTII0zu8JpPD!vpKbarfG zvCr7usD?(deKKy0XM;{Zj9-r54sZW9eyMklX5;_uXXNi%e=+_%elmVNiec0+{oj53 zV&hSvZ;NB}Q;o+os^x#KByy(vi}6^kcvqwE@PlznYkhEdOvOLMuf>V+uj3cu+i^(z zK|B*LOxDKVjGvF!N4}>2?^4rky58kJ{qO2ijq`)uTZ2~W^wwgO%ily=&Pdbg|DB=Y z8!c~6i%*B|MULNy&&NaYl}NuC3mgA9I-l-|@|pf;HZ9&1o5KA&gD&cDW=zEmab1XM zWBg3eS#3TMe-zh;obQdNb?*=X28;6Fsz6Ea%i8sc5AwK6n7jKSjQGZY0pK3fWj)})(bNG&D#iem` zu)!Xkp9_7#rul8*d)gE43Vpmbw#F$z>x*JTsDW{P?2M;kQ#=qi#b-m#XN0%vzA@P1 z^BwV?;6tyd3tM-GoE`}^RY$)A-qTVZ^CoM$$>pqA7)QmjaR1RbF4Tt4RiS_D!RdcL zzNB%e!yU~pkAq@Yh{@>Ocwx|SX7KgYWOV-4pwS6&aBPX=C;c=d<*6+z=~h1`r6$NPica%SIq8jEFlu%RY2m^~Tgzu*;XwV~xb?__=)5Ql|#$HswiWQdaktAs_zPQG+*!SbjQQ40?Sv*uF1z z1byy`+k@`LouO}Th~2^8xj`R(oa^1G5ce&iM)KuD|E&r2vUhnr6MFTgcwex6W2nD9 zYc|;z-&r9Z8uRUYp!3>T5p-D?-fWJegXa5USyf)Dzc%aaZF z><#|KX-^z{o5S6yxG~huef7OM=)>2BxH;6(+Y93H*b!oVF6IRN<>DSK&k-MDL z_w|#}dTy)>ntJ2DT=a^2eA}lt_>d1f!y9&JCx49Mu#l@*_*V}xI@cpcc80o7HQyET zcUPa?8)9?UH>eNkslVy6CpHER4t3B!^rMS?Jwv~l@yeK6PJi3uMtRUxKO7bE^qy_{ zANqp%$>784%&#})1iMLhnr$ih>6y32!1DZo37E&?}xQ725}q3q3^^-16mAm ziNzi03%cr4w&l*2_hK-r1wA2_K~HNMvQG>C`0yt`cHmF$&W2d%VC{UUgV@=3S1*Z) z4?4+%MuR=)&fH;VVaSJFu?%tX=e<#X4Cm%UfAGf^E%>=D*r$`VZ$fVNd{bwJGkVcS z4aC9cVAGs0I*8Bcj4%5T?+_ombhS2T-+lfEE&2cdy1Ear&FlKk_t*W*&OZVTV8 zLqbhAg*?s=c20`V#iMb0ToTUnErxZ$_POD{oW!y@_&9hnzM@ebeKQw@9`Rls6>O;U zuEGEM*fS1{&2efBb-2Fy#i8ffKW)PHjhlnL+3=$$SB*y zeIUmN!=Fid=?~(XI6OWT$4@x4QBALm>q3t7VAq|qLJn^TI?LC4vNF8ibaTf!-#L4r 
z-9fQ9UKY-)$6@iRxFGlzr#*JrVq;0@6|pXhePe0JW82tc@=OY^6x zus77-(Fl<)6euLHFZ=j|bw$V9)%L?&z0WLp|Re>aN!E66cCICr*#YLS5)a zfA1q->Pc_D)RO&&L%i%=80UxF?h5&_tbmfxjbzcJlB{hnMtPvGvZlqx*0Ex3#ejv=EDO|d4#wJ4TP z#s!VbVsXgNU32}WrtEH*%=c{+*V5Q8#Lwp;A$E0TpQd_fWjOEsIV#-wT)2P0x1w<3*z{=I_z10 zby(9&YvaOLAL^r4Hw9hw37bcS-q{%9x-!Jf|9NqH92oY&HoLQNc!*n0)(!|~)QA@B zsu|t*W5=E0J>x?RFNwWFFSA4Uxp*O-4mG+rXnS|)t0&{uI4u^$&GEJnKb_wnbZ3wK zQ$qgY+co6D_bWrJ*7g1Qp*Q|Zd?4iaXYu+_54yfH?u#wq-ZPQ?5#8Px?~0!UAN{vc|rnB5>Ko|YG zHHvA59sQ?fo8zlNH!zng2^t(94qRuqEDdcEhzBk4Z!QTDBKR@De?x2_r z{#S*3?dyLdU5^cY^JMV1uAIFKa+V9tz8~_xC{B%kiQa(``*i+9sOhp`R}a!i-1`N; z-lN?@Pl$KVu)cTf613oxrpJYTVDFGv8uqQRzc|EwV9-%-u|>yi<7Kf&$jh9MeL~*b z$6=vf{2J|BbAK+p-^Yae;ua4*-Sy0`7)BvBajS##wBuXuo|gps{OlOw+$m@=Ga22L z!-DV~xh|^L=!=bFk-IwlD9(tv@DBV_=;Nz{9={y!5BcuboL+qG72;Ae{UbIp(dXsi zj&+)!5bmvvHDPaYxa&Ql4c%ws@K7^((nTJ0+9Bkr)_fUhVxJ#c@N2X`Xd>pJ4ojO4 zy397;KlGOKvmti9t`71Ov;74jK5OhcZ?5OuSr~HXM?b2A*udU!U+?k%+aWITh*^DT zLlbM)hjq1~->IQ4bYOq*#Wo-Oss%r^*f!`THs{6c&Z5c44}I-h=Sx4+d#J0v8s01R z<;t$yd%?2mrG@$TrInmM`grZ}{9@otXO zV@-TCPKmR^xsOM2&b+Dd!}0ICb6w*BAujRI)F>XXBM!Rhd%DjCJ!voAOTxKx;_{%o z9B8pV_&6xkd~49cw{uCX4Yd|KAD|oCpu zhHud$jq4l#M|?MKj$e!a8J`Zd`d)mbbC)*$Nj%z|p3lWY!OtJXJ@JG1eESX4KVxoh z{<(O6sAWDzM>m$!`1j}hjuth)FMb%$#_xyoZ;02#d*ZF}e5jw^P@mo6jd5D2wR#ob zxQ2%{>XVPf*|99n3O3VksKfa0>c81@{W<>coxj}pe5g}38h@9sXjJFx;$Py$*d_i; zd?U^czP=uB4l&Ng@5HIWuR1>5Yjs5Vszoex=L6yX@5Z}Ap8DeUkmuV&pLW;pQZvr~ zII8{VxJGsHzIgZQq4A&Tw=~`yUkG{I7 zvb`~`34JNI6Jk^7^))dUa@S}32m9(S=jCxqygukW7c^sIoAAC}6@2U%)-Mk4r8>wv zEynhCYSf3Xh@HcEK6VSSInTf6-NVnGA%?-;8O_DIBuz(+t3#~SX#DR!tJzq?^^G)I z9RD@8#JfX{ULN~~{P{QXBL?3r8i~&+9_xFBxDE}r@<0B@h=skwLJV@Fg|%14s$fGL zdxv}51?~7{ciVVr$l1F=hXo-Jz?*Wcw<}^bXLPl;?=Q3oEOiA zeR=tYtJBKhQ$NauZtlBFi^D^#C#AQ19*eXZ|5@=wbM^mf+#Y()`K#iFV1q8##WB4P zf7$qZ@saL-w~;pMVsq%>HKD%dX9v9wjZG8mi%CCR9(?%D9})c9cQ0S#eA%YE-Z?(R zc4Vm0)}W_a@UdUeeKz=`gPN$lcM9b_W<}%aG1Nhw)YyHayj~t^NV6qDbNwnOYkcy} zmNom%^QY!7iJ8ghoEmyI+T)u&{^Tu&uY}xYC!<=M+tZt8#cT{`PHFDGJk?wuvCYSh z!LK!O=|RxUn$cNzg{YTCa#6R79Ui73P 
z4ZOc%pv{qSTsTMTM}oF&(*2we1C4JATId)4S`hY~T^@8dUmxCo`?GOOsQdOICujA*!q_KfV~?=!U4L2Z z6@J7_GqKEs{!ohptcUPoEZDY z4})DA%m3XWE`37d3~Gyl%f271H#ptG~|)nDT0bAWI8 z%ud#v_59M1$2Jq#+$l!EHjUMaj&ilnH{JB;5aUn>y68bR7KOUF&psWFjKQCCdX!dl zy)>MW!;qI+@aaBXLNC}?G|c7YwHJxT;(k`e%%w#P>*HJ-J$EgVGo|I%M<*vKNGaI?>(hC zXe0*jjCefj8)xU@zPKmoVt!|w8}=`c+kzH!KR-SgHwV98iA^yZYEE}?9}?`X3FqV_ zmU13BbA1%c=xdD+M{A?^HR_4i#If-YaeLel-n*YfaV>av<6p<$ma?8YJk*XhXT-rF zCbilcL;lvCwN85)UKr}TJWdY2%s0egadeyz@?rnf@Se#UpW6+!)Ws1#w$k9Cya6Lv6g9{J$#X!S8uN2f5L}_kwQp-yFvW z9n{Ai+Aj<7h)>TAb$D}g@v_G!9i6c+S9!^kFXIn`_WIz%@kAUK>TplEb3*JD_lEe` z*eT}f;d2`QC>Gb-dgMbm6}b{6(bA42}LQez!Z9Hm;3BVrgs% z`qJp6aBugZh5Wq}&eC*AsGIsa%kG>b<0VSS!{JN^E-c|Ew}-!$G3Y^aTTzB%ah!O%n3hWyo|ym!33``sHgU-*O8 z_6t2g3$gAN;^lKTb_ltOM@_81GIoi5L(HD(d~~=w8`kQTu@-btOSN&wo*b>QD>r`R zO1Ht+j+5twjq1U__?#6t-G(~+ZS!9V@qInM8t)8oem6cAw*;M^4QJmPUu`#O>uoZ-VxT^^<009`}FF@SBKnv2e!ssoH9Xetf`CK z^p1Ce&MV@gpoKi?t>4snY0Spv5YJGDGn3dan#r5*p+ET$ zo4B0kTMz9L_I3|vM?u3q!}>uJ%&j>u4*eyk!LGS?#>lrkX2SmPOiOcT_^~gZpM7`4 z%+FAV4>iAS@_cRMTjTn0=bn%+_&YPy^}dkXgYi(T4q81HH^rvl*ZnndXzUzU#D9w? 
zBCSSuHa->l=c93ZToOmcMKKp*IwsDDQ@cYu_q+%14QKg)IQ8Y(acqd?fDqf^aYC#L z_ME>X^yJ!L=l-zI&ViA?@wf7jM*ZTRb7EN<$Hz|LePBb)=qvY)u|9U6a6zLxdQN|c zlf9u1cQ-#N^zYV?1N&;~9DCR%j*Qddlu+j@Lk;+M-~Pcd8*~?&cS3)9pPaol)QSCf z#Bs4Wo{tm5o#kn(0PdZ~?UbHu#4LZ-=;$A{vd;)J+3 zoPSGr=N^xfgU>$*Ha-(SjqBqVgT1fBGjUB^5l@G2$NR#4U4SIU9lCIIdqO=<=sA+biPyU&YmZLh18t{9;_2ZFWBv{8v|h2kW03w*AfK=`;FV zw!YMw+`Wg#{@ebzzw3|j?`lspifd7<3i+QK^f)*k3};>y55ybd=+Fnw-5A#F^K)O& z|Mc*kW3PT1_t?tD+1M%6=7_j5UNT{AuIoL9FM@ybw>nV`oRu{_k}wUcLOz7hO;f8D<-RBk%2h zjBJc(@bx%7_K)Y{@wg^@|GpKc#pZDTp2E)7=!NF%3X#5_3BC8$xIf+#mrVFbqxCC; zk7C*Go!x)2`A>Tv-qd(@oDlRnKQ@H>&dWy}`s0|mIF5@`V|}ROZE;du77v7d@vF65 zF9`kNS)A%7r$Z;tdo=37RTI>hzsmbQSyJ;72Tr!WlJO5KBW3sPQP4gdCR#`}c?3)b!-WO>ts)$G$k3^Uvp> z2Mzu%X!E|PCgVPQ`DE0;;?f`DlCQkPCSULBeqrBSJbcNI*79Ymc*a_=K_9)Zo`Z&R zbWU#D#x}8Yco%0Tb9-1A_V)~a#p4|jhy9@r|9A7>4O)IT{?GVYIPp&x!P9*sLfoWB_~+Z=p9l-|yLJ{C1MemyP;K0h0;5B+~L z@WJ1mp2CCwH1qekBxW4ez8-i@0L)L!zRzx_77S-7494!C&gSm5=E0=Y^3M7*GH52@SZyFo_%Y6_}pePIej9fj#vwgaH7N0faTpShha>jV{WMp?;Y>Wd!K8MEU@ZPz9X3%Y~aGz%K z8T6p%pb0J1R8P3Wp58nos_jV5VzJN9`@(yET!`VDu_nBmo8tbsJk*LmqaI<0U%u2q zPl0~o95nXq+{#dkWfRy^59bbvMZuP}17kK`Ho=-a=*{m?7yiuUE^pe1W&5AOzWYNR zo@;(XxO;cJIc^TMdN8htb0*L7elea1{Xi?aoEGB!SiC;Y3Nbt%m&BG}|F+uUrpEs_ z%6Eaj`C_~(-V^u6TjIpY{DH=|#~E>TRJR#D@cWUSkv=&)jt=YMG*{n?@%^|iA1SEI{g_n?(t(LZ$2w=|H4zFZP)iNQC9 zE&V2^p$=bbP6PHn6u%Kqh5bk4_7Iczwd$802i(@9X3I2Tp{IJcZdkZIs&A#W|Cb%=yLCrS? 
zo#lOE@J**f;ZgOIq7TX3pM$e8fy-9)nKe)E{!D8~-$sKVA5DR&R)jp6f!)D}sNu;$Lk*gQdau zqHu>@ww#lnT;$L9!k{xBG?uqBz5ipKwrhkvF%5N)6aQ?@293qyE=^Vjf9iff{C3bl zZl4Kza^~Nh&TOmW@?b|yeu`zm@r}P6_0>Z8Yzg)Mafs#eP|uHqI(s*bpN{%vn{185 z@{>Y(f201St?vPC#5Ei8R*xMg=o@S5Mt3oZ`{)q&ir5r`_Rg7GqX)a{yeOP!!@4}_ zss{9zufC*#J9N>*p80fEEcV#9J{KR1OGAHL5|6}%!R}n#5amC%e|_U^@y2lf3vqiK z8SGve=LfCjd`r-l-r_wwUWnI4v5t;uyenw%iMSwM6a2ArP23nqOycqdgsm-3TISvVX^j{svhWm4IZk!U^1Rwm`Ul)9Wg09EKvREAIc0kO=qR?md=;oWm zE+3x7pa$aKb^^WlaF2%0@H?r)NR9q5*!Z3J5Ahoz&VP+}#vyHdLZQ;A|^)x)8^$pF-cfsL}{~Qlb?tZiRkAn8^i$4yru;;yf zvis`!SMl-g=s)()ic8|HadEsOo(}oQ{hYWU*fl;Fr^RQ2{RiSp@qY#Fz7c%>TD&Rl zj?3fTaQ3-4Gu|9(yK`I{{LuT9xGDHiEA=kk@png!PY&<;^D(Kz_|HPIZWC%a^oM+X zH@1ZO5AVn3=Ja#^+}JC24g33rI;;1Np|^Gk`-RTOkP{u$xOZgyXQ_B-u}{d0Mg#Vo6_+)B4hv`bGG~WPeORxJ*Tt)k4+*v* zui2o3T)Y$82i<7P&XDJ}&DrB;hj4bVIn?1(tvwTTdvE+gJQnu97V4_@+eNz1h~e*} z@7%~a{c>c8^TY8_oH2R+O5?GyJj8NyVdQION%K<+=T{nkJuZ!Zh_}Wchu(gBd@I;| zP5f#+ntUJX{#E&YGLv#SJ1z`8b7QOzJ%4_v<>9e*GM?6WVw@K=zdn8|?uZY@7vt)9 zPdpv@82_Euy^XI6>kr54eL@r3?jCAJ!;?eqi{tpXCfuD1=dG!$ z9Il9CW5+ll+&eH9#6b=JmWuvN$x@qr)vBNAakWyR)H=n=4nfTu6%-YY+AM zQt(HIavSZ`czLL&wJ!u6P;C3Yq5D57j!`bpq2uQ0c_c=E;zBz69=gTwA#bI=wouLluBK}bv5ccH2wsrp5wLcqj5i9@n z^30aH=ruaHZ{IV2_T3kY+RcV|uL}9f=Z;Ww@zCnDSQN`AYjoysSQGnfSXVziWzIfZ{KB)ki%XAqUue(1Ju%rg z%F{R(CkHK@*NcO$@Y#bhI!#R1;N<3_dMNJ0(&YKS!jGD_utnyR`e%O@j zi=pm%LCk8T&icasEn)ATSRNO~3Bfo0%iAG-51_;a3add|9BXMzp>Jo|CR{h?0w&7BedTpSz6hqzV*otFh&J<~y~>>nEI z!yUFyj{`%@Y&*9&q$ze8D$It3!jh6J4zdA!-h*SLZ5eHirg>&*#3w1S@ms+TUylD5c&+?Vi7h|ZA zJIlklAqKgzKiB8ts$grjvG_)E{^!Z_Oyi%#dt#UPN&EHqh@bQv&4oNZ9BJmiD;xj) z@z)#6WprZWXX5_Q4}V!XuWbG9=G|NH(dPR%zd8OO{w{tTPsGo+H{1B1!J`k@B@mvz$iNAiQ1Bb5nE@(t0S$)y(m6D`R31@eR&x99Y*uvxt<#PR4=|4g}ZuYw~(9I zoU^9?hO=zYZDGibeL2re#_bxNr3-D@5SzKtSvH0`{9T4-g?MMM@kG2PE(>>W z4>f%#*!X_j9r|9pPsXKjYS8lWSQ{&X-&;a|-w@?FqP4zJ&tHo>;^J5nH^&2^Ztg!C zTjF0sT-U{mac1zZ@2s(TWr*2(yeaG*5f{eEadsRM&%~{i-j9Qt z%gcH77so4tAAPWBGUsdIY&aKi}vZwLBIF+ce;xX17QAjhw$a z?uvgOa$;A%xx;_?jd%%@=G)M+MA6-!+qmm%eiH-BQI?3I$ 
z_eWmV7f-m$rT-=ColE1L5)=sw$=AJ6J%E(bqy*b-u4`;yps0-cuzUps~S z?(xUhDCG0e&@W>7v$!DA+4k6%K3WqW2|aQ|sKe(X9Tv#vSL3VQ`ISbwutirIibpMI zt}a6?V&I>4@^D8SCxpIH?<0fOo@YZHyf@o4 zr=7i@t;6?v{#vMIxsCtKdjI749gUC0O~L+a;~POsdH-Sja$Fs>dOjYC%R`*bT@ssv z4%r%gvGLa1Ocd?BugKZ$38Z{M0n|Y2riE{@rd zH{H&O1LC>3AmqJ6ygu#3@?sSK=qHWe3ip3Mo{1ad!nh;!!NVaRaNLL8^W&9ON=?;ZO8 z=%DXDaZu3tm{13OPe(TPk5!>R)p)1_yY{z>^>IUNz zmr;HeoYmTF=*gE);BPj(k7C;)_?HjO@;m-!iP^sT4BBjNPRFJ3%CIgjcNWEvpY=T^ zYYQ4_MwYhkA=(}B`ejC4&-aB^mh~A-tn%9FP>mQEd z9rxyW%{RtikB-jsdvT~~`i)~bwlJ;zUDOi`f2r{+QSXfZx0vo~{$iZpvpAm%v9&(Z zV|Q2X-)a7f@tN*#Zf!$kYuvNvHtHq4DF*Kh?bWDw$M24K4i8#i9S=nQ$KRmd*KrTX zLH$pOtAY=o)zUyS>nPHN^oToH5;!{%_t9Gl|E7JU zdqUiAiYvo+hW%UO@pvfo(>LSMkUQPzsBXuE_?+Dw%R{Vm`9$0ldgY(O9p}FuPsTMN ze&bt1oll6@1f89^IWCxtTN-KZjCbp($#_xYV(h3h&{;;T|0phPuqgOM({l+&FLh)@bOC=L18|XT+oqqjDYH+4C#^Z5*B2 zxVY!yAIb4g;@^dL>91?+?2P15f6mbB|BKfK+kYS5iCaTlKMA%kk6q&zdQH=N^zG(H z)Z1Tfen9i;GTJwGZvGEJhv$0Uzwz^-7VnF9^;-9hm|4?aYYYFTwNJ#`;)(blLDwI~ zr#nNB8{@*bH@+3m#0R3a5ncXUcn_Rc!^`3ap@+T}x5vdHhBr)}-_l4M&*d@x=IIkP zqsgYYE?3G0I`@I5Fyh@wm3Jz8KpR z2fI7O9>I^j!T)y6*>%RAbFT>RiZwR*7K^xMW0#PxbJq6>8nDX-pLCFm9QokSXwNx& z3xh2qzuSiUY&tg={LF^){LoGwp6k8wdD`+vM=?1w3j1>4gMaI6i^+L=e7f&E-|}$A zK8WDFFnYnT8W2E zaq2n#ozr*hduGoX+j6rmCv!gFjD7m6&46?Eezp#B;h*k@#mcbnzUSGXJ^SMJUeSiF zSI6vRPOsxbTtjcDiyCc-gX4r)8-qsPM`!t^ zrQTqhp7v-U9?+c*>l32oW2 z@7`R91#HpxfY1+~4-L7|h5bb#Zs*wZPRom}LH8kM`{Eunpu4=p<2-xxTpx0NFxcjw z9%lwW=3+Q2yl)o;eUF|n*kgm1Y>QW1bQ&}<4%&)o&~B&$n{*Jf`@?r(S#xpA%bs&= ztCvxpa#4f9u6JS3WOyI=76ZTb*@JU)u`=k%*5VknXMcInMXrA6V$FH>UKRGmKxaD8 zOx*m7Q_W%Dv-9+^e`2f-TKst|jRS*?>nG3hJ}mADvEDfuFKqN~@HOD9d}$~q_Glwl z{YN`<@fgLsF8DXfT@MaE#d=W4+Zi>{7v}u9FAqJ)9^`4{i*9tJmGfdy1F@N_x3hD> zwmz^g20HNN{-QW4279y3#b)GxV+{T1S-(KtG4vz9;*-DF#pHQq$X$KD9rirS`;HKY zc=X3y@b!%l)8+AS$U|Sz+dfU?2=x(%HKTK4f&9cFM{y1Mi&;GG5Bu~Q?D6N>9a;}M zv&qLyh{YM2$P;YYx8|%^JP&pFr{;eY{C_&W8lMRFJ|Az33qmgP{z5z(eElFk80W=o z*t<98f|l=(ljEe|_x16Q@pPoo=#0kC2K)aKACGs%*0?Xe9FK)_PsL^3IkwTc2jXwz z>^L^!c^1GWLw+ac*1{&d-H+Sl=xOeV`}x(M+fXJ=I-J)e3yuH?pHQ 
z{HU{M?-qORtDk50jci-7k|jtbu=Hm%8lZMq*6%j1Z!w#{S|=ipbb zxjXpg)0$`g+~G&=bfk}5<>GnBMZSD^7CRqocsBPw_+gt(h{5x&A#Z2cwPp`uV2ACw zI4JlUbX6C5dp;n<#~xdI1a08wkQid}EH=oQuc4k|^CJ$ii@`qoi-P?JgZ;tZna%Bs zhd=!;<{LuHE8?W!Yw#&obN1QP2lC+GIndZSXUrGJ(0{)BwD1hEED15FnK;~Gmwm7= z7q;2-Oe<@i{a6#1ILtBlAL_ui_r{OC*%s+5et3!prbQlvOnk| z2KU+78gdn<_uQO~>q2jOKg4!S*rNqb3Fqz&wX=_*21fUcbTZO%&}h&}4C11db8HW1 zX)Rw`o6Cy^^l-*md*T@Gup>7(=f@g7^e7#Ewhx>eo}C}+u(J7@5U=t0uqKA1V?)e_ z`^#c=Yz%pw5eLUmi`nLjW5_{`Uu{6ZGJdOn3yGE9Y(q zcWFs)z2Tl-Fsh;VhFjN=+9_m2T^|5`(MQ;4j`LI|Q;#ODj-5>s}l-o@q7XJBjPMqq%w>@_4 zt7CDE|9>dn@o#H0Mz-YjgIE%3&(7;(S*Vru$D{A}%$^|i zLB|!bE)EEH)j>>b(oqcSL;mcm8Czlyi+5Pv1b`EQ=q<+3~jcM7Rfe=^sAmWNjG5Dh_#gFKD69)PWD07Q@)S9Q32Ra^D=Y z;hr-?ee9{oo;%7Uiw2^>O)Io@7MHOwY<6edgst^)-zlBLT${K zP3A*Qhn`n|Ht9wS_r)^wfEdJL&snhVc`nw);&5JkC&ilBKFW3cPPoJ0fgStvH28m- zr_)TDj{p3Wvlz~cJ44)jJ`ncg=3QKw-k5G55`yHKgM6j zO>t{HUTYNF4sWP!-e0QUZH+H_%DMQ)eJ5w}sLi&Mk$*YZqoKJwhld)?#>tbpXXqie z<CVnu9?MImnM7liZddse5_ zVgH8UpH2I;-#++*xqWl??ducIerAJBdHpaB3c4 z<>Q>(t$AjbFFMe2$bZm>&TP5AJeCF@^z@yRw>34uzM($e-{E~=TQAW=esqwtb2RrI z@ng@LeY!YboD1!bic+1nx%J9huDz=X=YKR=9IYBAtB1*&VX}6ZtQ#ilhslOvvT>Mf znoMR^t(?vV+cT>M<1=;ozeO{%YA`>uYOp`EYH%>KYVa_#YH%^LYU*S4)W_SOKH$J(inwNoE!r#{wBeXO1OSU2^tZt7#*)W^E1k9AWY>!v=| zO?|AJ`dByhv2N;P{nW?$sgLzjAM2++)=zz`pZZuo^|5~HWBt^}`l*i%Qy&|qJ~m8! 
zY?%7kF!ixv>SM#y$A+no4O1T*ram@KeQcci*f{mEaq45^)W^oDkBw6w8>c=tPJL{g z`q(t}v1#gK)6~bNsgF%lADgB=HcfqOn)=u@^|5L2v0&B8!AaA>OVhzk)4@;E!BNw} zQ`5m!)4^BM!CBLJZ_`(P!KzjB-li{otMlHbuYIfY-li{ptMlHbuYRlZ-li}Af>qPE zzv+B#)AzsC`P`;ofL7;on|=dYozHFh6!`VAb?1(R4ny>35>l`P`;oidN@yn|>==ozHFh zwOFuf`nhO2pWF0<(dv9|(@#dL^SMnw8m-RfHvMe0I-lG0!?9r1^vlt7KDX(&qt*G` zreBX%=X0BWKU$s7ZTbaSuxk1VX*!?V^dr*hd~VavNUQU?O+O^9&gVA$l(ag{ZT0-O zWcB>FWcB>FWcB>FWcB>FWcB>FWcB>FWcB>FWcB>FWcB>FWcB>FWcB>FWcB>FWcB>F zWOd(?|Ls?tt8c#YuHjdl;rE^$uetK>D_2~1XTSaYZ^w7I^R|0e48Ilm^=;;n{?AW; L`qSqd{uTc}encM1 literal 0 HcmV?d00001 diff --git a/data/synthetic_breast_true_exposures.rda b/data/synthetic_breast_true_exposures.rda new file mode 100644 index 0000000000000000000000000000000000000000..09533f0911ea3e11e7cb0bac21250664225c85c2 GIT binary patch literal 14050 zcmZ8|cRW|&|F*0s*}FtGQHV(5GE;W;-h1!8_ujIzlO&arLWqhaO)8|6B1twC&*ynw z&-eTKy?%e3&$-X%+~=J8obx{Kab4GaLB-_6K9hY!L_{P+q$GQYNXQ6HA~Hh*HBlKN zB2r?)#vUS4B1*#gK!9JMqqT>(t%IwVvp?Yg2Ev~dgoT>0yhTE*vD5=nU*w4|T;E|d zMtXmy_KS!AkfZrZSM+WgGESL~y-rR>s$eO5iisH#T;)!TyPF~I2usuqry#PQj~)xl zkHd-eI?|qGN<K^S@usV0{lpTeWxhl#zATDZ3CH~v zf2ENj@|W*NlRshA zn2pGtPe(S@DKSmS3?zrRzm$Ev2RS56hf>e+B6&c6EBY@rauVrS9o**;|6ykJ{ID6) zE-}){3XDN#+$)Xe@&b|$l(1}i{X^_yb8pt~cadV{BfY+P90^8Ox#~?u5GaJz`My!a zwWK&d;p9awr^F$~G-{-n2QDZ+k3+mr`##^J3W%8=Z+7?RLCPzAQyykv6i3s2R%TpA z?5j3b(}+Zr*R~!y&9{s^Yx(}7-(e`|cIQOfdOZ?n!Kg(+l3v)l}kI>q0Zb=we;(<%CE zh6{No zI?DJM;*~;sdx6x2sc zh~vZva*mKrmhL6Ij|R8rHGBP0|LxQC?n4_SKh@tube~{fy>^pwQEySO-5*=o=YnFT zok07PuSnA^(&5vxK$g0XbWBqeVoYo|B=d`q#OZRc^S3vWd7pMc{13{irRURY8c@1& z>`#9BSJaS91e2DyAwj`EN71nz8EP*^UKrOQ!QjQ+%Pgx%TVEOb82SrIZ{BSVlv<*Y zqcW>+{vRULYdc5EXAz?qVWE043kAyRMCM&e@GdScw`!Ng#lfD*(gRsYvT^tNdvO|> zcgHW7c>F<5E#uS`#WrNR#f8wyIG{|eW^8pYJ5qz{6P*)&B9^iu!t-VZ@(UU-`>k># zwN-AW@9PqBY;~oFcR3Knrk(sSRRSrIJ|xYNaYzt({KR-ejPPBpWd7l@LAk9#dxGI2 zYV#e=wOeE%+wXMcHy0TsJtL=T*%OAMmo83TKX{R2PE%MJ>W4Ij&42rzGN53qCBgX8 
z6(s2LlFpTGBEz(LqIHrOxg$+8e}t%!`dpanL(okW$X<$(;qXBI+_|&!CbdX^sPe_O7257oJadr=-Bb}vWwF!E?D%nx0Bi(Ixb8p;dRNWB=aIoa2b#9B6I z4S5TsYLuBNJh|7G9KuBpLjgBZkDq5p|U-QvKyq|Zk zUduk@w33O4J!C}g<@P@s@zW@ASSnO}BY?y^T_#~{fYSAG_epR$D_ZrfBUmLN#c!Jaj-q7o!zKAV7 zZ?He^015_w%&a@Eqc};{Vg~9+%q7XLx&0Q|$`Oz=4MCd5sTzGwNu*Ad(44hYVq;S{bC-udRe! zmca9CdpvRR@{y5=F-FA4w26i-RUsxxeNS@rO=LA$-QFb2|8LwfdPxx%S$GF-<9I0O z><1g~_j@J!Mqq2QL(WIA|1h=VI3Fv7bkWa;_r9}--hFejy8Biz@=7P2&!vWoOrp44 zoiaZ5d<-))+$KDyR(^`>)^J$!?^$!Yi$Zz+upYL*aNuQL3t$t5@ANC1%GK8}2gy0fMZ|C6#;8He0p7qG}G zI}!?mXLSSjTc(hpIj4wca)|RBd3LJ!Eg}oL+P=poAvo;kth^O79Qby>6`p*I9FOtq zZ$jGeiG66XnpX+g`77FfDuVEZsu7ciM3><_2f88z3DvRv*^)!(Wy^Brct+U0 z>U_enB!K=SH0E5&x(KPicDi!?Ey4qSeHngXhOoU#74kBjusb_B(DX?K<{sGz34&Je z4mo#~e549K_n(u5*YM&p?c{92-)W?1wS3P_JOzUjc@tCwdvjr%i)D2@4Xaecm%;H@ z;PW*g?cA+$g#0>|5_mxpp2l(U5kjBgfBj{8+_!zO&A7?2wMzk|GtU>~f&E&qzcCb`+Io=TU6L!rvzL?*&giE8fpWUhrZ2pWj zSt+uhqH4Ub*P#~flB1-?nXiz}si+=8T#44D{(!s{LVoysu~6tHG2Bm1k$q8ZhTqfZ znVC0(urmA^YyZy)Zj)2$Jmbu0GV%T)J$nc4mL|VrhI-L>>C8_eX?KLDWqmU`?FI)5 zwSkgcWB8PQ4!$PG4-3aZZ)@{rxbT@AOe5TP@{dy=W{xDoPg?x0(LfJ^&V1auxp5r! 
zbFR*+1wN>b_BAS{pF)7e`VEA z$FB@U*DEX#xS+<%4Nf8z+#cw;Vh6I05c|=Kh+55aP8OyR-t+ zDz2OEZ#tPX15>^xr}p*3XpJJj^DQwAW^U@7RTavxaG3q_W^MrX(y>kq8~0&L!Ps(N z(HnLgJXPjb#1T5ZC#2z;Hlm-2IvH3n!0BHWk@Ch*xVu<#1>Bl|^Y`N0#ozs)_x+Kk z^o?tTy5Y%++{r9N`W8!kubw7c@9FiW3-lQ8`mu7|Vj1>3LoSaqRQ?;cVg%MR3O(;4 zQOQ#;%;zoc4^y#BHr;}~*Tiua-6g~&^tHM?RYz(J+XpI7VYuH{Tae=KM&h{L@eQ98 zB>h?{Kisa3z(iY`>c#;Cz0~Cu^Cd^(uI^M6BN^h13tj~V8=x9oepw-TC|m6Mbh)g7 zkYBY}wyVj&fAx>GCv7A$ja|ybd1&FI^yCruyH$ADUN#wdFM%MJb4QPB+(1}1Z9zyc z7rbi=rvTRiDKtG?XPSi_)E3T>zp~XoEObZo9#z(XK{OLsy1rqn$B1W zP$A*Yc%=)YE^aE#kNbvf;$|X|gbM>Pq9gejVPS_<6SJ5#o(#liUFp*sl*t(sV|`C*!1uXc-}1{_r|2KPrfl+F<^g zv2|P-WXjBD%0V2{`@Vvm6=f`uJn@9Q_Y8sp;>nwCq9+4PAn{pgbY3NV>`uMfgtWA4c~c zrX%>bfGNIyLjG{?gG_PP1g=@vMX?3+p_=vaW=Yp^)RnY8>0h%$TauHS|smpfSq8MQQ9As@^^JTrQhJ z>0jCDXd4D(db+cjp5rIfk=gCKDnO$4p3mi?eJD?RvL-5Ljh<&ByM4UY==i1?Fi7+P z8G`TWs`aSRw8zrEf|d~pfmxq64(#A=P*UWoQ3^Vo&NXjy7$aAAd9~`|DzZ=d*R>Y< zpfuogdPev+6nb`iDPbkW?ay`+F_+kJ%W6f<|BoGV9;axV=Nq6^RqW$U(XXPh{&{kvdQvXB^@;AxJ zOU@CFPwP8dIh2aTfg_dg41b`N)qbL)K^hmoQf?f*A&$NjiJyP#t&qIvD3wlJhilL3 zlkROxA-D4MFgV%J(0cY{SbGmLF9#)7%)LkMyNMc;o|j0Vw7KJZZ3`E#bG%oisKOJf zZxkz)!AQ|1qq!95gzIA|b`Lyhk<;OuVL-fq1h3`U%d-pUco1cfLZXYi1E1&5kWnM@ z8Ha#nt`+JiMUPS(EkP@l@o(je#ORbLXBaYQL~L<%aLQwC#GkX@A#e2~;1Snpd%izH zqQ)@vZ?Oo6O2ZZ5Lp(#^dthq~w3&R>a{N z(S1RAaHspY#Pd7?)zzNI9~|&T=+_|4Gm`WObgH=8H5r8v^F50`zuFK`BO(ydN=nEt z`IM#J-a(jLY0!ib5n_+3)YPxE!JKu{o}zjYDG%9cUv-SbLj2+h!KNA%+U;D{AO$iS zvxe;N?7;mW@Cj8nPf@RJWpkz{&ig-E%?g2{M2M03|IyZZqb36qu&oWHvWuL*L zefpk|vLFFZ8%S2LD@C}PVuDZd4iXiDIhXG@!rit?n~!Y)ffJkuH>7%zBvWSn;?@gz zosD&m7j!~^VcrL?sJrm7e6ly!iUZ#Fh$RM>*OA9DZ9OPYgt!jX&wBgWQNVe%X)T8s z!8|kn9_`P9$4s#5_jVD4-K-&$n}6UXM5HrUvw$iqN$R(m)2QL-NsRm=kC@)-4|Yuf z$j_jP?5og&s)D2X_beA&Pv?!_T--#($Jub!^+WJYNvi5QvV>SVuBF$tI7)KNGkO%4GXF5%R^5p5<4k zA0c~yMhd(8Q64e)xiOdv`G)(6FBK&sS6yPr)~f}cm$cNG_x(n4e1Fjt_Yi`f+%EjZ zmx1sLT@~$DR1lW%&f#R1A;N5><9enSQI#p$VJuyMitr_;P>%|vb*hp_Jtv1B@1*7% zu08}Ta^6{N4+gEJ&%@=MU#Mpu3{`pIj)=UiM_#wrkk?Sf-#>B(%>fUVGnfXE5!GAG 
zNJ7Y$SrVFEC%zEsj(_FZ#$2$sn`zWAC*U*HvU5ZBqp0?s%6luJj=H-Kyp1^wk-xE5 zKFc}*Ld`wncH>KM+TS{Fx-o+??sviUJF5uWB&~06r+_!@V2{n$1{96HWhAFGL24Z7 zwO!HgNIFd+b&ID2IsHzSEg{B8Ng7ISHlTvN-{r>+#3V?N+T|R5LU>L}WSNJ5?MJ?O z$epA(NrdNK$KFB}L?6C?iASOpAy*O2^X(;apU6uZMLMAJ&uG;WFNb~}japLdQ*8$eD-29ZN}1H!BW$^K~au`u!rmu7O$x-o^Kk;&R4;pA&e(nk9 zLJVolq}1VZghl8U6_9Hxo@~lOB-GEoD>C5v>$do zaY4%6thgW%FG5@^6_?se32*LgUzg)bZ$)1;TyP4D_gf(?M?DOUQ$X{LZf$B_YqwsN;!HITC80 z6|b<%ph~`RtWRtNb(M6?e#9jR$te20*~E`npG&r;6ebAg5qeTy>V={(5vIlEr-+EN zJ84D5idb!uyQL2k5Uu|4n5YyGUGkDYF@X%p4uXNg23#nW87tXS7KY?kI$0SKim1@P z`TjQ{PmJE!ETOcOhs9GhZ9c0M_}sbV$X3J)-A~t3EOU+^=11ACS^sB5M?W1udU*r| z)TXlu{YLP4LF&S*MuFPYE)%_+MkK_)Oe+}FMNF%~kLTtg$S=FjEfs8qve_N!PXs$j zT+J6PJI#u+2A$K#zE7cQ?W?bpyb3xrqRK=+vLb{k=~_~TIQ;GzY}wDSBTk8vu_Ep& z{MXjLmDZ{uMzu53`^z}og?W6X4TDhIH1+(Be*~d!brjwVm}c~P!SVR<~Tr8LYO`m5RF{ui5(!Skcb>xqrRuW`>K`=YZH zej4q|4ZSrDMS03W)>C45Q~aA)PIeLDSD)S|36q9y)bnNvCMQIz{l4?)^6_hf6)g4C6EzW zHmWO|D=ah#xX$?A0BhY)#drEvh2NTVkLbOfAR}GqEz*1l#FLFcA0b1m{o_f-+_>&}=Cee#0cT;kV>3~Gh-4kK|A!gDhYP5dX=oq~P3og9Zx z4!~vS>b0Ki)7ZzT=*CZJhW4js`|Z#DfT-^Hvg}(AC@HNzb-Nr0**fk+Qfe-+@sHWB zW#|UR_CM0+%r7A4;$Eh>5i6W*NGUoYe+Zr4$5k!%g%I%C)!O9(LDYlfwXg3XgfXYM z{}EJESXmr?RIjyw{%%$OjJ;fNxaa@E)1(FLN8ObS2D%lt#=2!aGEEfbLtE`P7S7}F z+qfRFHYMzTpv+q6TM9#Sc?+Q`Li{c9%Boph2VL0#d3v%lP*;>VbJwE*zTOriq#lou zqpMv$yk-RVuM~z?sE#UZH6m@1BL|A?mt47e`V@Y!Y%)?kzKb~G>T{BN+h9s8!3?W23;49nqSR?=^-xL^>mE_dqyCO!)FTsh|p4$plapQJw_KJh?A%7GgAsr<6q z)gjnj#LL6Ax`cRG9T7wuejOzj-p(eJC4fchf-l>PS?D!=vd>6VMD+db=zD^cNGWSP z8A<&Bu0=Cx+V(1NTp@9bJ2-N1GT7mY!+Zi;E9SZ`3J=BqkvYh11qbF1VU+< z<900xIHYU&uzrpm3`H7FPtXCU+X7x^zo3PG*sajt3JJK`e)8}mw<36oOg(T95# z$@FsAd4!2LvK0#+fVUQfufPBW4C(qPR5PwX_`bqMwrD1@Llw4Tx+kG^>22iBk@q;X zgP*M&4#4f(^@&jWEtM_S}=E92*Aw3xA8mxvw z#hu+p98ZvV`km>1uS&$Wf8I*F{aazHi?70^YZP~KDnhF>*bu}S97|fyjp#QW;d6>^ zsO){Pz0P2uuzY_~Ou-}-JO)0x9XuW|+DOSyD2PJRY@ByXL>qGMa_3Nt=OO2WdzX_{ zEcDsyf5y=#Abj1_gz+vNctzUdNao40C+i4vFg1a%iN2UI%-aFGvY_Hhs&$zA7^)@n z9|rwTFRH0`O`td`aP7B|4cs-%=jlXU;7+U_cH-p_;ta{Nce|~STf^QY(CGtvS8Z}0 
zn=z=~p*C^j2I{T@>i-cyE5QF@uNh6z6y|0neI~ zI%VwE4CUGMQO$5;}OD%JRu;SVFiO`a|&By6Khn%U4`{Wm&Y z?0nwoi@-L$py+h?7g#qq|14zeMr`+|fZKF5aQ!ft=vY*Zu;-23pKqz6W+`;>?zO|P zSGb^*->i&V1OIvhlSbf{ZIa$Ulz}+Dlwzr?RXDA1q2#&Z1jrXd`8M~3x>Le; z6wf|N;VM`{xc7%!!8$cCWvAG(s_?^UHNzLXibkk6>{n-wZbHFE+HI>(SoR?h3~G2?Y!*z>tHs#nU&58#L&ZxVqAQ5 zSrFvL?nU_(zBpML*5{H%iNbNi9fOMwaBbtwJ#yO^p-Xmr`?;&YKm8}~`P2eDu3cpg zRx3vK@t~qk%>X1%X(qQdr=jsM$E(Ds9MnANy;09u0N+@F$7J<{daWR!wKqXpVZn9D zNR6!x$9`EkdMe5(?E3u>Ih5EAk_)PFF`}i=TqXY%)x@i?^PAW-l1#3XM|YZowfV+oE>;JXiueEvoDkaFI0N)a_>d~GICvf~> z2uVpN_*aSZLJebr5MFbCJNEt;#CX}Cdb9t+S=FIHo%J>x5+8imBJBeDNy(W-+oZZ4<7Z7QDPSxf-4Bip9z<|yY z5KVj!y>wOp^5cKhHV5QzsA0nY^aptm>r`v()N3oOr{2v7tk=Z><|)Q&RTDTeB*etU zV*v6`6H>bBG*FWdH#`?Sh2z$(P6@7r_u?>Iwg1XDoXD&rDYUr&$1hXIJ}~J(A^wXQ zV`~nM){7k*aQ*`N0fE&|s)FE=XBlFDAO>dhQezp%RLGghJRq%l2;wbSqf3t(L1FeT zC~jpMM3mwI&*S$g{PMO+|8toS%;$d-t)-NLm&KMV%-#S({y(TWlzG8t*Gqm`{x9g? z^ZX>2y$Ox=IaY1^MC{oLAe)P)0R8n?v8VdZV6fJBC->eON1fWr?mHcXQr}Ps9W6Nw zZm3_25eh)CdGuXx2{}m3v&6~$)xu#*uQRK!V?jDmIA|MO4j!%2zB_wppyyw0W86m% z$4gg}>Mf~3Y7~s7ds1H(w#Lo`PUGRUD$KslsuRQ4tZP%zKSC(;~$5U zilhJ-uuN* z{h$gGCohYiBh)>`mqCPmClZ4ZoheT*KUVx@L>v_{-Vw`0?S~?{b2kt;~ zM|K`D&<~APnwZ=MHH%T)Ab$)DW%t)61`UGq!_y-rj8veaqPJYn=79S17(ts8%+U08 zVxV04fpZ^L=-AE^bUnK)7kl~SA=Q5I&U3@J;0yfsHJtqz4u7TZez8jD!aupn#323=prafxgY!35Wy*Wm zp)0)9FeiQwe5{?Xmbvdi>ghPGkdiKlJK~34+9m$_oJY{O*eD1(_UdSJi63; zA`e3vj#&n$^P$kGM?7-(b-)41m|Bw2KnNNgd$+$Q5p)JgQMJzN*y9&Y%x|3org{dS z4>}Rp`)IUAGrj?|UMu_uoteQP*&3BZatE~s!iO~;wJH2P8IeB1#sP_|XXV2_FJS+m zPD_W3Bv?cBM)%L9L%72r{BHs+*!=%`|Lji#x2{Imm!T#oGIP+en>~hBg>{h7a3T&| ze5fv^tqZ-EHy*rGx(oAB_PsApi$HOJ^3)l@L@2+i+WqVk1<_vnU*j8Gtm+J2Xw%n(X#%-06X(+NDm zS`qhFHw*-PZXQ*v1W6NHf8HD|_I|z~{9wBVv3mlRPZ8`ZM&YQ|!RHbn@=hWXbS43} ztGGH@QwlUWL~b9u(G5nb;4{1|f)FK*I70jHI7CMpx7S7jAw_ejqqwvOtl{QWJLMVR zlDVeyaOgYS*1{(WOZ~uTR-bpQArYLk^V`lQv^e5p>_|nZ7mt`xag{!}3qg$-mEZei z!8Bi7?H{oX#nG&?(|e4-U9-^RWk#*A>b}LYCt(y6H|besn!0g{fR!#Qh~rT7L1TX+ zM-Z*+iTjwef<`#WiZ7NKv~N_Re*4NnCE5PbA)%=M#4YTZC`xWuG=a3om}VbE0C)z& 
zo3?~~;He{RB7edKQe^3MW9^S{;&7|T=hkE>6}z}AvlN3~@tzY8izFB~U2n`5UWXFX z2dc`i<+#8mIH&bz6r7)r*S5au!@i#n%$YSCL4EFHQ&)pHVlJ&`#Zv@>jI=!5#*Q0I z8B`?Gmx^I;+0y+ZC<;1dKdan7eE7s1 zxOoJ9E$$F-E2>9p*HTGg)4Cz#IMxWGUncF}(w1>tBK7&D!~@7>sHP9K;>&++INNgZ_*Xl@kowx78PP7y4zNACqIZ_m0&YG`x4vGu28pxh|IH<|z(Cdd!^!V2VIOt! zQ@L^tPA8iSAC+6cg^;Z6qh-T5<56~wgrI*=n`>2&{`oZ@dkFLX)(N-NHwl zD%oS^O5hLtl^Sx~Dw`3Ok+p9ms|d#q9&CDO1SgvtVY{1h-qA%F{^ik%o4@HHYvxc@@hK6^gSqGIC9@#ld5m=F>=H63ZkSX2 zJ_WN5_b;=z^?{+}*)D-7q>SD-S==dyLUMmlnesgB?sswL3XZ^|gq|y!+73Rgxv!nA zXAoO>vyo1y35JS|Eo2Y3!3Oo#K87x^Y!pcLl!w4-PTZPvrT{UITvR_b%^*cao;XbF zHA-)s6&kTm0sF25S=hZ7pw_%aPWK}Rhet1xE(OTIk&DlR`^h5Ve%g*XRjxy|K=E1F zdnJgd{5I>}I{{(Kis}ILuOg}M#+)h_C=er`9Gz)+E zUvx+8G0zkJx*mwk_h!AQQU&MslZs+9)DTblwb?}R1v>p3&kK{h!1#IO*A}%BB$Rnh z)6&cXv3%bvM9sk6(8oaa!4EwBN%Mvx6JQWHcg^EHuVdm}b^a41Y=a@zXHWAi!#Fm+12%@JY zwq}LYq5r^G*6ipa^!a$4H?xAlzM%b=o#3aOZ~Kv`DpJ5@um6q9=ceFvGEz~mBm_~`~tKjkOP+U#I7lj@Ez*iO@j)42cTk4Fx?`tB*&yEsJN>k>+SYyuW*Ya2TBJi?D?Y3a%5u=jM+$}Hf7BMn2GeYGq6uJ`cV*Ia@h2cKo? 
ziA9|K?JxdIPY@=3p87j3WH1#VPP}8a4!ilz4)0Z9k6=`F!s%SxCdZUP$fZ>PVoKEbK9D*{UyM6i<=dH?A4J=kogv((F3K_{Wh zh<|??3=E2fV`>IrKXvrO)kR$x=r zMoM z?we0z^vP9l(O+uX71Tm7oyq!snfGw1q~uT6qJSNF-uID|O!(|6qMG9?f_hJ$QPS6L z7%BE__1iAM`9$RIvlnmR-N0ofL6SnWq9H5r0Fjow@E^+$T}Xupy;9!=l}GY%W_ zYu$jy%Cx*!GasBhHEu@`cqqTG+;8_y#vqTbW9MN)KRgAR8Lmppz&j|!;cFT<+&X=( znO+T2Sbx)X$@sz!0*gPr_3aXYmBy$5e>fYIhU&ht{_a6+Nl;oWLD%kMoVarIrwCkv zj?Vl3kbx)L;g{6hG?3RnM=2p00<)uGuj@JF5aRT5%wD$&p(>OQ4lZXQOm$t&piiCP zM<$+p5;j7wp0J@#s{w%mP>Q$maviEXY%+lL5FSo+VJ7=AcTvIF-i_B z!(|{^?<*O>e@Yf_J*pfB7GX#ew_Jf+8A<$UX)t=KF0znple zNtpppmOi&^etvip32c6)NFnIr#5`7X#Nf<6+1g9cRmU>@;_mfQM}o@TmQTsg5%h^w zfQX=1a2yYE=^SW)*4~#c%zU~qzfQNVYVr)x-EBO}@_KMHS>csg&4XXy(Ot5971%M_ z2>$I>L11BH$t^-2>bGcY9I#D~{FuD)=P9j7uP_Q1*sMC_a|Y59?0jExxX&K= z&%65ns!xQ!tx?I1wgR}mOWo?ZQ~}?yxOh94B*eX;{uxeFhq&5nbc+87`bP22g>9K) z_^r~5P_+{DdFb=2ml=eG#|I|bZ-a5)Fv0e8}_2Ra0NCvfIbNxy12oD(Mk zSB}fTrL8@L~s4ZOmLn16p#ZWRM ze7e`{7@P`zMigwEM*RKFQdVm#1iX4OV>w+6yI Date: Tue, 14 May 2024 13:33:18 -0400 Subject: [PATCH 08/17] Adding synthetic breast data for becnhmarking, with documentation --- R/test_data.R | 30 +++++++++++++++++++++++ data/synthetic_breast_counts.rda | Bin 86322 -> 30643 bytes data/synthetic_breast_true_exposures.rda | Bin 14050 -> 13209 bytes man/synthetic_breast_counts.Rd | 17 +++++++++++++ man/synthetic_breast_true_exposures.Rd | 17 +++++++++++++ 5 files changed, 64 insertions(+) create mode 100644 man/synthetic_breast_counts.Rd create mode 100644 man/synthetic_breast_true_exposures.Rd diff --git a/R/test_data.R b/R/test_data.R index a8f345e4..23289c64 100644 --- a/R/test_data.R +++ b/R/test_data.R @@ -122,3 +122,33 @@ #' @keywords datasets #' "res_annot" + +#' synthetic_breast_counts +#' +#' A data.frame containing the SBS96 mutation counts for a synthetic breast +#' cancer dataset with 214 samples. 
+#' +#' @docType data +#' +#' @usage data(synthetic_breast_counts) +#' +#' @format An object of class \code{data.frame} +#' +#' @keywords datasets +#' +"synthetic_breast_counts" + +#' synthetic_breast_true_exposures +#' +#' A data.frame containing true signature exposure levels for a synthetic +#' breast cancer dataset with 214 samples and 8 signatures. +#' +#' @docType data +#' +#' @usage data(synthetic_breast_true_exposures) +#' +#' @format An object of class \code{data.frame} +#' +#' @keywords datasets +#' +"synthetic_breast_true_exposures" diff --git a/data/synthetic_breast_counts.rda b/data/synthetic_breast_counts.rda index 75d26e8f86020502fbc95cdc5983c60ea9213006..f47f658ec7af107f4cdc75435c8107978a8ee2d0 100644 GIT binary patch literal 30643 zcmW)ndpOhYR%#;DL(d=Om`vI)e3<+Y?sahwlbP5Hx0m~D}cEOMsM0|)XGas8hptk zAKcQ$7aqTcHV7(5K*nn;r%u(>8CXEVDO@h)Oi;v$pJj+^uC!`I)p%`@0hhT4Wg6x) zx+$xp+N|bMgVT?32j++la_!7nUb$zf!Q)4`ZA#0F-e3Ck);5F3_PDr(`fmn0rD)Ep z`ulpKM@yo)GZj3c9>418sq=0-b!!8$d^FEL)@|M&C|wj5^{Lc=ZvDdZn4Q_`?jf}`EM+;_qyZc_I2Sj#oia9N{^ShgOKP*u|9HamsGU**m0Z3N84d z@V>wi0a(QTk zt7=D=W0b3-T;xOCU^yveoPM-B))@lrG*Ewx(XaC#Z&*6bj@^=`Zom#fYkr0b`MQ)enLlH0blsT&d#@MjD1<9NMV< z%>BbF3SrmW2?hs!WCmpSvZKs7MYFEeu@ffv^11&~6G-za4kBX4qiOv!w-4_%#TL8Y8&mLACOo?H)b&Cx!4{;T3wB~u6kXV zvzN46<5W|Y_2~#M$e5V+6LbF6xP33Aep-A!wLqIg4?PK@jZI!82S8Hcc{ge;Gd(Dp z(D40xeePs#y`50%KsUKFn7q}<;wmK?++5U3XV{q%{g66r|M6D)HPvDW- z&zY@PVuDUQ<$YR1ZS+Zuz^^<_Tp@7R%vP+LeQWRIqc$~6pT~PY^GF4#iS2*idNaO% zeIyRCs^?Cy5NqFTJ!-6ZKYlLp=LrQ?xjjEaz=|GzYjNjLqCxP-5XUk`bb$;VDK>Xl zOQ6nga$zYj8ieQ`vt2f9LNe;kA);Uw%6BaYK;1!sUwarHH=8gB*dkMA8`d7@sQ3A3 z0v{{ICBS}}Ya4uy6blOI6JcST!t2zXDg{*^HVDizj5j5?cG&*yrzNUf!S`%z>_b9T zj{3>mV%wE87Yp_;m!slLOZuXxN*4vgGY_ zYCALmn^HsqTAVCJ|LjlP;dAdeP!q^bW)}?2*mG8UHR5!Pp_#@^Qjk~IPEuORc$`&B zMlqTCS)-J1(ESadt<1gq2`@w zt5a0FT{EvV)J$PZ(7U$-IGeW?I)T7 z1tkThIEXJdDc7e06PcVeR6N0sj_mfU~_;L_>gp#mqpR={FlPh zeTY-5=+aPwjhY0yJd|6agH_|a#))5#p^dGdA>qP03wGC77GW3%jqh^i|zL?!HU=B+{%t2ejBTCktJcl2w7cCHYZ`M8XBl 
zi+FWD?C$P!ahv&f1u8)!FziE|I3}iy+k|et88zeHuy;Z!j;uN`>x0Q;hW2TMxq0M1 zN!R`K;bW3eD%)5U1rKK%pw8wUXGC*tpNDUi%3h^OwG)V>TGb3D=6e?!4E6pmI$c&*Lx3_#Dj9s~*>8b+^{t4zX8c z#h5IrJRj4OEtx?$U~CjYF_;ikDSctQfPEk}CmI>>TE%bYQg>bnwPRm;p?7a6T9un` zgVCitiCvLH_HIl}`r^=1n?K1NUw;+Q^U5*yN%Oe3a>3Y{pQA?ib00+bRbbNIGq^z2 z=yOy%mT@6{L(-ZB8A@p^I7I(?_rV)^%M6j>s&?l3K?$_mm6-aCkBd`5X6`ISuy^As zNNGOs#`+P?lN=LOZYp8-M%M^DzeAl+-B1nzN`-Ov-&Y|2Y z8Q07()#a(({H?seE>B5n*K#A6H^!2Z&H4;Le9i?ry;{u>Hv<}g{-*51{dRHwdQ3dS z_e8E?^=lMd@;&g$&l*d;IJXeXa6#C@P2+2C3xtx@7%wxKcOVY-hJx@}Bb%)aL;d00 zl-+RI8UH>TxiaJCBgwieWiaC|nHzhOr83U{n}exmX0c6aMjP;bkNwm;c_ zwgpE=P&^c9stXNke*RUe4 zX|45IAE@&3Rt_KU`;%%;J1n75Lp&s1&;Jf8nqQfOUz;Yih|lQ)q?AJcTz#MN&!YRT zK%!6AFW(sVWdBE;voVeo)f@K%x^TS+B&-#OlUU-|&JnN3ioO+QLd(gW{9@C-yxO$j zU2JfT&SCPST%iykVu%99bGtdpyxYUqAY-eGoIgZ_{>u1njq}zN?o#RyH;wR*}>FCqfkefQ0oROBgG;zt9r`}DJcNsI1Imt6xi$-rCc{Y*$# zo#YR;Uc^;bxd#r1x&C;-7>X)xd_3Tv8!UUo-lpTM_Fc*>FE6dVxA){3E^G)QBpPQ2M@f*cm(gn=SHa-YD_Wh1Da7MGS5b%-5C<2U%-BW!@y#x1BOA;Bf~ z>OBfXdd3P@ekUE-&M51B^*ut_7f)_8nQ^Uuh>!&v)hK32ocvA#U zzd z(f!-C_G(W~P40X??-Nb20S%;gTmuasb+X>A1SbSb0oH|)X6r&}jP#aQ1on|g>JjP* zf?}opT@`fIHMp!vrTB35%TxvM1n3pjvAB~5G;PmwYQ`J;HXm*JFV)GNxhH{Zw!vRV zcx@u$W)$86En8hQ&cu$#p}AhrR@o8DfQ-E^Y9(vg05Yeqy=aj~mM~@uWjZil`1XET z(|almdc6p?gI{a%UXOhl<&*8H&Aop?M~PDv$Mlq=InBnh|Moj`&WL!a5KCN{h)T2w48N9G$<-C zfi7pUN3l^9h=g|7!Cm*;+(^&fX}+~n$+cK%;a|m}5rvg`R|+AWT1mSn2K!8SSDQ4N zMp>Lh{~Dw{n(*QT9y|cg4|&hIS{%=g@O?L7K7p^!i*_9^eX=f*`dlP87}Dt> zHbTG-QR^pB4F8*2yO-bIeQuZcQ~REh{_4y)LR(~5cseC9-N@OP_-8MHPiq4;Y*H+@ z;T_o4x>IG5Uo==UQLslH^_CWJxS+4R?}NTPc4BO3iFv4C zTs}8Xh3irbo>;7wGSNFdaaeQ>`f}Kh5cziVHN!L$c_@^dr`o8uoyRf3j%r96TOUnJ zIk=y>xt|;r@)0$q7&XI;t%|zSLh!Au=pzns4H4Jz$-<07!aG@>IT9`ue=K5H<$*m| zU_+e`OC1lZu}g#upYGl=QbG*RR8M3hO*;SEMc3?*l zf}L6eaRYLKG2)gtul7X@13%s2H?rMn75B=sU|xE}>Fj_ocm%MSmc)5%A;1ZZ(4Q9QbbV zUJ_;`dCXIaauyfP5HqAA0SoU4SW3qm@(YsBpwesN<@b?KMIm?jup>YC5-YBYB7QcU zmd9+&O9|&ubgb-;k7H{=`a5l(+GzecGYOShb@$v5OUI2OUGk7=yWT13vo~_9hYiOA 
z>%g~}F|Ko2SAi7z3s{{Z^Ok(zEhys`F7Gf;v6!e<${?A;f|7CwRIkU>pb@ZwRaw&WLDO)}-t!(TC`4 zH&?@=&<+vvVm~%0?Ww(nWU60^&1&|^yW4+2nJw{gR|1VTW~>)NZjgmvt9gD9ANj%< zX5w>uel!wnUCFYuN_$Nr#IyYQBC0ba@T{Ht`JuAW$Nnyva?{oWI|t;}2OEPH>+jT1 zIFAQDl1NoOt#?`iEqSq*#?*2ka9~|=qnl+j5Obi6$jA$D9Zo^1e4%qAwLeuE$e%Pp zvf*|+ydrFP?)0RI7tk%s%QayCJH3gKJuI$KO%JK}CCe^4jre0$o8^<$HU_IL!jU9T zvd!T!`$TL|-aMD|E#ixIZF6Z}NjYg$HQ?RuPpYk#AdG1<<+JgoF=jYVqj0w~_F&@m zJMitKReqB+ao5P3qpYeOw7rS9rQFX9#sZL8_SC&+-NmlYtkdY)0XN|QNW_Ke(6VGw>#}KfBnDw>2 z3}#s88)}CEZ|)qb;}Ks!&fv}1v>pa+N!gQ4Aq4ALG*>~4zGDW(_q)MQ#h>CoO<=Dk z{Va?bp5ZuC`xe&K=NF^$VD7t%Y9`z=jJ75*8E z(tLXt{Elcg;a_X){nUkD&%e9~rxSe>dg{lk@L$U?yeNLy2+y`V_P9Cle(-}JjfW-; zcRp6K?L(hf9zxqTuEiY~X=eOF!DUY7QXAjIom0O)r$~J&(%3j8+WI(d#+JfZSV_V^% zSxy{vY_8zi+U}!so68Pm&c&}qApbChmq|nYF~w?Pm~YOzj-SZNf)8Lhi2E4WGqqa7 z+&=>`{zfcE2RL#lB|S3XF1t-G{HN6}LhiEijro8|96&}pjw;S$7$>1UOrkS3Qz?RT zi+9?Cp2d2D9DHuDEz&uS?poeJpyj-_((&o!6Visb*uoHx>ettyWl_F4t9!1%I?>+- zqoMEkaPkZNd8UuxsJ)^utro}t zF?(a#$&aB3R&v+!MJ8K(X|C!Gw)Wr;bd}}&O!)f$9HKsMegOPjz7G^!tos{hXmh%x zB}9<9IifQvFn(2cI28>B~_tb0jMzBl~k0=B#64y&3bvn7rY@`Mtx-5NFEZNAe&TCfw$;u8-Om`j@ElxMO8&`@ME_7jbszi z6sa9u^;3DOx@^k)Gq1S|uu`g4le-yx+$xP|UMs$y7Zz8t3eDZ6*9ZhYnMP``TtE^c z_D~|}>v3E6xyuek7XuYM3scfvl=2FLZ|LWYK&Er#Ui}A5Z_5D3wF|cOEcy4b>Nh6t zZj3dyF8+A8OVQTKbe}0;wz;#jiZh_9(Q)^~sbk=obclCmS?FKTRExj0-T=PVBvnml zjkB6Il&76UDz~w4-1d8^)~r&Et!g7wY%_Y)$8RPlUN47KD+GO~}~nsIcUaXEUAsU%-o`?1+-6&NiK< zD^^Hy_IT^HQRv;MDU)oou>Za8bW7ya$d=hV9DZ&orNAl^qJPesm`+n@)MG7o)Pa!* z;LA#TBpRHS41-EE=`=@VU5++`b!nkjcNnE`EeTCsF$#k?^c4K z8@uY3{$CKd4aPY2#_Q2!7Q66Xvwjkq;xNd#3H7ZZ?iYjL4!M#O%?Cf!vYKt@6;(Fe zD%+~UgGq}wG0*x1nyT0DeBtZdu8d6y;Y+;zDR+9kJg=*yg4}d^QpmkbAdorT5 zzOu~e&0?L3xv%$jDb3VzRc#aim5q%_u(BsrVpleQ>rIs5 z2hCORiSAX1$b$Yu`*DZAyV+WxCU!MW%lTn0HV2<1{wN_(1ro;a_Eo)t(9zSW6`0$? z+NI$6v|<-+2i#YL0cNX3Ur=8#82@TN2^&?sGow)Dm)Gj{2YCPa=6}ZEl2vPIw{L=` zBXPzW-Gb8K?0O}Qw3HR#Sf{-RP3^np%-CRf-bQmWz)d*pK5Sao@KvS0V|i1()es*# zG6bD`&4RFJ%x_V?J$i-Xh%h-+Ro+dpc@b3xwlQ7Xs-rugFe%OpK8~a=G2Acjh8rg? 
zsS_(%?A5&nY)>fdclIUNjPuZEi|;s#Lxl`7`iZhLD#wLG0-IUH~X!0-|)yTw$X zc5ort?3Lb|3nQi7^|=wpmkfrLmRwU2PDF(FVe!~QeUXe-&-p|CHgT0=Xw=vw^3GRv z*R-Vx_0bl$8w(ozoC|r<^@#59db@LK*7!owviw+ zYwG9l3tV7&qjP8fUg(K_sDk___`pHtd+Iog=#p2lOXU3533M1r7X9rD*bdTw(4zZe z9DfI0tOM0ZkG}HnkZVZ$=qms0$^y-H@gGX5zo=F=F~1UC${hm59r~VC40l=EkfT-i zKbXp`MnAgDm0FTJKwkVAz)PI#kNMs;J(4mx=0Sg=Q71K_P=}5OYZh9~_X#ioHJkO4 z^b{9#J!Yl#S90G3)8JQxv87|lP^4TEBS>;_U&DOQ9Kkr?wecu#xB1{y+S60h0gbXm zb4oLMtKg$r)O~)8D6%kCjX66wuq(6U7$hFth1i`Ku4gG~TazDe)W>W_gDP)Qd{g}4 zawR8}dHctesr_%lhrojn1;*F*!+3V!gP4r$-j)x5fCQTo-~ah!Q8^#2=4Yb28m&;F z#_wapyT)1hC5H)&(Ewj0fE3mIHV%W1!;PuHUfP~hFN6=YB(;ijMUk33ah{%V8N-m! zb#mmxL~RgFcKz8x>sKArU&yTI)097v7`SufeLc0jyUylG?1E90)cZs>zs=3bv3ES= z3Z~F%4hQ*QJNNfmZ4OqTuCC6)nTAj*07ng6u z!v5IJ!E_hQM*YT%jn~WLaVg7v8y5a}b@TbqaQOl5|3Hq$_#l&U@c+WE+aQA}H=YJ6 zP@f$RfL?_g+gH*I)3ps*d8d`zaP&7DA2?6i$6sy{ccW%mWv;xxXxl{_M4G)bPX6HrvL;=5;38}h`e;r)0Y!#_?%U{HKgZiK7xz#ooPP8GUpD>uDTV?Uv&$4&LwofZ;)n_$2@Z3kw2RrU#-Mg zDU*+rsq-t+zB*D*u~t)Ae=98jz(7; z`C8yZIwigDm&w_79U0%<3tg~O;dwk~!^?f*O!cb7jYSo8yyfhGHExdTQ2X*Z3opLp z8ob)K(V3t_Z3}2Ds$iTCI5b-OJ}}8MQM@lW1Toho=ePLfCFh2IyIYH$@T_Xw7RS&$ zJ~S7MBqZs)X1=}Id!cJR&&cO)%Ok$rjm1&<#==fx%P7sPmDkY^M|towypM=PqMnHtANt`>s$#VMXtfQYjB)W!ZVW3sr#4<5w)qT5)S6H?dxJGo zLBLOJ&!6UiOQOhanCPA^xer$);sV6rHHE*LyM55=I{`Iup%3|SE=DEqs%}lR+*9De z>={L-a`#~7&a8-Y7@O?jsk8!ERU4#` zpwW5sX@QM0wQdTR&<|?rh5hLJXdb>M zxa%veOJ{mRzmVfe;n1y@tt9=&OlkK5csG1P`{h21sGU>7E) zcLo}WJ8QlkQ+bDzrTZp?e2HJm=lSEEO@ubuzJ3j*{c36bDfJ;_o+$ljaGyvCAxR6e08vadIdo$CIf~I)O;@G zl2U{B0zE%vG*bnMnUC=oe;*tMZFPfLE9n1&7sk)HdC6II>Z~F!%sl9F0q|7o@bb85 zyYQ(ps_w}+t$q>gmz_Y+D;Bix$oBBG!qbw?063_eH~;0&{f>P|LC_vcyYmE`Q%g(I>fD-mmrehN;mgtSOux zLC>m1lY+OoHjv<)CmHf8<0kahM@HEa)jR*vMke(Vv)l7wT~3h2oBQL|c+@QNh>I7fHJ?^eL?KV9543JW!ABl#5oOl&Fo-(-2^)$w( z=r_u3HhX#iBau4yjY|*-6};Z$Be3{0 zTZ&7@mlmz`!F>3^DFCs{fLY6Ej<C)Vn7 zwh0Be%vEknby{Kn6zLT#E#$(c<%j?=a>EqI3Z%$V7K-t)=`YN^4^ zacHxNDTkHI6(VMmgma%1Xw%0PY4zhIzcFqkU&-63r_6Y%Igs#Mbte5 zC~r^sZUp@1vB%SsZh6NW!e2bq#A;y)g_ZR#Xof~%{|D;bk9qZ9zsz)gychH^0z&w@ 
z-F);zM9FV+xv`D0QOuhax7`!dkEg>ZH22bH&+9^d|-_Vjmu&E(zKrBp`0Yom_~ZMB^KLr~7w!?iJY7XY^YE=K#0 zGHLE3Y5CBOe3A##(vT@rpdC$zWom!hF;%svS^jV(s!c=1I67xTi!^q0PQoh}7uzJFvfUpb7jZQ0zmccf z?+0Bqxbw==w!+pTiB)ZQ7Fi+saZkQ{&AfX>6Tc_XVEKTVR9nQ-6PUxa@lFi7g2wNk9m zqA|rhaNv!wH3V@&iSs{B4=xU$AA&jolpce?6AOIeUMQh4`CS4D?>I2(1A5F7 zlUk>ak#>)7;QM7pz0u>#O573`WSI>P9;8(b1jAeYOhAQq|F&}@s!^V0tuAuZhQjf& zUG$+KZ$FbppOhbyt4s+CN+*`sF1>1iTH~+f-Dxd^f~z~rc1Da!;c;ipvmS^0Qc7sY z2;?zT+tOL)500-p7ewFi8-ZK#TdTvYqUI2l%^vQSeS<^NqoWJA9(19_Z8T0nS)&QN zG$UzBqCv*>F-0z&9WgT3IGfriP?gHW1emQovdJ`9Ky(pv|Iomt<(KyhKMHi`8;k|q2258up^PQ38CvL z)spM-UVyKr=kTk0Lsu6cJg=m`)Y-hMK61IprP z<`Zb*yD`Ps0X+lJw^FA-azxX;2ZMv(Vg%{KfFvEBh?`u)NCWd@hqnB!uC&{2b|#1; zJ_Jc5$+b9+pcG44zcnxS%*4JlE%%jQt8|52MFwPHM$9DY)b`xVL;Y?of19mE)7mgC zxSYOF`XoZJ1NFN#z*a4Mm>hYO4I1$~nJj6q%<12pZY3pncr!Eo~85u*i)hR!_QU!g+k$=ck3Tkp@qwZ z!W*%MH!-@(kcs#LLmph>M`yZwmU7$24#SH^K(ig{zg~Q<>^WlRKaO`MUM?Bl6UODl zTLr!kZ#1Dr5%i}(ML{H|H;f~>R_eX_k8olL&zG!DuiTld*1CR1mKhL6+FQ`4$m#EX zeeT?AnVi9ES0g4B=hG)77w1&#p5eBqu7Iu!^d4>bVgX};A7^#4OA7aI#+t+E<|o~M za5SdZ2Ttph>(!mjf_8E@^t+D#5XrF5q+dH16nd|xwc^?c%nA1Q9NbIH=zO4`Wh9lQ zKpRX`3}D)iM|GYliyb-z5>7YjmG;tr6N)CrcDDkgrFgrgSf0~J24`z#tlAy(StiaJ zuW3IBU5=)(&(fIPYkPht;mKR`N?n?9h5I2@tGyuK#Le+2RiSs7 zwX{w*qXPB9AcITtZf?_BuoFFB@ePN{JY6)PqN`E4e;!nC{)+Nb)7RA)b2!7kmt?aC zt9}wV0j~bGY08rScb@8iI}oq%K?NANPTA zN3Sb0bNzq)K)F+5ytYK^QfPRnOlXmX@d+haZkTaQ~WOtJ)y1TkG^u$o~=&tg} zI^ofS>7*qK_ECBI+<7N}GOvE!Z0BdbFXZF2{I8bBezPP4mSVuCQsw4ycV2hXt?ChG zl8%g+Nv$yF!q*KI=GrG|py)9G3*yi*>qr{R4xx>Ki^lZ}3?WCB1FNRwU9IWXe_q9Y(SXUk(_#oEiH*Kmw{ zXz?rzIxy7`((16b8~lk5xetVw|LUMStLlqWmi&jctzxH@rxP@nVScPR)7*V$@2FJ! 
zWc$~q(U4)fH2nkp8dGRFI-u$bWGJY+5YcXj@xvp9zjcMV#j`JJH2XepxF$UQtTt*$ zfHk)t&XzoGz2Jhf>4cLOhY;Nsn>t=R^& zFEHX|2=PVVC;1cz=a?DB!Ty~B4R+a_=X4tvF4W)t9;Kmd1?#!S-jfD?Qef($kDfJM zB!0J=1B?9_(i?VmZKq}T(^Eaz<#3SAS>k2TULCfdYMEh7f0MxvvrMP;XW$x$hGuy= z8ITS}I{R>y@+W25%8pW3JE`NF)t<@g4jT%?-0DQDRCMq#G;)vXltWDy>NmvvRZcp6 z-3>L-SmBib)cBdTLFixpNzxDS+S#wR*q+6lb9x`~bMQBM*=0p`@94?78?pDIpSpbn z{TSSVeruNAF@zWXDvgg*U_8o%a6=k&BPn}f%zM0_`a}9IYcCJOW+Dq$llGM-kq>y` zfoWLtIWtV?l{)-UavJ0~-W^^08~9a}pT0S2()IPv%-+4T`l`sNUnSpC+eFKT=mH(D z+yhJ?qzL?H3^I18VhqtWd`cT+RD=P)7Q*@Rt@${IpMtU6#i2)(ytX;WDCnF?Z+u6| zaIwq%H>#S$9GePbzq`iPQvz`(;_1Kv5CgrF7a8y_?-$L-jAUt60+A-DZf}J5OueX_ z3SVzGg#}Bx&6tm#C}4W~H@p6Ihc^)z>rz_Pt47IMUPU@r0~g><&=>8IGlQNdSNu$T z;D5`&w)i53R6zQ7DF;1QaBD8u`}xz{J5j7FD@XG@isT1522xWgH4jMo>IQq?u$|^w zFzFvQ>FTu8L+(}N)jLQtUh~yl%d@7jozn*YBfLo%wKzwpb=|7WO76W)5BrRelg=ZY zt~8yv?iAT7Z(OQ#V`Sa;S4ItyeGdg*9jQ@mQy9}eMTpl zOD6ea*Wsexq8m3^>C)(tOq@rbiu5Cy-yxm5B!?$C*ytSJNEedcx$F-V+6$~j>o>vD z?3(19Yx7l`N|~r({U5EJqK=~-*8Chm(UlEN@`u22KPIh1i8Ehzt=)Q}u!)z!Zey)= zQ6F~TIIGg%^{tRU1#6M68X&=eI##CVw_t2*h#eXIu-&HYgkMA>)*Vf7Fv+lP&v5xI zajxWAIfkcYvHd0{X^;4#Qn$u<7O~W=k`CGI*lc^X(ctN`SbKPCCQ_)*X6Qf>DggA$ z=9`9W(7)%joKupAbdcOU*P@nW-7!1KI`dR<@S&qTXoeZb1wPy2G; zikt4n+F!Rc6V%V&APzsdkK}%P$9Al5vEbVl|A?hr88#UhOZDGZAMvqc zU-875d2-j#7SJ)>6b+jrB|PmWzVEnq4(Th=YU+<{LEINKE76SP+LIP`GkWsUw4AsJ zK_si)ANmCGD*6E>H#R&@s=!`PeIPXPPuF9Duo;-EE&Lra~^kkrkMU+#;oj(CX@;dD;Ne{4Wfv9mc(1d;{= z6%lvD^&$}!?$p+cBeCe+JE$v!$q8j*p}ZtMLQ&g@GH2Tdn9-PA=_Q-bnD@lD=Gm9+ z16Bpg1v1fnoSxlxVOut;b+_hB+u51E8&Dx$u2`=IWb|N) zuUtqY+N1oetaEr?kMLR!lK#FOJ10~bDhJbc3aty}z+G?`qsdfgeFozV_*Gc!Y46RU zV>lTI-!U~&*mLa8-_6~)bS|w`h^}d`99IfE_&Y?TF6)}_fMvDx z7%h63TbuBd<#%Yytqx_kccVXO@jZO+g6?NbOV_)`kyY4DB0{80*G8GRw$z!rsKRT% z8=YJta!(rmq9Dgu%3Ar_Q4d?2(9O+@cpdN0tOV{G@K${R(ZGv;U`;b+o$;sfFg2ls zT2#b8+USAo*6-7=vDk`AWmLL>L*`=C>g4epmzL^rhEj@?(lj4`aju#F#=nO zClvC$!6q}O-xG{&56^pI<~z;ay)sRYDF&yC-emFy8w5yE^{* zP3LYGVTKh+wTU$nR%Pn^Km9FjE!f;j+y4N*MnyPv`mkVG-OGLL(IW1213};y>sG-+ 
z`XJ*IZP!!0|D5W{A=sqo7F}cxMEA@8TWBRe5ly}5<;rpU>k%&(jF_bYek$jQ+t4J` zWi{4r#>SLjJu$3jBPsaNzedHgGmbvZ51^20-&9m&M~oRB46;3}YDko<)GRo~2{mz| z3OVr>iNyiz6!j*2-^OYBLfAs+X~&#~dksda(t*DmHc!o$B;YFrcDhiBXhOj4jSJuN z0;Pr5jFMI-tm4e zJG#Qf&DpsnAU%v85vjtjTqPk7MMuT^{sgTF{n~W7+;KK%eIz@2U%m)oel{NkdI5!R zUGPE=!wRl24=R)x-r&=<7FD*8wMRB~;&MkLr`)J`{qa^``2KUz zMMHy$4O5mW@YaNj-Aa}2gc4EOA)$||mcjHF$zX@iVHltJ{V^ai3mLntIVSZN=>7u0 zno-g6=2*9?E?>ZEFUk&sWM}3zi)6toe9}^${e8f@#8lf5CS9I>HP`d<`qr84+%#Z0 znpCbos?lYGuNM_mh7_#tYrXAUbXkJ(l7`D20vgFSopzq)Ncr4b~MEx1YH|rj$&!a?EF~ z7FSXz(n==8Ay7(0czD!fN$&R*->iGo6>(RS{4RS%+JHJCTj$vP!Mgo-U`e%f*$gUb zFr9zpZ=Z~JEu2x^g<0JzLJ=n4u70)kx(5a&)n3wS0^VEy$DnE!gsY_?2ozLb6+b+&f|zjtVa zgSVa6IG?JjiMcdAcX@LE@b{IuQnH%7u9fb@!lloCdp6xI52NV$_eyGD9);3aLw(ZOvyvm;OY>gVZ3W8eOoYIAPMRA(FP41=Zg;8az%IYh=tTX zj`Tu!AvWr!-u;p=QJ2ec>f)(uDIL;360W)#+ZZj+7X3t4(O-r~VgAC?I#g_9$gZrN zN{kaU3cb3=Rt3Y+C>1$X-JLt1DDob5k4w|gzhbU(Ib(M;q&6z9X$OgQCYvm$>i5|v z1D>(me5fR2Wv@#zj+fVJu>5%1|EcC~ov zk?X8d{vEOqnUIZbJzQ3}lX4o;+2Ev?>rCoxPmkgnE%fl>V?KrJRq~L|=y9+}->m@| zCQWw9uz$=}sLh8r^_6HCzLONxoW)oXoc{|UGaGi*>)zz;LLWu_%34A0I*ue6JUO}z zI_)uu(UHU$)uGQyAGij-m-EZ%^*yw zP_&b-e}4lU&Z;)+Qfm@(tf0t_mR1|ucSi`nrX6Osp5j!C;hU9-r@s6;gUjIGvPg$? z%hnUV?guV$DhwE&biY#fyP>d%^~&*R{&4B7+ylk{1!$6h9EocYT+gGev~T| zAU1w15qL5P{_fdNLZ$7m!&jTP!2>5A>vWi+_T89lYvy1LZ>XgGhK0q8}9Assv3^Lb{<^n(iS*{*!Xpgto)96Z-Oi#6)bbCa-kyI>YqIS zA-tPf$rgZ=QEp##gfnPa*2>fzynSAl5>X)xluMc|@K~ z@vfkM?E>A;#4an`E%=|Zqt#}t1)8P*zr8(v->|(bLw}5AdQ8umx{1Hs#=XforGxk@ zo(1zp`utN@>~~{>s&C8ah8bA)Fie-)iA}U$%)j3|)v-Ght02G+O_m~2#Xp}vbG3u! 
zB7ru!64exGInrhSzeoMt*zMokVN%WmYAmP@)5^#JXG9{e$gKR3Wp;L9Ey*`NDF=in zW-#9myN0Le$Z5B+^ZA_~WP`R0zB73&$`llW+8BWha1JWboYR93LX-jmRK^8*63VRd zysCm=jcN9E1)wT!?CgV{k1KD;i)Jv*|7+|#gPK~~w!PhKL`6UZr4s=W6af)K zCx}!VMK>Z{1*8c9LJK6YrATjrf>J^e5v54)$)-psLg=AK2oQQqLK2b?c=LYm-)Elr zX0Ew@to38fteG|UeV^BHp6ku33_1h+IIo-*_mk4tuQO;!eeU%AkJ->ZSkaPx%#Mgr zL1-&eH?fkvd(O%4_H;bLge0i7bb|DQad+^iZ$AjUDo0&IB^<%$=!7%NL!$3=f>3?_ z$dS_!$pI<0{3AI$rJ5J=AR+_q?g(&7n*37Xflclk$^gghiR9zAz`W$)&6sWvzByc@ z2Eb`Z)Px0cbP$A!p5H5Ejj$}@1%{vg1G+%aqzFePyk#jkeIOq22p_+}M4Chrc;oo_ zwzj_AdN0F9o?w8eTqW3-B+{ME9BTEQyfl$jdSnB$z3N}T?-$Kco~bX|>$ea{zqNYW z-<0ywlE-+>Yu3dJV`&C+!%XDe`3ois0iuW^1}}iog4dn*^WP(sn_BE)YxEb z)I#u4w%)OfXv@+LrJp*(U=hiW!RG6r4PE|{e&0AV2w?JFIk0~SRq=F1Qj6U;c)Zym zFsoWg=F$~-BQQMkOY=w1V7y;7hUHF*LuwF?B!Wj!IO73m^|kLG8zqfKDd71RjUiHX?g8=Y33_=i$5LyiX>sXdE z7x=z+VNo+Vq{AR-C3H#gC|AJc0^)ADi=eV%4OjoteN9=`ke=dU7y_!duKh*bbg~dW z4k)cE#chgT^c4=C@!cwQ3Hn?UWd$c|!XYCH>kS)5)@c=hFXf+&w8dC;Np)vvTS!zu{~D;Ul%P7U)fWVc-AObQ6Ehz3 zdWBh-I!}6Y1Dy$cyq~VhS;DoA28F1d)RgUj!-#u99HOdxKZlNshIuS7u5RIhZI;qA zJb1}6m1p^3ss82FQAMMjcE<)~9tlKSDr>ETr0VtRa7c83I<}io4U%;X`NEm1M_Bjt z&&eYI!VhO@m+L-5B<-*PUw>hDD3|9@LIM1b@?4TSH1|GA1^-DHbksM4--Y|?It}^U zf-qKf*YW~yGoqa7E?1HHkD`9=E)|D-!b7|mFz$t?)2+U%k$)c)hSH4%ol< zumnq((j*K9jKwndRmRvVq?o`S?WoLOt){DOsln$+NU21@Q!!-C*S+tv4G4VNQA<_G zigM>_pXM)^m~P@fzh^Ja7?r561(a{poo0*--x=c0LDlPa?fd~#`9OR_Xj(x5a_^E7yi3!BiKAaKaO zNTR+Z#lkUWTg7R!l4dP_v&lPHb%9v~hiH;Ya5sOTc3BU5skPO_GfE^oD3AlIHNKu) zjTm*-l)6o>Nedm0S6^|;Ify9OFnStqz0U~IA7cFWCcSNWgs3rELd~b`v@axXC}1(r zbnQ&3Eg~!0kZ~XoeZN)w0^sGOLhuitGXfH;+*APJ%xBRw4nD1FZ zss0~?%D^mqcQS+->6ZR{zPQYjP8X5ej;t!ZD2Ed)N?Yz||AWDgxw!ny;dYCJak&$7 zTeKJeRmi#paqRqSKxEhudG$HyJ3G7q9Wb#qq!U!L5mtS$dN8A1{Wv;Usl+2ZlRl|O z%#qoaQ&f;!>AJRjU)sgSb1}ta3Rz)UKU`pFxp@MFsu*>yuN*FFTf4e z1T@$4YI}8i39uqra62%heUkWMl?@;hbleL5Ak8Gw;D6^&1XTAk3V!rAd+GsF?yHjb zsu|yf8OSp=HOBk*q&$xBl9DD!oCQ>r!&L=O!$Hkaqt>k5!*Rt&6|GZxklO`$6M0d7eyQP~T9inY3KVwyc|_4xu<7wUDrpe*qY7-@Ko2k=-A9 
zohO5+%QXNL7nx6J1%j?WK7Q5y*DlUL%$U}H;w{?lQ>2E>VIw@;YpM2I2#Pl|o!?FM zZg;Ct#d9)8PawX9cpKCaXc=MOPYpC`gFPIcwdpobC+~p$4jsR38!kE2qn_KV7l0z|Gew;Vlw=Q7Uy^hRsPkvmgXr5uC@Jv->ov#e%IKd024=|59%__n%0v-LB*|vrgk#kF%DG)EdRe z^R5s3USqfXmsYaF23um=e3ZzYR%!S%s}2Pzc47wOJV?`BkJz3DX6FM8Ku{$175udL_WC0q6DPuH+Q7H0sg&(a4KO~y9wL^MW~b6n4w8kgm#shUd)a_AM|G0 zO_T!l?0oBIlq57gHr!8*>o1u9<(9 zH#VF)Cl2WEvFSgObnU0tnPxQd^O5K}R86yeDdXvSdo0ETDZ`<|-~Fwnk;~F?V!nb| z-V(ofY??vL+7a1(znLHOB`8dRRbugVWv?$Uqf>|(hj2wp_ZkG8RTD`hv}Nj~`)-|g z)9=8y7&b)Be-=m-(ce4yLj7PK>7)9Hr>T=>+D?7t-8G-SzsBkVPN;^Gab=0i^OfD1-K2oCU1#=OWIaJ`4{2^?GLEqm3GB;uO_CK7heB$~FcsRjhF}hihu}41Nb%{Uw^dMe?>dKL;pm|~YXU5($ zLIy8Dv?qTw%;y6puYyI6K2hS0=~t;yckl|4ytnwfol_5lvm>^g2TyLR7RgIY|B)?>_MEg5nhAtOOwrcNuiw=!UHZH*nxXBQ*E0SA`2#`#m*C*aJYFJlA40}J(Bi?%JFQe|nnaq){u^|Io^(=n~ntl z$#9iHQR<}Zs>5UxCQYbGu9X{;xC;dNM*=1(x^Wn3H-oa&?qZn*l{++_Hh#PLe<^C0 zbH-HleCawF#P=e+A@14%PS{x@ma^MVcMaR+ zcS1egZ$CGi?rF$3?YOGnZDQ^?FBh(C8mKfvRTj=yyUf$#c)0i!zkSWkdDv)+dr`aw z%cG6HC9;ddIsJ2x?rii3qpEC-qk_?dO@~DA-wO{AR?B2OK6faRP6>A|TK=^0J;G;mWo>}j<%&P9EY1QORcy;zHH#a=-}j+%GpaNE z4di9PZ*kL|o@;I>#ECIq z1DJqr@4a7L`pjYrno2)5A=UT^IlEl*>C4mcOq@=BETSJrX2&1L)uqKn&ejy~@G`wf zTi7ue3fU!^T}z*E_kF02rP_BfxP)Ek5}wsW&(AZjTt`Y~f|+69>o-1b>*v-mZ@->D zl;$SLL-vTK1C90iu72wgxcEoBk%ot?yKXfgroY0EZ9F&Xiok&7;A!mjQGpT*{VEXd zBIuS6bszDCf-1suFM5o`gfmP@AK2qyM(A5EX2H)!rz=lgOe-{9R$l^7A^<(PxQ2j9b^~@1wY!w~@;%0O+x?9a$(lpp064?W| z7ZV+D`mK?!N*mJU!1@WO3>Q1hAe|%oRw{9C-=hw<_UfhY^6lye^SHy$+cn?Ob|^i< z2o`9SYhhg;<9mQxG2zLS-HTltX4Cgy-9hU5*E8|3a>>BRVVj6ME&}rkEXMzN`O3Xr@i`4-N*=n?H*ctRIRO4f<^PhJ=_yn$V5msH>=)!}CZ5Bbu zhp-n5U-g_INaykZxFbwlGR$7@+jSDVqo<`oXI`j^ad3fc5DIoj(RPm=aZ%?sy&WnxnI&#|JcQw6Fy8A#gIYb(W+&XK(9MyybQ5Lx0_ zs4UUZN<$|@@3DTf`*GdtN{Rx%K|HNfzh(lAeQk-|BN0#cF15X^KR1BTt}*LazZ5Jv zv~uTijdnr@*2`c%U?9mJS3NbWDPigjaxP!__!uyU8g?@n-%X9*z2!v@RYcB(e#4_m zX0DP`s4`oa`sl7!3Q$=0UxJs}RrF*jUXhtg*#wOx)4#N1u#^;pmnG;46Wll2u=}pi z_qq)VQ~Hy84ku#0^0K?!IsB`)84C^leTSMy?(Rq?@Ih=Q*$s)4%loRHfR~w}zh~cT 
zgxa^<2*rf`w&|wdbiz{rw#S_1M|FNsH+WZ&l(xUV@XvLT>Zj!LfY*VIkKOl_el zufq?jR@67m3Rp!WL{r)?nRKv~#Cow}3?|941N20~U{}Oz->XXtFd#Qhi~Mt~a}mEH z5$v#ryXC7Fe3?4vv?2jdQUa!Z`wRb0T$7qdY1YjIU*edCmQ=-G3Gsj83mz0~Y;XSE zba&o1X)ost&-j7#*osaUzU9j|(Fq91Il1?$UU{UH@duQ`t&s8OF?|+`%@=1KXiXew z>vXxrqQ|^ce`MukfKNtPUU(J^QL0v3g}==rRJ*jVTiV6@ON*%FBWMmAEVfnHjwy`@ zQiOT6e~8MzO*0M$)*1Xt@V~Uf-EaTKI@R@D?Qojw-FEl>Q2Eb)p@Ze-P7Ljdl8<`C zmzxDEP}~>A?@tM_j z@sWV*x2GHSu^X=>HV?;}<^qHn&NV*DdVq1Qw#EZ@Ey%QM%K2JU>)qxDhX(~uvv z_9A1+L9_1V;_sQgB2SYx=WiN5-uS?@&k_~)uNyAKyH-t+bn(iAm)IifyG?RB>-J*> zz|xk;COF!NGHakgUTb_`RLAFW$8x8uY}_W zLgq`k?1!?rrym~oEx^o}Y0O=Yo8Nefgu$ z;y2yBbHdE#98)I6`@i8EN#G~*SKUy9-+C>{j6b_pz=NmVqi+VP)E$q-nePPy>rg2^ z21A0gxRNG4;`trH%Y@p~cI9_Lhx|#AMX=GMx6&!C+NN94k3{T zx)0iiTFF|nD13Je9C1!nditqF=>d02>V}XNjbrP(0zL<(ojw}6#g7&-5|{A$?hrKp z2G;ZZ-2tAl6MmZ1-rg6lc^W6v)xndfI^yXV|5tJpN^b5ke=fKb6J!hvdw|IR-~WN7 zOrmeU#u`S&LW{hQLLZrK)Yt?rg;|AJ6GwTNaH44r5fj4g**MUKIwqW#yFmFoAErbtl8~9Vbz_tqY!d`wz8~O z7iUfwn>_;fAJv>2`IZ?Px#K~a3m@y&PK)U;^shE+p}hc9)SvtYm`(rLnr7&hIm_c%ANk|4B{uo{0V4^q&!E;E+O zm=O?;1YS&}<%>+J?XQVN<(rIMP~A|aiW4qzVlh)iWLPI3HH9XI#{SUq`x6RIhL!QP zJ_uufI*6z(Tw{2xItioiPjaC%sU(FA@K)vMjG}He&!cLNcIe3SsUs7UO|{O320Nt+ zl*j6>!5t64BFA5dqV1IzEKegSsYqq&$aW^xgmyym7-Q%SeV--Mim#)2>tS(T6&3GJ-pNrU>2ksOl+Y zH{YLk-1LP=6_|p^Pt|51D+7anDq}uX;)*?_C&E-GD63)&W=o%G-o1If z*E@Nf*Kg|&5E^bu1T-LyV`VY-8w=Bsf2_rmqb|xD7%ql1{+6@Ic3OLxM--!IkHpaB za7t=m#hm=~_}C$xRjCVe%B-bV#HY}EY`tREj)&`OYc!6rsPY5Rihc9k- z6M--pD;GJnPW+aRFs&H27CB5|y2AGLlKj>kUsfl#;km+(xcWgi@=F_dlz!j{e|bED zy>#v)p@(>P^RRk=qkCEzs1zPgR^%hPfvqQYl5aBS9ly7K8q5gxBHs)EtNA5_o9Jhf z=QDv~t6uUw1)q4?m1EvX-`R-btJ_kaFzqHWXPjf$N3p+9os-YBLnEHvuiBV6cr}G3 z07F>Q`SaOeX6gP^p$ScQE*0h)zyGTP-Z*#T%CmfON@3JFhTl*1HcPGam<5k$xCkDK=|?5^-GMbHUxB5C#OizuNuvB@RK81iC@(pp zyCp!NF7z>wC)gk!J1(d+S@|4#h?PMZuh(lp;xMinGiwl+s?hZi&c zZKZQQ^_z4tVW;k~36h3|B(5b!j_nW>_$PGQotF$+*may8$(Sv(6WzB~SUJXh7L@jQ6QG5D&<^^nrc-ki+jplh5D8g& zbVVEOJV~T*Q__uNP(r?6^27+&YdA3}!8f;o&a4DURl?T2MZld`Eb+1Md+bUf&=RnJ@r8Q0Gxx}D4KDT}= 
znU+&~3twfJ&6GksKZ>rq%^H#%i|M8`o7DG2KV)@>Ae8iceyb8DfYAf32jap|ImZ#z z!CPzj;Yk{~O>x1f?Vw|RCxbGSZH2&A8h_wPLG$#=F{L-^+7lZlgazp}jZ2b!?BSA1v=DlN^julwB0O|e; z7bzT4?2;wWfNRK8E$SC?r!>c66z^5CQ#QI8W==s(6LpA zwV#RIYH&dl51qo}lmnL2SfcAeuBT0EX$2yz0K`qbO67xaam)ip(ibdiglbTqH0Y;l zhzaRLk;-zbX|iKQK!<&cf5$%WM!9u0PcP1OWgz78p1quXhYe+axmms>v^y*4 z36%B=2kQs_=oUXq=z5bur}~-^WQn`{mvF03%Oa99g4^$B=>K*+Mts|@25?fDj2JZn zc1D}xj?E)Z5?dy$C4uRj)t(cdkbNk^hAPYbDEeqnCl$?D;CxqSNQU7i>R5Dm1`#!4 zwDmcD|19yb#)Ui$dZhk8Fp(@Kf^wMP|12`I?IR>B{7utZ_^N*IR2kw5$0G_qT9*S3 zl>x;uHthkI0qxv&0zX0;ntjIn0H&F36K9O~IH3vw5B*O;0YgAhfNB-m8ogBEu^FX~ zgVuzZd{~7cg&ESa!6Lz%|8Vx6`NGIanaP;)xE@Zf3L}?U6J){@{I5YaDdaps#Cqoj z`|_L^_8ueeXh^sBn|d5rD*HrV1U);r=po#A_Jd$y8bHfs?~XTUk|$f0c{v-w#f?}u zWCyhOMYe9-?f?raZ?vDAK243drzfkQZ(CORO2}zC@uVbLmc+Wa{M{5THJ^qcKl1V2*_3%ZF7bHjo=a%~oyVnfeYPbouQtGL)+nV`UyTslOb^@U7PRm=`q-I2h-t3jjCqt^jOFi#%5`B>p zq_|=vXLMfXWDb9i;t9TP6rty<12#iC+|}_lLEfEAgh_z!?OIOu=@h9c8||!LaWhbv z`&`zq`(?3&2wj;M=;a~!X%qO9Xt6`B0}OclMo@C0lXIyM$uassBrgN_XHP$9ze(g1 zp}~JC5hmx+7={=1k9k%zrJk1oPxJ+x%m}^~)35XHB)PC1^1HJ?{5Ive0C{Oov5qG= z-ScMJ4){SQ{o&X@t}L(1p|f#2c0fy5&{4Ej@vrf_(DEAl0UHstQGxN%ajSo;^Wi7U zdmzB5%XI8D=;lkOyp}_gxbgUV234V9?`4TSDtD$ce{UGUPR)Z?;QViUW*ka8cN-xd zM&`b5z&gyRWG`LZ7%9Kik{Pr)$$+^HubrFQz2$my>+v8Db>*>Qcu?XVXX}dFLDZRd zvQO$`!nUp_ExDp5F_ZOXz;zCtB$B8~y$Ywvu`D^*43#6kd;}hw-kSg) zxm)b3HM&dQOzYopW`Bkk>p7)%xU7lmLy;$-SR!nhW6adu*CP~D_VE3hS*P*Y%=y#< z#5{Mq3+&^aX;jvG&RAsBJZkWum;=hIDuNfyotMv%3v~jDegX$`KOV`$We0n}-;|g4 zy&hD@aj;sqIndwxdoOhR65myZAkG(Hy;1QK{hD_{r4a(jCk>u#jN1-pr6mzk(ccJF zQPZdRz4}|#uI9eyNeiy%<5>LWQ{`AttBWeP55>l*r%KEQnuDY3pRoN5gT~LS!r4Do zPLPO(w1xQ!!pW&V#$?{UMStiNnpjfFVn19S(BXn;O-^Ndth0{=?m0)rEpe|9%dKen zl}EQ+f1!C()}>oh^tI!^Xd)fQz9;QUkmWvZ16yAHx~o4Vx@>72!R(DvbesVYeAh5R z8Hjy>uQSM2mt8go3%d!4&i3hR1y6)&g)ljZA1+5_E~YN5@+kwj!20NMy(j%b*C9~2 zwFqz@?&b0k33A_fXf7EhLFNanF{Qg^Wh2!^0;<9uL_#7_=Rgg#nJrT?Z1u51o$4tD zkr5yYMJ(T3bx`Uc0#B`m8r1Q~5Pd*ELK8bdZYv9;25vlIhoi*)CNXx~qU^ENqq+Ip zAxk5Tn}TiyIRe<6UE5k0EPB^c2W+X`05E}FMLT5De`!@)H$-uux);1}yzx7R$byOT 
zro9PRk-$;M#^;Z=>hb@w3{;VC>78+VQoYOZibPDU)S5er`3d>*Y(VXZw&s`SS2f>q zztlue8HU?L&iz5Lu57lpcsf+72jRDB5<{^|n4g9Z#6N;27ScE7wWby}mY;e;JTZ#u z+3s6Rr5_69w>Q>#TO2x^(0s8U-{oXb?@EKskqFztMj;eb9i{% z9B20A(#bQM6{K7jlC;Z{sh2}i18A*tM26q0D!2Tq+7TC{`W3JjYrc*x)|rUrY|RD~ z07N+1>6)~j@Zay^FvGBI{W9FTy<*=@k29!`kr&((0w1!D?MPmrx8Dws z$%4(%WqE5!irTm|D9qVhG&?0GG7>7NeD>}}ZS zyJ*yfM3a-T$9LQo+1{5wd!5P=qZ+}$V`19TTN^gr@eP4{{_#hF&Q|L?ynx%&SrUDh z>d-N+Q_^n4UpecBP6m#SrPMoa1#I)(x$|94NxjkcGdq}nO6i3<$J`uOnyWFP1qLD8 z{mNC(DbEy=!z7{Le;_|dms#J3A#Bp+7v*gkj_JW`{;4O|d;VP2|M1Y9|1H#!wc(Qq zUP#|Og16Cg!;BWg{8Eo-QR{NZ1pH#86F}J4EQ9S{@XbZ0<-HTx8~E0xuHb`13_)uk z*gCDAYj*o1;Wkp+4+O3H2Lf2-@?EudixL}$CU3Mo4w|Vv(zc{;h@rhCGTB(i2`R(< zkFHm+-yYho7$NYv8|cFoXSaukHvNUk*{p37r9R$CK!asTT;bt}%j24;hSADagTJ=XE-J3eA9r3>8Xsqpf5#*h zXxd@{SMkKdmgzDzv?Xg;M~U*F5`5UQa(jJ1jH|Q#-^~_FVqJ$KY8SdH@rh_s%c=~H z6m+*V*$QQzZ#YgF0GKGnFIBok0pvmK<|KKq0u2YZTR@1D$O5>DLAK*C8V;$dOfupK_GMPBLWq&7JM{i}eOK z{`qNHlm66-)m{|xzdrDOl+PH_poBVqFZCpWhXO6}^I>jT2F z!dTF*qMvGM5jo5v$D423EHH%^EM2~Vt|7|ADD8W_E%A}RI7wsv^FrNaY2`4^(mZ?k zh7|F`a{Zj=x2xFvtbDoZ;NQ?e@!*;CSP{I>T=wJsBfIA0C{13Ek(|ypGeCOay@>vq z^g7N7Jl}F#tj{l^*VVbyxr8r&DtJVB#AKw#S~v<9jO$ShG-@qcu-eTM!G-aw19$N;I>(Ypi$(lS zHAqWYA}e1R@3Zb%;VL>5DaBaYyQ6Q`cHJbX0d9;A96|B((oc6SR+G{$5#Ll#xX^!u zXe?QbEPi^tZ=F9F9$pFxe$fA@TAppo>V|0K7<9jGEdT=52N94+>I(oFL zJpQJD-B6?CfaL_CWFZ5`GV#IgPRrER*HjrE+GE`xgtCm%Y6w0HPW>I5rc3BAl-D#o zA(L1>@e*!MXt4hIa3QABjGg2;5TI^3^l?ONY>QjCAXvXHzSF0Za?fQLeh^u@kLzM! 
zT0XQ2Gr2Il8o_+jbG~BKOH?jM&XnZF(@T=lLPRa_7oLg;>G9lLT<2rBuQ3w5b9mm% zveT%YrJ;{G6of{Jn*h!?*DEqt73)UGcc`7Vg!>tXW6#G@>mDu!bJsmV=wuxM++7`8 z7M$>0!H2fg!+0u-o0Lwfh9J4RKOjdvf{&C*sXFk#@mCgeaZeR=oGStz8+3l=p!keX z?qk!nC?Et1JEHgUFr_B(4HvRw6oAr4a1-iT7BptVxJ^pPSLLEIMuh*9psxXe`N_X% zxQN4ccv8KxVJnR#6C7_soqA;J!-nt=VTO7ewm7Aal!ZnHBm*9*A%nXfIE)wMa4-9| zHG=8G@j`8w);pa+z_);svy15mEVFk!;ZI;JIb#J16z!C+l%MR7LBDG4zgP0=5x|Dh z(SHBnZ)Qy^uG7aUd&E0>E89Er~mGN&}W0;W$Rl^9qhf#5XXvUrn zx?n5*W}xzU!r~>wVJSE6^Q*w4DT+<`WTC%HS|v_iWy*15wGpL+-z&ilEQHDH4tM5y zMrhksTcwZ zqE5(%-tN#g&<<6xIo{XZOddaJRru5(LL5q5eNNiFw91VC|K|CxvxcSh&7cX zOK}n3eiLwn45AB8S*M;->^;P3%z~AIq!*I4bIkgs0GLyrK6QLTgKEsz`-(F^Xr2>w=L63xOX+bC%;=`^@!Ivljw}5C8%?rutnM!PupqQK%=DwxbjAIr zt^~;<%Lkp1yuB=*lueuns9~D?80_zysd7J4uL&NYqtKHOxg)$=;InH6n{Mq79sB~& z&iSW9wcd&@W$?e7tEpc+z$N+qqhU!d0tISv(w=zBe(6>G7bOvyqi@S}SsyFHS1H!w zS0Bz9Zzo4?y9%9=Ux`nXS)B(J^wXpADcdG5U`yL~knV(*d0G!lROlyvS3pM(<(;{7 ztuwi`_g&*jwzYR zl^~h>Bsqiq>{ZR~yl%}_2jffw+Jh2X?sM%`v*0dN2n!GgiUTtmd*SBnw&su^*$AaP zf%Z(Da|Ddm3qrG8#ELfm{MEVdHB#c=wqK9jpQbEHt7j0s`Q@l7Q*9xp1i2Edwc8@? 
z5$Q+3W$I~aCxMH#Z96aRre8q$$`(K7cvh>qK8z@3G>Rl=f^S_nr5S`QEx}w;cA7n0 zzHrTcYZ;Ni_O)fe%XqwD-%;4a9h*J9z9w;}{AT}-fup99&ZV1ul~0D{{PP>>IdBegd79TD zL$ZYWX>I7J)rWr%+lrQU1D%zHJm+5_d=$W!D6s5ZO7#(oGMrz+B&g_47P33fGWtxp z>`L7`565*zzt2S=tA-{Oc9lxp~_1}D4w zG|tOyR}_t`Jek$vcZb$3Z}~(9IC#PH_~l26CM6@>SLBhB(?1(ZSiWH%+OE#g%~kjb z!Y2PDQ^I7rLGbAM912xu98WD;8-snVaqzsvLS`DEVt2gy0}K)>tDIFGPj4%ACxT|O z`rF1Ljm>nLcMry)o){Yp)^Js#`1A1#<50#?w|!J0m}d5X09u6uSl64R_}oA2Y*Wa&qSk-G5l!~%Dv9E72$X-F@qt;M1A098VhVT8(Xv6w zs@cZzc+^n}^BAW{m1aKNr+mnJ<{YQ?$GnJ+fGwL+d6E;eG_HibmJmcw`-_?R6p=Ugpz6818P!_r=XIkkE7%>9W#r`oZM{Yhj zcE6`z`p^5BL}*0k>tF=COxlf|5p09%^VQvR%nG($w5TVpd1E3<6C9O|4tclfY7J;O z?>W1tv8yay@1R=vGXr$y!E&IR!NWd8_CwZ3o=oub-&(I%|DBd4nt$MVj{~{E$Nf`H zKR-4g4{sQ{Rkaa{;iYl?tT<{|Pw+_teA^ei+$H3u8^8f-ruv7cj5Ntk<~T+4c~wSBcd8hO?E$ zNxQb|Qg<#r@>qvdA7{kponsq*XiE2<1K&ZW>V1u51*f{&r*-IUqdv$v{Q=2w zx)rQ=%N`xmANtahqKzG~HfY#%$e_kG6#P^k1D8o1eJ4MYPKONe#>T9L@|VO{-amLy z(^r3&katdMnBa(Cd|f#uX%tjHZ-<$=7%bB&f%cuYZYoG~-`f65)RN=2&oODMnW&%7 zZ0S>zfi^j_iDv6xc856Pqqksp6wOS`E@l;>$R%uV-6c+J5R43OK(M%edy2{;-P2&Yn8) zL(P`W`QP01_u4+vK(r;^(h8=~r*f_!A>w3Ew0tEbinl3wgd&=@|v(YoMVBJ#HM)eLxG^$+IfR3hpyuWpATK<3}^@F zl&GIfv@?5SMt_Of*iQ8+q59NOeZEqC+NeI3+t>%&*jL-OynV+1wcL37{!9JM+xK58 zh_~;*RN=)GePz)I`x~@|XFFb9GZWFZ<=n28ynX+7LtR<)zf`@l=zpmQha0q}XFI-K zGxO24&D^eF-oB-aDV?0Bac+p4681=>!-WEb5JUR|;xR+J01-ikTru#VsE>y5%d>fHc*gw31bMK~NSl zE4IPb!sg%hKq_T@78Fz1KH z{8-aUTnB{v=H9=u4u9ABzlIooGk!b19sK?<9*p~fpBLgM;&Y)F{&W0VTpx?$JMqD| zKgRJ}anl6$UW{Lkk49RJu4?>#82=(Z6_3SKyf?lTACDX3nNXY0$GhX9cxPN2d*Y%{ zBXco49$$@>u`#X>aa|g>#`gGnd?u#i9r1&>AlSVk*tj`%#$}=QQ}IM>inoRu(Ubpe z;rm+^{GA_jV;t?cK z?}u3A<8K_pH}1Xfa8daF`CcBr3o%-I&p&@+amE*|hFHCKkH71J*6Q);Q0Emv1ARU_ z>>1w_OF|Ey8JEY)K?}MJqj=dj(r?IHOk$v;7@@!BhuXIrcad0!N2A`iNY zU7l=P$TDPK6xl;fzoA_!)E(H{brmMtikjUtiG48+o(C zKA%Hf`Ia+#-m&MNHQ%&U*IR=Y^FqFx!v5^w^Sq$bsWFcAu^{-C40aAEUv!`&6JGw7pM7sSdCFWcpHz_KL2y7?QE^4`YV zLoI3jL_8VugU^qKn$nN|ZB0Lwsn69yo18qaF~=6F6H$@ZScZ2R}q(e6;=x5pKc zy>pA}&|l5nw=3K4w#PpId@c{Uk3-DrDy~6WBmL~tny#0GeKE>c4_MR4o{>#D&I|t3 
zhCX_f{xgF;w&^N&@4Rv5F6^rvt*!ORPzU?&`;)U6ti|uHwVv|lja&!+3!6K0*B?LT z{P547(Rvi@ekkZ>?>B;8;@=y5LC>xaerO@bGvoNoytz@W^v=HwcK8(sP1Jjc!{|G; z=bqT?=_&i}%imjCo7)@o-|K`g_$ScW?I=H=+# zidY&8C+l^Mb3(6qOOta#E#$N@)KqNjI~TW{*wYhYb#5)sW#JoVZ)@zFtp7{nqF6Z@ zA8r)S;!u;9!tcZl;oe=bBm5qzcQu-}&9^wzSKM@!>(GDhipTyqP7QYF#i|hh?C@Uh z;6rVOK4G69d$d#s_RZ-)8-C<3Z+mR0pIXtx`4HPwbK1ate)IqzYbDa66{ zDdCNM_r=AwHGAxc&z^I7{BC&HosqcJTJC#7P36ts@>mkHgT`y3SZ1itTVksB>cGDI zHLml|z3(n+DFMf>P zxGO$3rsB4^Jlwe^E{+Sr{mbI)@V&5oYV3^jC+lk)oi7PCH^+h)hg?^OwR(zY9BiEx z?B5yu%k^EsE+32HlGq)af*mo<32)SHSE${}us=V%)q~!s+i5{Vy)uraA=Xu4|D14F z&Gp8O!M7M^hdnmth2Kbijcl;#o;)E>d8m(moEwWnEoj4*7@ZvwVix02hsT@K+qt}- zi7P{#Gt-LTp#ze*1P^${U`BY{6F!*SP}GP`}B|>zhXKsZ<=11b?f8Jvuv=kF~SWWX-3T z{CWRk(AxZK;ky^(SA$VDX%w$nEXZzI*Q?x*bw^K{UwvRzTkg;10BB?OAGgHjaLON9*G@sV|aIE zTo_{EgYLaQU~@6>lkXY-rP15j;rDNS%!ySY=CwiJarox*IbGKUjq>n&v?0cmaa|)j z?s+#a_%_zTRSYD*T4A$=_+gw>xUSDV(nkF`N@>z&?9m zNB`(~HrVFZ9q~9D{15TDV^0o#4`}2qdxH&HxkDE=A&A9S(jt`W|R z><)F17yoSd{ga#8SlibFgKo~O^_D$z{)ZUY6r))Exg(}wZN%_9B`?1*wEudr%LnZ* z3q5FUd@%Ibg5Y;=h?j3RX~vGY)q`zzoQXkP?l|M$J9pSri=hYTHN1C!cqbQn48EM1 z^T)n%Rt!3`KWIPH%pG(0o%1u)L9dC!dwr&dox3Ye{@C;9t$6LZPam;}(b_w45B`Q& zy>o8OCg1+}V#D5rK@(?~6GPs7o*wR>67CH(8~pP*`1AgiYjZI=mzVi)?l)BaMt;=D zdw2PxiM71MIn+f?&gDg03_7r_Chqzhw6X7RIA<4AF%B_^Pw&Z*Z*S=Uu~~~3^yX)A z@Z~Mr?hQV~<*wM^Pj2qh&bfYOgS{tWPVh$${Vo=eq ztoa%=k+Zoy_pS9I|Gp)@&G}`+_r#Zbek0U@{`UE?FL&_ACY@*?u0c2F-ti4*)`N|y z=6;9ybypqqo4nbyJ~#OFes-*$jBL@$J(^n&)` z2OZcM1%JaEYj>RUIh>ojrj$Ivz6a~!+qTb+ocR5G&`4dysK!QXTodxXI>xa*P7S{Jmmj^wEe5gJ=a(;Ycf~Tq zOdCFFAqMqRA8YT`k`~rtH@7$V;z!$!ANTln?hNMk%-O|M%$wkBd5lB;?#+qW z!MAg9&W~~Mu`+06E>HQq)`(kbIuQzWA_kJUM z57)>2!5{e6PxPaukso#8S3d61oo4pf8;3pL%93EmyP+;@%HO{4Vz4VOeMDEV=gc2J z=Fa&a&iNhY?)c3c>TqxKOG2D?$DTMp)Ou&o>a@vvOCx>UaejN~)0H8Y8{*u_THMyF zV@1%NpAW|sA*O$fn?wDdj2*E(=y73)V|Vax?fl<|8eA6Mc+3B?$z0vm1ix2?T8ovKi!tPdotDwI=UL1VL^Vkrho)?Rn z%V}KA%rr)C=~ljcl-QEnhwl33WI34Tw$u^Wn^TM(9)a*>tYPG!ftb z4t`G!TE0J=siB-cA1CCN-<|Pdd?DDDyRq}>JlHq4cTngtdhqYee8`t?evK!Ge)0`K 
z{PLBzxqEUvGUUmYyoOw9;Lkaq;oeCFQqO+WkMvFBV*tqt#fKg2R8#Gu#b20hf_p-`XgacW!_8-lj{ z@WGC_jP~U%FMG7-TW*6M=CqXG&|l8nV}nli2i@u6%v#F=?(nlJhFtkE2VLA3D}Ubk6Q6HU{QO-T^MbvP z1lx4y^Y&1e*`YtK3g>i8sfk!bszfByFnx4a7R4il`ntpo6DDW=I$CX)K#3c z_FLzT{a5NLZ)-WjoDPsX+x)_u4SG$5-eb?1{O1JU?5NSwa93 zZRAr8?uv8b(zrR6#flKaS(CLpc*zz0azIav!Ux&w3tPXvpXXgh$Vzw5W82F?+KkH&q zIHw=|_2I#xcdQQ!=e{TV*8cQ0TjF%SC1_^<_d_oH30r>nyq{i~1J zI636WuRU*_(}_M}qlq=FhZ@R>b~Iu~ooIDt&~I+g-I+Z3u`fTl91-%=zkGr%y7}YR zxqcMO;D4yY*V=n)u=7-WF+LRDJR6V1-e7-Uye;kw-_I|`y+JR2^z3D^G3fn{SQm2i z{;v2~JQHg7m+{8X-(QNG;_}J*`o>3Mdu)yeVq5&j_(1UU3!xVl2OGPBu9wEvxHjmv zEcBZ@YPc!v-5zoguiPGw4dI=d?1`PB7V@|x&Wej;N$@9cTAUtgN7s2FE^+IRlY%z$ z4+Q$0#204s}@Ed~UcaUwyD3#6qLjhnVPE zOrw_?^|Ty+Gw4EJ_w~f8VBh_9AunVf^32oV9UwpLCH)0aE zIP6(NOz=)D?Axck{MAq%Y`EvnkR$tQZBM=I54wxNoLf^E>1T^g`wZd4!V;$c^8;?U3X^;_Vs{+k`cTX7DW%Y$Cl>gB!~ z+h>PuKG>#{7_8Y+1KN1wt~iZDUk-J+tvxwAlk2l_MzG`TuAr577sr)xVX*hH7zclJ zm51Eqz`q>D>vvXs;yE?c-MKoPA9LbQvakBrZ#`ng3!R8C0Ha727QL;K?T z;ESeeb_!p;mKc{p*AWxeHj2T0zUNLD>cFqsu{HF7xYgC@ zp4!nY?WXVWH{562ncB0ZPYw?881;+(@aMh06`MaA%889HPDZxbxG?nIIB0Nni2sz3 zgWjMYKVpO!tRX(}&zj8bz4F|ec6^CXTCWIQOGv+1tf=q7jYGjoER*&lTC z)_u6=++0m2br`YxUxUs6mi&DERv5n)za7s7?S3Yg`?_;M^Tj`Mzu%XeUlcU^_4q*C z5MurLxHvWh-`P3%>E7wHsZ#w$<6lQMr~f(g?MD8ti@oue@p4=kx5k}?G4B2E7DIjx zyraFl!*_98yg%4_ARY?wtO+$cCHjV@|1-Pq+4{5b?${PL#}DG1cyow5f7AcGIknM! 
zz2N-(&X!w~;P< z@&91RvA67w-rxA4cq0CLY!AA4qZV|(I4%o%)-Tik*EQ1StYB|NJQ-@aBkqh3hqK2+ z+?U3+ar$Jvr_s6lOG6!wh=W6Y*rcNzH-@>L834d&UFYXEcJ{iY`8hOtS8%Kw|aflP_y}E1Oe?5w4|F<{Fk$v?T2VJZ$ij5)f zl_94^eFJ?nqYp+tX8vt^G*Dx?9vTNu)_mGKEM~<)llf7N-arn^C!?O7AI`;3uX^Rj zImuIQ?s?~oeR3n-gFkC=io+Xc>G9(_$Z<)C)p$lM2>F~IX9xY+AM{as z_Ad*u$k}^y$i;hk@h>m2s_#4FlyLsTSQYYnS1gRvg1sw491zz{F+2S?H|i~Y`YRz9 z_aROro%qs6{K(ztjE>@?oBJz*e{=rXlY?C6L_VkOsmn>>y_!JZu`9PB4s*8o5ChF= z<-NOV0Wq?r9#cUhag4&*&}6o>UN+M|Ir+hTO?-JGDoIOrf2akJ&l z;!r<+#3puktleQtE=D?;8|CbsKXY%yVD65X*uYeXL(jRRCzge}v!z$$EOs@m{xd&U z$#yjFo0vhP(}GSkJ}b6_c=@B9-jbKPmeY~r-hH#(}DSiIBgr^dYS)_1os#9~cTx-STM z?hLu8JD=+3t{4Y<@`oOf-|k@7{c167-~Ic7-MPWW*}*q$z1tq;Kf1c{hESUy1dZfF z53#W8K5fP8j0Vp2hq%RdP{@~0K4?3P^3Z#1(ApbwcJ0HSkuBQLfFBy$8~hFD?uykN zZ@n2}pK3lk#N#~;#Hu&E7nd{l=_pqB#b)iky&*UHnbVc_gI_WF(*w@oy?mDh{eCal zofmB06XW1-P23%e;+j|*zL_&azO<*SxQ#RugF47VZuV#~+&7AgO>xeTAs_w+9r%}* zJA(~#w#Cigp!ML(o*dbiC(ZSPkuJvJ-2PO|33?B)4t~8?|8dA^6nwio4(F>vY;@sGvDls#W>_W z^r1V0CVqQpKj_M@oQJ+=V@1##;_zNx-ttR(xfto-Z}7#ByyY*JK?`d-4RKF3mkXPN zE@I(-sEZiAcg_~w#o>G${1374JIwhPk9*#NCPR&yPTL(fivPxtBY*s#9dx&+A1|BC z$Bp7$9_r|=`$qeGn2VV${=_5>XG5P1_vkyk)o1R@1Ktn62YkSLur-8o( zAy0mv2ywBm&Sy>5dXPSwChMPWJTuOX-7(bN$e+062=Uk#1E2Q9AU1pEeDN<=d5T{g zY`L%Z+;cYEwPx3wL1!B9@&8}9HND4{*qj^Lf93bUk8j^O4fNR1@8*1o%eh$QgnG!u zwnSd4s3 z#RGA3sL!QwMO+ecuzz2i9%8*b=yO%D^Fo{!3xd5hF+b*pIO!x$`ibq-I6qd0`|pl7 z#PZ7G-Okt^r^Gkng5b+<)E`D?)Bjz_P0c?Se-rfcO^w5Ma$@*a7KV4UQ(JM&n=o$F zLmNVli-Ygollj`l^eR*XN!Uk zQ}M2#r?b1_#dtWJ8+XOlm=)K=7vuRjFg_A=R43oiwQ+Hr5puUaEtZB@&P&HnG=4s^ zKf^sfR>n(lUtAyVdwW@^yS#6Ybe`ciN{rQFL>a6axP)D&G8MN1fdgG+fr|xfw zp$?Zfmy5b>jO`)b{ew>$FQ2T%PNxmQ{sS?NSBG!##ITQ<;Wvd{ark!a%guVIBOTIn z^v1>=aZ#*}-w(PD@%(Y~>M@g@@5cY?%{LpxbaK#xE^LcqPOJ>yr5>XX?ZzQ4_tZ%4 zi-Z1~gI4_8TNL)$VUMo9tJlZu;PdFvqiP{P^FzZuJ`M=Ez9yWDhbG5`J3}3AZ+=aP z^{TioHiq}Ng_^9ItmS!S(1tJC=}Gajujb2QNwAl0(|I4;$ew$fLVd;kVAR*sdH;3e z1;HO}-x=q{g|Rzc2>w4G=f^{FZ|E;Mje|cLiG#*$(c+kJkG9Z{C&$)cUtDsMzZ!Ud z`eeSZ(YZV~1iSj44Rw(>?T?+Hf8{>Z>A>a(1;64{3-O4Pub~e7vFrZ)7__64eZTp7 
zNq>90EyTjt1)ULmTIyv+*u#;E8b~4`k8K{ zU?1l4l`H%F*cXR=v096lK0_V!m|Wdo5a-0app*RdjWr#|aa*XJoURFP_+Z<2AP@G% z=dCk6z+U=}y!Bg{ext87Zi;byI@ZJ~F(;-%UF`9HPb}@t&c?lQSG+Uma(wW)ENC+h zT6O=&nmBg`_vx`R)O{TMtP8%!lab#cKDAj9dSo0lc6X?;JoN;fXh8>eM&WyTRq#m{ zy*c#CPzQGPk^6EQhd8~Ji+*R%pL=2-2Vde~Z-|9WbH2pGpZWgb&qzc0&4}kipNaqJ z5W6!r&I_8UrG5H~>r+9qEpbO28e)|r|K|GLT5Q&QiI-NdjLBL~zFl_YDPD6ixa*F$ z*6isi=gz!ye-v`F$CtG?uUxY;#4_BqH`HNqbKj;jnk)%>e5{WJA%ExUv^4aPbKj9V ziH(1Hu?w;2LGf6t3%|4->duxPbnjPUe(*gf?uv!Mm$)8_xiJ+R;>tK9)Q~=OS2HzW z4}8&rR$`#D`+C!!K4Zf>vB-xny(tE{?GAot$J+45oUM5=V*>kpS<6|j{Lp^LU5s=z zI-{$1Q?VlWan~6w#9+;zyY}g79O6(1HL@0yHGPM8hWFypOa2CJ=)<18mxsLH7K?)R z&M%tG>BGm3;hs9_hxI{+LxT_ci&amki#XV!3C-p1o>-0IbM>G_E8?p7Scq-#Nmq0J zhVMcg`jqa@cfgl-bQe2c;A5zRedn|?(uG$1urFWV0H5wUx8~0XvAEBd zc&6gd<4Ylj_r|}8$Aa&##E0UZ@b-!LLf8}gZ^vh2L&)PZ@k~s`ym(LSjCJAtlZC8y z-c;++Pc`znD_#zEABnT$*7#)H9T&tkad~e(+W1o37XNo_iT0=eJ?_rN4e@+j8tl!V zz{b9CUky)-x5W!_amej+A(ks+YJ&Rjjn#2pIQQOKjC7FO=FnGqXi;1g?j9Zb`L0+W z3u8&ph2NnL-)){g(>)@l|0Cq=Tpk~eTS9#wj(3H7YW=6-Y-#XC4}HwOcy5YyaY~5w z)VL$$c1V09o{rt|+woMW`ML4#cw^igH2iq9KU&$S)^z(}Y>KmjuGfcL)j*!^&5PBs zEZp4?X9TUp{?4HLTjCRw`2&qthxZ?e+d>WA8kfZF!KXarc|@>JhXZ47sKdu5Yj@~! 
zX1q3NMGJn1I?#YlhlN^govddyZV#IHonn7Q?1_!xTzn_RYhq^1iKByU=gZ^B@C}bb zJP!ps?yJFtadP}q&`>_?&|*i3P0XJOXTK5he=zP3d3i5JeRgQb^MsI(wRnz+Uw7#!XM6H>e@*ZwSN?|_^}5)^>8%?3llvL5IM~+@YOOAR8}y(%T^|km zt77S7&G#K4=Bd~eY|})JuMhs@Xm8L#tiv5Kh|l*eH#xYsAZRYfpcuu6{%B4Uak#%I+;u+q7nilV zIa7b%lC?Na4>qlDh@qG7YA(ldi0`5p`og#)=Jo6^8gGxA;(rGp?8;4i=HByR6c5Df zY>3yMIjz0(9q@0@yP@Z2H)l)zb5s73~{^XpW!dW z&qcM~Z$q3OY+V-T#VPUeQ1kQS`S{6T^Ubj@yXz5Zj_SAa=+92=%NdNB1|<`-~9B<>AiKpw*Qj9(l1Xj=zqZL*7rt!jPvov|Sv_ zLQHCOY>a~qafpW&`kWU0%?&!=5j5Hnr-m4ogmd@Lp3M2-V_qB)2gU4g_mB|J>ew01 z4iB;OtG~UI^Eu&cD(LuV47%Ri{Mt~53qmZbf{tRnA>85bp->wdh{rpzic?)~3~%K> zE7r$nVk+4GkCQoVZV8&4AG8%W-@C#&yX85$yYYdb@t+3!?#pe^*xL=UC;T>4^B>p3 zeawyRp%%--_i}ZJRUGu9qqQE_*ZH5WlbTwuh*!s?=@mo=TBijCpxHZJ|v+;ZkefaauX-N9vd+@KwMdPN=14RPr$ z^*k}y+8AoNDCFclU8drO@cpYPU;MK}FFy4)M#0W?p@(J!{mhqzx{E`vEr`M1?B>VC ztZ@F*u{(~;Bpcqd|M+B1H#K$Fy{$2hi^F&P7oktL#LIDV%n5#$$7_R)_2Is__@5u* zJSi53JvKob_wSy}#j`GGZN4zZu{Pvq@Aa`d4vLu}2EGpnK4@@MsLO#NC+DvYF})_- z8S1dN`K96hc_Bysz1qLA0|lev84EQaf29Ip%UmB$P=)S`H1 z{7$3y;=U&~hFQ=_IzHSeHa$t#rEx-tc}>u=?{0ce-t5i} zn!PI2i(PuMvoF}78J|O6&1*g&R&}ALyL6Ow0T>6ydAK6~s8b=cIL z&;3LEY7OgC!(HE%wf8GS?0nN{9CGDf9O@y*S+7_h+I$pZ7mIs(h~_(kkAEM_Lk#?= z3my5B?~CExdqZvRk1K=zM}>UkN(1rQXP0le(?TumA2V6gj^;+Sl(#y0Z%-UX`q&en z^rkk%UL})z2|$Ax3xU!C|14X4V^cH zSPz`QFJI!}TP)7#;Lm(f*cab9L3j3_47Pkb>a{n-E?2cXKg4-ouyIAG2VZ=O*E@4@ zSVLZ7mH(iFeS7?>iGB6aL*{D6hBq|j1LksYM-I;A!H3bBUAY_GGxx`y8o6gr3`0y) z&E>Nx_?Fj0K_7lM#v|do@OE4LMC=PWyg!!5oS^lZP`hz>zaaRhrMTwC_v6ASj?wJK zH^#!|r!{Vm8{&-kdhCkbp-+E4Hh1su8b2IAXwIkF=^r%{$FZTu=Y{zBhZ-#lcbw5( zUTiN4zfBv0E$d?X@we!%yuA0;$ewSbJf`cwubR?HT;ic8?cLE!>HoJ( zZ*8vbkHyn*RnUix+v3iU&$c+XH_tad8*23Lz!u_p5$2%tDwnp(h8oeJmyFG4>4~Kl@I3V7IK@)c6j6*{oh*f;~pVpeTVm>!E zgg1PQLoLKppG?PoW+U5+LSA&C1ID4gvw|;ih=~sTtKHFYWE>H%3Vsidso+@($(bL1M?V^8isws_kD33oaYuOf_Q`m0qj%Q_ z4NeO&?TIv)^}|f5*YEXS+;Y_0gU)i5BcJN*Tx?>J6J4OMoR7mBxq>aZIg^Xn27AtF z`U-KGfk0&7TXl-yffgw@lWbYJ78C6Kp&jzY*^U z@%(Mv7PO+ttuYllL(T4wbA$F`x!^~i#q>W{%5TP3+WUN@`RHFYJ|EA-uGX}FIi83c 
zTgzjrH$T;Kpqe5{H4LeE|q7suT(KTe4yG2HcAvpV>DPh2{gf2{GsxIO${ z-4o(*z9Vi3xm_FQggeK^!NK0DpzF$5A8W%oE(toA&kN^!VyFWR7ls#p3?b!qq>)X4qg;^@#9&fVw7d}fG2EPBlSi{kXqdo*Qt*<`)3adD`J-xC^+ zgLd+zLv@{g%ip2lu2CFf)*A;-5C{KykUyi?`4rDdleJvzdnw|5v@awKQE!9RpiF>GpxZIa2!%!1G#7&1;!8aS`7-D97D!v!bg&f}&--vfl*6(V( zD=rUy9*S=TEot}7cseeOwei`wFU}AB^wHQ7+r#@?3-iw#zaMEd;|+};h^_HRJQ%l4 z)_WV-el#wLJ#ks@Ki>GhxFP;m@O^rm5%x}tskk!U8MDLQDdGFMG;&GqG7stH#=b+QJ zSQL-Pd9ga=_?gN4nMN8v7<8Hw@*2nD(7*i54_eVltn#pkI~eX)yk zZ!C$EVpp(j&DV;c3%uiN|B&y3keA<}S;4Q~JtCabYR55t{#!LB?u2mQsSj?VOrp2^Rw zZ)U@O^*i9dG`cV@sS8+aetfxTp8x`MFH~cH=nQ_np&A-|822@ZGA5 zGun?Qqcgg(>)Uc>l(+iuZI4}f9vS1XJ~q_BXsuu6>fP}%GiYzkj~ck6M+W`XM{kIC zsKZ^&_k=sw#x;{sZTAL!)IuHi#UpW3IDabKSr%frCG?p5__{sPZaPQt=)LDct-V)= zhod*63mbREm9aa1A>6w;u8&`jv*N$U2jgesBSAAh)SCW-rgYs9$4?kHx@eA{z^Uo|ji|L~X! z+Flw5gnm$yi-P^_Aue@SXE}SjG2C}ojhyp$MmU!j`_6oiddm6PLDQ|l{>!26>iEsr z5Nycxq1Y1VPuA-1-si&|n)zPj&;Fo4fBY|w)p1;?xzSy98Ba!b`3tABdlfCxV~1#uY(t zy5AKai_ZjmUx{Cc3t}8!3~%+Ae7_u*h40|7pyl7kd*d~I1NSz5IMn~^aev$rm&A?n zrg%2?#?5g){v{S<&#mIzF%*}bkAh%dzHWQvx26yxi!uUXKda-nP1g7J9Y&7?m}GjULUWI z?eW?;Bxnn|@TX?#p+*PBg)yGsTM+}_Lmj@_oVLe=IDRu83-&)0&i4l0)X4Yp(a;C~ z5O0d}L(8O$6@261`6C{_gfUz?2ltHFb@ESwu(3U6sk^DDw{je4u8 zo@9qFJu*NS{U|Q}j}EyX9O9sjQNODx4QS=P`O3+hZTaYBuxFGvZTRPFX4nd%F! 
zT!#fctlf8SsKc$zFAQ;A9qN5fh<$s|fL`qFjJx8hkoz!S-kiPL;-a9FbN3d6da}2t z9{zaa(~%~lKWMx*Xzg3@mVfX0+85`=Y0=y1UjF;Wd*b}~QqYfm@I{|>6ZSUJj~;3( zPP)5C`(5GAsj(w`1HKdf>}`%SV`0z$=6o9Yp*`(}JV(vN;#*i6bWqc+!RM<&jBN2k zx2gDG+!^fALT-15zIiHsHa5i(@l3oeo(s1ARh$+4vVUpNRE!4&9ckcPALuvw(ON$| z8y|`8%{ZX(4e@APR*hCQ-W(78Xm507;}!8s@o?w+H-0GOLl1qcb~F-`^^VZn@>dTv zuuq@kVq?5H-1nV57#re}5c_#S6LD_{eWErrT_0P5og-r$Ck0*S#_BjFs8I8_g7pKNlsGE4yfZlYNia&^N zgns_j_GStR>z4Nc){n7cTVfx>B#YgLxgO(4(@ZQ;uI56_-e*<^=|1acg z`UaLa|5{ua-wN*sT`q6_-ncFX-Lo~4<8zb!pK0{lM{_ow4d2_XaZ`LL=%b(YfEr#K zG-mSy@lsq6;{JS`7IJoeN1Pip+#K8EtYBkaOhtK2Yps`PdqgaV%VJj?8z+P}`bGW6 zAqMdbbrqR8FV*fZe+%?V$zil(ao}&4F#d%oh zmDk0F$lpjGj6)2I;$!htYz+OXA9uy7n2J5&9xdm`Ik7#~$Cg+g55|+>?)|}^ zGyd4JFCVd;9h*Zgd@hRfLfvS!CfHPur6DHY;Zbo?_#X7o$#G(MOPj-ED(J$WzBxa{ zBL?TzcLcjLCTs6^hWk&&vY@$G)k;! zJxYUHLO$Nmd~a+BcaIG*ER0tNZS1orF0tq{-@@Eb8~bMk+hX>1Yj`UsvGPY>F?na? zN4(DF1l{H4o8m)WVxWQX;P8#G$GZ;NyUB<}RDF!h5lZ*?qln zL=3*fH~8MaIXlkjAQp3)x#N881mF6%f?bGL{a1u|)a^~d_YJW;u8IvY)WAp!KG-+X zj&FTHBl|~(GyC3%LA~TEpYa57c<=ifhg#_ibrJ_#-pfNBXn+~PAARNF%pG;-o38Fz z`&NcJys!CN;#F~bygeQWcYPa|#9qBUt??W#(-V%D|sn{DAggWcN&jjt*{!*yPnQ=nM>57o2+P@{lzdpp;+2}tt zelCv58p}?rtt7b)&Bs_Qol3W9$oW7lmG4 z9h-x`(C_N}Xo%1FRH&1F@c#A?2it0*7o6)2_SAuHa$6e9C-c`fo*rxCxS-WlA(j(k zS%^VRhdTUy^WTVL;@dIkB=&E`3n8CF<8AS^_-4@gFXPvOMtbI#gN}NO@1Kk7CU{?s zM|At0sAoq1*vQ8v@%Ql)@qC;YYWj(IDd_rC+|rw^jbDj7;$XIfgpCx4Ha%Tk0hi=YIFRrNf4BepS3VrsBv@!zD304hp{2=Y%*k zoLRpnW=!VG8*xC;tsb`?iSeR1J=VqV2mS4d=R2_>o(bQ?PX~=&8^;BotAaoOteP9% zITNcG^uwZX*B(9TJtz1-H@uV2ig4c^UB|H?oYRqCwV=zapzo{Vz{z~b+nP3Fa);j| zLVvhF)ZwR^-xSX2{I_vKER4s~hh7^)zyC~fb~ZY(`P#;RZq$eR!1tmL=_tD2_WCX_dd}-;D=bpWpoc%%=U!?#}qr-p_20 zuCEJP@=b4RTGICD5Q`YtTpZ)DUp&*_uY1Kc9WTAu)I;jJZnD zy?@A=M*2;i)Ikm1;rGyRmybyuMt@z7-)mjXN52ri7`L~sR->uLe~f>K^cnSTbWw<> zTt~kiW5+aJd}n(NJP$H(JmThrwWjcWe8jkh(j^-{bTe-VEWUx|l1e?#N{ zjt|AZZq44C;=hHM|7E;A*ts!28_qu&m&NDel%VZ<ekb~drgQU#Z{u@u zZM+XGSxHmdckxF`08H+pPcd^LU|j)+f&_i{Qh-V<+( z3qzfr4e`-|zdd~eFE^_FN&UXC&FA)bUpyGM#u;&WJRW-I%(yP9+ejTh8Tpw0-cD_P 
zTBy&aSQWI~8Z;Y+<=6$eb_hc;sW zPz&{S#@=Be4zXI#3qD^P?~2!l9B^%jYfjj|F~+egoU4=iu%#!Cj@Ja=;^q5w!4I7d znc%)}RZPz1pAP=yERId#OdV#& zW#LROs^!kmLqmSF9&C$O3}U5`*z|yNF@a`c<=b1f=f+S+`)aF4*z(pMn}Z+k+4ra3 z?D}JyP1py!?wX6onfq)^#W~@-w!bLWOh)=G4t3FwbhBO&Ys38=;SK%VXGhP>pRC=L zhxpZDVff8BCB*t*Tpg=BcFwoo0q1Xz4Y4`SjrWJM&&KX}D4q$v^(yVz=W9F}mp9U% zj`oiYcj+rH_qWEJkP|KB!;eueVpV)kpnvg? z?r+=|<9Hyp#hD=o`dDv>Rg<~8{#9Hb>%;p6K@U0UgK?}38lDw$m$P~;i@8B#-yGfL z$mfO-KYQ|Zcc_hczg0f@qp$B#yHrdBVYWFL;YzXb~dc_f;eb9sl&*dKZ--* zFXP|FcZ06K5&u1&2yefUc4s$!GSXqRtMRgs3t#5yL=Umu6Z8`Mj5zN{!}P!7rOk{_ zW@EhH^nZUbTH2WJ(We^U&{)hPda5=1{~Wj1dVN1L(rNVlTII0zu8JpPD!vpKbarfG zvCr7usD?(deKKy0XM;{Zj9-r54sZW9eyMklX5;_uXXNi%e=+_%elmVNiec0+{oj53 zV&hSvZ;NB}Q;o+os^x#KByy(vi}6^kcvqwE@PlznYkhEdOvOLMuf>V+uj3cu+i^(z zK|B*LOxDKVjGvF!N4}>2?^4rky58kJ{qO2ijq`)uTZ2~W^wwgO%ily=&Pdbg|DB=Y z8!c~6i%*B|MULNy&&NaYl}NuC3mgA9I-l-|@|pf;HZ9&1o5KA&gD&cDW=zEmab1XM zWBg3eS#3TMe-zh;obQdNb?*=X28;6Fsz6Ea%i8sc5AwK6n7jKSjQGZY0pK3fWj)})(bNG&D#iem` zu)!Xkp9_7#rul8*d)gE43Vpmbw#F$z>x*JTsDW{P?2M;kQ#=qi#b-m#XN0%vzA@P1 z^BwV?;6tyd3tM-GoE`}^RY$)A-qTVZ^CoM$$>pqA7)QmjaR1RbF4Tt4RiS_D!RdcL zzNB%e!yU~pkAq@Yh{@>Ocwx|SX7KgYWOV-4pwS6&aBPX=C;c=d<*6+z=~h1`r6$NPica%SIq8jEFlu%RY2m^~Tgzu*;XwV~xb?__=)5Ql|#$HswiWQdaktAs_zPQG+*!SbjQQ40?Sv*uF1z z1byy`+k@`LouO}Th~2^8xj`R(oa^1G5ce&iM)KuD|E&r2vUhnr6MFTgcwex6W2nD9 zYc|;z-&r9Z8uRUYp!3>T5p-D?-fWJegXa5USyf)Dzc%aaZF z><#|KX-^z{o5S6yxG~huef7OM=)>2BxH;6(+Y93H*b!oVF6IRN<>DSK&k-MDL z_w|#}dTy)>ntJ2DT=a^2eA}lt_>d1f!y9&JCx49Mu#l@*_*V}xI@cpcc80o7HQyET zcUPa?8)9?UH>eNkslVy6CpHER4t3B!^rMS?Jwv~l@yeK6PJi3uMtRUxKO7bE^qy_{ zANqp%$>784%&#})1iMLhnr$ih>6y32!1DZo37E&?}xQ725}q3q3^^-16mAm ziNzi03%cr4w&l*2_hK-r1wA2_K~HNMvQG>C`0yt`cHmF$&W2d%VC{UUgV@=3S1*Z) z4?4+%MuR=)&fH;VVaSJFu?%tX=e<#X4Cm%UfAGf^E%>=D*r$`VZ$fVNd{bwJGkVcS z4aC9cVAGs0I*8Bcj4%5T?+_ombhS2T-+lfEE&2cdy1Ear&FlKk_t*W*&OZVTV8 zLqbhAg*?s=c20`V#iMb0ToTUnErxZ$_POD{oW!y@_&9hnzM@ebeKQw@9`Rls6>O;U zuEGEM*fS1{&2efBb-2Fy#i8ffKW)PHjhlnL+3=$$SB*y zeIUmN!=Fid=?~(XI6OWT$4@x4QBALm>q3t7VAq|qLJn^TI?LC4vNF8ibaTf!-#L4r 
z-9fQ9UKY-)$6@iRxFGlzr#*JrVq;0@6|pXhePe0JW82tc@=OY^6x zus77-(Fl<)6euLHFZ=j|bw$V9)%L?&z0WLp|Re>aN!E66cCICr*#YLS5)a zfA1q->Pc_D)RO&&L%i%=80UxF?h5&_tbmfxjbzcJlB{hnMtPvGvZlqx*0Ex3#ejv=EDO|d4#wJ4TP z#s!VbVsXgNU32}WrtEH*%=c{+*V5Q8#Lwp;A$E0TpQd_fWjOEsIV#-wT)2P0x1w<3*z{=I_z10 zby(9&YvaOLAL^r4Hw9hw37bcS-q{%9x-!Jf|9NqH92oY&HoLQNc!*n0)(!|~)QA@B zsu|t*W5=E0J>x?RFNwWFFSA4Uxp*O-4mG+rXnS|)t0&{uI4u^$&GEJnKb_wnbZ3wK zQ$qgY+co6D_bWrJ*7g1Qp*Q|Zd?4iaXYu+_54yfH?u#wq-ZPQ?5#8Px?~0!UAN{vc|rnB5>Ko|YG zHHvA59sQ?fo8zlNH!zng2^t(94qRuqEDdcEhzBk4Z!QTDBKR@De?x2_r z{#S*3?dyLdU5^cY^JMV1uAIFKa+V9tz8~_xC{B%kiQa(``*i+9sOhp`R}a!i-1`N; z-lN?@Pl$KVu)cTf613oxrpJYTVDFGv8uqQRzc|EwV9-%-u|>yi<7Kf&$jh9MeL~*b z$6=vf{2J|BbAK+p-^Yae;ua4*-Sy0`7)BvBajS##wBuXuo|gps{OlOw+$m@=Ga22L z!-DV~xh|^L=!=bFk-IwlD9(tv@DBV_=;Nz{9={y!5BcuboL+qG72;Ae{UbIp(dXsi zj&+)!5bmvvHDPaYxa&Ql4c%ws@K7^((nTJ0+9Bkr)_fUhVxJ#c@N2X`Xd>pJ4ojO4 zy397;KlGOKvmti9t`71Ov;74jK5OhcZ?5OuSr~HXM?b2A*udU!U+?k%+aWITh*^DT zLlbM)hjq1~->IQ4bYOq*#Wo-Oss%r^*f!`THs{6c&Z5c44}I-h=Sx4+d#J0v8s01R z<;t$yd%?2mrG@$TrInmM`grZ}{9@otXO zV@-TCPKmR^xsOM2&b+Dd!}0ICb6w*BAujRI)F>XXBM!Rhd%DjCJ!voAOTxKx;_{%o z9B8pV_&6xkd~49cw{uCX4Yd|KAD|oCpu zhHud$jq4l#M|?MKj$e!a8J`Zd`d)mbbC)*$Nj%z|p3lWY!OtJXJ@JG1eESX4KVxoh z{<(O6sAWDzM>m$!`1j}hjuth)FMb%$#_xyoZ;02#d*ZF}e5jw^P@mo6jd5D2wR#ob zxQ2%{>XVPf*|99n3O3VksKfa0>c81@{W<>coxj}pe5g}38h@9sXjJFx;$Py$*d_i; zd?U^czP=uB4l&Ng@5HIWuR1>5Yjs5Vszoex=L6yX@5Z}Ap8DeUkmuV&pLW;pQZvr~ zII8{VxJGsHzIgZQq4A&Tw=~`yUkG{I7 zvb`~`34JNI6Jk^7^))dUa@S}32m9(S=jCxqygukW7c^sIoAAC}6@2U%)-Mk4r8>wv zEynhCYSf3Xh@HcEK6VSSInTf6-NVnGA%?-;8O_DIBuz(+t3#~SX#DR!tJzq?^^G)I z9RD@8#JfX{ULN~~{P{QXBL?3r8i~&+9_xFBxDE}r@<0B@h=skwLJV@Fg|%14s$fGL zdxv}51?~7{ciVVr$l1F=hXo-Jz?*Wcw<}^bXLPl;?=Q3oEOiA zeR=tYtJBKhQ$NauZtlBFi^D^#C#AQ19*eXZ|5@=wbM^mf+#Y()`K#iFV1q8##WB4P zf7$qZ@saL-w~;pMVsq%>HKD%dX9v9wjZG8mi%CCR9(?%D9})c9cQ0S#eA%YE-Z?(R zc4Vm0)}W_a@UdUeeKz=`gPN$lcM9b_W<}%aG1Nhw)YyHayj~t^NV6qDbNwnOYkcy} zmNom%^QY!7iJ8ghoEmyI+T)u&{^Tu&uY}xYC!<=M+tZt8#cT{`PHFDGJk?wuvCYSh z!LK!O=|RxUn$cNzg{YTCa#6R79Ui73P 
z4ZOc%pv{qSTsTMTM}oF&(*2we1C4JATId)4S`hY~T^@8dUmxCo`?GOOsQdOICujA*!q_KfV~?=!U4L2Z z6@J7_GqKEs{!ohptcUPoEZDY z4})DA%m3XWE`37d3~Gyl%f271H#ptG~|)nDT0bAWI8 z%ud#v_59M1$2Jq#+$l!EHjUMaj&ilnH{JB;5aUn>y68bR7KOUF&psWFjKQCCdX!dl zy)>MW!;qI+@aaBXLNC}?G|c7YwHJxT;(k`e%%w#P>*HJ-J$EgVGo|I%M<*vKNGaI?>(hC zXe0*jjCefj8)xU@zPKmoVt!|w8}=`c+kzH!KR-SgHwV98iA^yZYEE}?9}?`X3FqV_ zmU13BbA1%c=xdD+M{A?^HR_4i#If-YaeLel-n*YfaV>av<6p<$ma?8YJk*XhXT-rF zCbilcL;lvCwN85)UKr}TJWdY2%s0egadeyz@?rnf@Se#UpW6+!)Ws1#w$k9Cya6Lv6g9{J$#X!S8uN2f5L}_kwQp-yFvW z9n{Ai+Aj<7h)>TAb$D}g@v_G!9i6c+S9!^kFXIn`_WIz%@kAUK>TplEb3*JD_lEe` z*eT}f;d2`QC>Gb-dgMbm6}b{6(bA42}LQez!Z9Hm;3BVrgs% z`qJp6aBugZh5Wq}&eC*AsGIsa%kG>b<0VSS!{JN^E-c|Ew}-!$G3Y^aTTzB%ah!O%n3hWyo|ym!33``sHgU-*O8 z_6t2g3$gAN;^lKTb_ltOM@_81GIoi5L(HD(d~~=w8`kQTu@-btOSN&wo*b>QD>r`R zO1Ht+j+5twjq1U__?#6t-G(~+ZS!9V@qInM8t)8oem6cAw*;M^4QJmPUu`#O>uoZ-VxT^^<009`}FF@SBKnv2e!ssoH9Xetf`CK z^p1Ce&MV@gpoKi?t>4snY0Spv5YJGDGn3dan#r5*p+ET$ zo4B0kTMz9L_I3|vM?u3q!}>uJ%&j>u4*eyk!LGS?#>lrkX2SmPOiOcT_^~gZpM7`4 z%+FAV4>iAS@_cRMTjTn0=bn%+_&YPy^}dkXgYi(T4q81HH^rvl*ZnndXzUzU#D9w? 
zBCSSuHa->l=c93ZToOmcMKKp*IwsDDQ@cYu_q+%14QKg)IQ8Y(acqd?fDqf^aYC#L z_ME>X^yJ!L=l-zI&ViA?@wf7jM*ZTRb7EN<$Hz|LePBb)=qvY)u|9U6a6zLxdQN|c zlf9u1cQ-#N^zYV?1N&;~9DCR%j*Qddlu+j@Lk;+M-~Pcd8*~?&cS3)9pPaol)QSCf z#Bs4Wo{tm5o#kn(0PdZ~?UbHu#4LZ-=;$A{vd;)J+3 zoPSGr=N^xfgU>$*Ha-(SjqBqVgT1fBGjUB^5l@G2$NR#4U4SIU9lCIIdqO=<=sA+biPyU&YmZLh18t{9;_2ZFWBv{8v|h2kW03w*AfK=`;FV zw!YMw+`Wg#{@ebzzw3|j?`lspifd7<3i+QK^f)*k3};>y55ybd=+Fnw-5A#F^K)O& z|Mc*kW3PT1_t?tD+1M%6=7_j5UNT{AuIoL9FM@ybw>nV`oRu{_k}wUcLOz7hO;f8D<-RBk%2h zjBJc(@bx%7_K)Y{@wg^@|GpKc#pZDTp2E)7=!NF%3X#5_3BC8$xIf+#mrVFbqxCC; zk7C*Go!x)2`A>Tv-qd(@oDlRnKQ@H>&dWy}`s0|mIF5@`V|}ROZE;du77v7d@vF65 zF9`kNS)A%7r$Z;tdo=37RTI>hzsmbQSyJ;72Tr!WlJO5KBW3sPQP4gdCR#`}c?3)b!-WO>ts)$G$k3^Uvp> z2Mzu%X!E|PCgVPQ`DE0;;?f`DlCQkPCSULBeqrBSJbcNI*79Ymc*a_=K_9)Zo`Z&R zbWU#D#x}8Yco%0Tb9-1A_V)~a#p4|jhy9@r|9A7>4O)IT{?GVYIPp&x!P9*sLfoWB_~+Z=p9l-|yLJ{C1MemyP;K0h0;5B+~L z@WJ1mp2CCwH1qekBxW4ez8-i@0L)L!zRzx_77S-7494!C&gSm5=E0=Y^3M7*GH52@SZyFo_%Y6_}pePIej9fj#vwgaH7N0faTpShha>jV{WMp?;Y>Wd!K8MEU@ZPz9X3%Y~aGz%K z8T6p%pb0J1R8P3Wp58nos_jV5VzJN9`@(yET!`VDu_nBmo8tbsJk*LmqaI<0U%u2q zPl0~o95nXq+{#dkWfRy^59bbvMZuP}17kK`Ho=-a=*{m?7yiuUE^pe1W&5AOzWYNR zo@;(XxO;cJIc^TMdN8htb0*L7elea1{Xi?aoEGB!SiC;Y3Nbt%m&BG}|F+uUrpEs_ z%6Eaj`C_~(-V^u6TjIpY{DH=|#~E>TRJR#D@cWUSkv=&)jt=YMG*{n?@%^|iA1SEI{g_n?(t(LZ$2w=|H4zFZP)iNQC9 zE&V2^p$=bbP6PHn6u%Kqh5bk4_7Iczwd$802i(@9X3I2Tp{IJcZdkZIs&A#W|Cb%=yLCrS? 
zo#lOE@J**f;ZgOIq7TX3pM$e8fy-9)nKe)E{!D8~-$sKVA5DR&R)jp6f!)D}sNu;$Lk*gQdau zqHu>@ww#lnT;$L9!k{xBG?uqBz5ipKwrhkvF%5N)6aQ?@293qyE=^Vjf9iff{C3bl zZl4Kza^~Nh&TOmW@?b|yeu`zm@r}P6_0>Z8Yzg)Mafs#eP|uHqI(s*bpN{%vn{185 z@{>Y(f201St?vPC#5Ei8R*xMg=o@S5Mt3oZ`{)q&ir5r`_Rg7GqX)a{yeOP!!@4}_ zss{9zufC*#J9N>*p80fEEcV#9J{KR1OGAHL5|6}%!R}n#5amC%e|_U^@y2lf3vqiK z8SGve=LfCjd`r-l-r_wwUWnI4v5t;uyenw%iMSwM6a2ArP23nqOycqdgsm-3TISvVX^j{svhWm4IZk!U^1Rwm`Ul)9Wg09EKvREAIc0kO=qR?md=;oWm zE+3x7pa$aKb^^WlaF2%0@H?r)NR9q5*!Z3J5Ahoz&VP+}#vyHdLZQ;A|^)x)8^$pF-cfsL}{~Qlb?tZiRkAn8^i$4yru;;yf zvis`!SMl-g=s)()ic8|HadEsOo(}oQ{hYWU*fl;Fr^RQ2{RiSp@qY#Fz7c%>TD&Rl zj?3fTaQ3-4Gu|9(yK`I{{LuT9xGDHiEA=kk@png!PY&<;^D(Kz_|HPIZWC%a^oM+X zH@1ZO5AVn3=Ja#^+}JC24g33rI;;1Np|^Gk`-RTOkP{u$xOZgyXQ_B-u}{d0Mg#Vo6_+)B4hv`bGG~WPeORxJ*Tt)k4+*v* zui2o3T)Y$82i<7P&XDJ}&DrB;hj4bVIn?1(tvwTTdvE+gJQnu97V4_@+eNz1h~e*} z@7%~a{c>c8^TY8_oH2R+O5?GyJj8NyVdQION%K<+=T{nkJuZ!Zh_}Wchu(gBd@I;| zP5f#+ntUJX{#E&YGLv#SJ1z`8b7QOzJ%4_v<>9e*GM?6WVw@K=zdn8|?uZY@7vt)9 zPdpv@82_Euy^XI6>kr54eL@r3?jCAJ!;?eqi{tpXCfuD1=dG!$ z9Il9CW5+ll+&eH9#6b=JmWuvN$x@qr)vBNAakWyR)H=n=4nfTu6%-YY+AM zQt(HIavSZ`czLL&wJ!u6P;C3Yq5D57j!`bpq2uQ0c_c=E;zBz69=gTwA#bI=wouLluBK}bv5ccH2wsrp5wLcqj5i9@n z^30aH=ruaHZ{IV2_T3kY+RcV|uL}9f=Z;Ww@zCnDSQN`AYjoysSQGnfSXVziWzIfZ{KB)ki%XAqUue(1Ju%rg z%F{R(CkHK@*NcO$@Y#bhI!#R1;N<3_dMNJ0(&YKS!jGD_utnyR`e%O@j zi=pm%LCk8T&icasEn)ATSRNO~3Bfo0%iAG-51_;a3add|9BXMzp>Jo|CR{h?0w&7BedTpSz6hqzV*otFh&J<~y~>>nEI z!yUFyj{`%@Y&*9&q$ze8D$It3!jh6J4zdA!-h*SLZ5eHirg>&*#3w1S@ms+TUylD5c&+?Vi7h|ZA zJIlklAqKgzKiB8ts$grjvG_)E{^!Z_Oyi%#dt#UPN&EHqh@bQv&4oNZ9BJmiD;xj) z@z)#6WprZWXX5_Q4}V!XuWbG9=G|NH(dPR%zd8OO{w{tTPsGo+H{1B1!J`k@B@mvz$iNAiQ1Bb5nE@(t0S$)y(m6D`R31@eR&x99Y*uvxt<#PR4=|4g}ZuYw~(9I zoU^9?hO=zYZDGibeL2re#_bxNr3-D@5SzKtSvH0`{9T4-g?MMM@kG2PE(>>W z4>f%#*!X_j9r|9pPsXKjYS8lWSQ{&X-&;a|-w@?FqP4zJ&tHo>;^J5nH^&2^Ztg!C zTjF0sT-U{mac1zZ@2s(TWr*2(yeaG*5f{eEadsRM&%~{i-j9Qt z%gcH77so4tAAPWBGUsdIY&aKi}vZwLBIF+ce;xX17QAjhw$a z?uvgOa$;A%xx;_?jd%%@=G)M+MA6-!+qmm%eiH-BQI?3I$ 
z_eWmV7f-m$rT-=ColE1L5)=sw$=AJ6J%E(bqy*b-u4`;yps0-cuzUps~S z?(xUhDCG0e&@W>7v$!DA+4k6%K3WqW2|aQ|sKe(X9Tv#vSL3VQ`ISbwutirIibpMI zt}a6?V&I>4@^D8SCxpIH?<0fOo@YZHyf@o4 zr=7i@t;6?v{#vMIxsCtKdjI749gUC0O~L+a;~POsdH-Sja$Fs>dOjYC%R`*bT@ssv z4%r%gvGLa1Ocd?BugKZ$38Z{M0n|Y2riE{@rd zH{H&O1LC>3AmqJ6ygu#3@?sSK=qHWe3ip3Mo{1ad!nh;!!NVaRaNLL8^W&9ON=?;ZO8 z=%DXDaZu3tm{13OPe(TPk5!>R)p)1_yY{z>^>IUNz zmr;HeoYmTF=*gE);BPj(k7C;)_?HjO@;m-!iP^sT4BBjNPRFJ3%CIgjcNWEvpY=T^ zYYQ4_MwYhkA=(}B`ejC4&-aB^mh~A-tn%9FP>mQEd z9rxyW%{RtikB-jsdvT~~`i)~bwlJ;zUDOi`f2r{+QSXfZx0vo~{$iZpvpAm%v9&(Z zV|Q2X-)a7f@tN*#Zf!$kYuvNvHtHq4DF*Kh?bWDw$M24K4i8#i9S=nQ$KRmd*KrTX zLH$pOtAY=o)zUyS>nPHN^oToH5;!{%_t9Gl|E7JU zdqUiAiYvo+hW%UO@pvfo(>LSMkUQPzsBXuE_?+Dw%R{Vm`9$0ldgY(O9p}FuPsTMN ze&bt1oll6@1f89^IWCxtTN-KZjCbp($#_xYV(h3h&{;;T|0phPuqgOM({l+&FLh)@bOC=L18|XT+oqqjDYH+4C#^Z5*B2 zxVY!yAIb4g;@^dL>91?+?2P15f6mbB|BKfK+kYS5iCaTlKMA%kk6q&zdQH=N^zG(H z)Z1Tfen9i;GTJwGZvGEJhv$0Uzwz^-7VnF9^;-9hm|4?aYYYFTwNJ#`;)(blLDwI~ zr#nNB8{@*bH@+3m#0R3a5ncXUcn_Rc!^`3ap@+T}x5vdHhBr)}-_l4M&*d@x=IIkP zqsgYYE?3G0I`@I5Fyh@wm3Jz8KpR z2fI7O9>I^j!T)y6*>%RAbFT>RiZwR*7K^xMW0#PxbJq6>8nDX-pLCFm9QokSXwNx& z3xh2qzuSiUY&tg={LF^){LoGwp6k8wdD`+vM=?1w3j1>4gMaI6i^+L=e7f&E-|}$A zK8WDFFnYnT8W2E zaq2n#ozr*hduGoX+j6rmCv!gFjD7m6&46?Eezp#B;h*k@#mcbnzUSGXJ^SMJUeSiF zSI6vRPOsxbTtjcDiyCc-gX4r)8-qsPM`!t^ zrQTqhp7v-U9?+c*>l32oW2 z@7`R91#HpxfY1+~4-L7|h5bb#Zs*wZPRom}LH8kM`{Eunpu4=p<2-xxTpx0NFxcjw z9%lwW=3+Q2yl)o;eUF|n*kgm1Y>QW1bQ&}<4%&)o&~B&$n{*Jf`@?r(S#xpA%bs&= ztCvxpa#4f9u6JS3WOyI=76ZTb*@JU)u`=k%*5VknXMcInMXrA6V$FH>UKRGmKxaD8 zOx*m7Q_W%Dv-9+^e`2f-TKst|jRS*?>nG3hJ}mADvEDfuFKqN~@HOD9d}$~q_Glwl z{YN`<@fgLsF8DXfT@MaE#d=W4+Zi>{7v}u9FAqJ)9^`4{i*9tJmGfdy1F@N_x3hD> zwmz^g20HNN{-QW4279y3#b)GxV+{T1S-(KtG4vz9;*-DF#pHQq$X$KD9rirS`;HKY zc=X3y@b!%l)8+AS$U|Sz+dfU?2=x(%HKTK4f&9cFM{y1Mi&;GG5Bu~Q?D6N>9a;}M zv&qLyh{YM2$P;YYx8|%^JP&pFr{;eY{C_&W8lMRFJ|Az33qmgP{z5z(eElFk80W=o z*t<98f|l=(ljEe|_x16Q@pPoo=#0kC2K)aKACGs%*0?Xe9FK)_PsL^3IkwTc2jXwz z>^L^!c^1GWLw+ac*1{&d-H+Sl=xOeV`}x(M+fXJ=I-J)e3yuH?pHQ 
z{HU{M?-qORtDk50jci-7k|jtbu=Hm%8lZMq*6%j1Z!w#{S|=ipbb zxjXpg)0$`g+~G&=bfk}5<>GnBMZSD^7CRqocsBPw_+gt(h{5x&A#Z2cwPp`uV2ACw zI4JlUbX6C5dp;n<#~xdI1a08wkQid}EH=oQuc4k|^CJ$ii@`qoi-P?JgZ;tZna%Bs zhd=!;<{LuHE8?W!Yw#&obN1QP2lC+GIndZSXUrGJ(0{)BwD1hEED15FnK;~Gmwm7= z7q;2-Oe<@i{a6#1ILtBlAL_ui_r{OC*%s+5et3!prbQlvOnk| z2KU+78gdn<_uQO~>q2jOKg4!S*rNqb3Fqz&wX=_*21fUcbTZO%&}h&}4C11db8HW1 zX)Rw`o6Cy^^l-*md*T@Gup>7(=f@g7^e7#Ewhx>eo}C}+u(J7@5U=t0uqKA1V?)e_ z`^#c=Yz%pw5eLUmi`nLjW5_{`Uu{6ZGJdOn3yGE9Y(q zcWFs)z2Tl-Fsh;VhFjN=+9_m2T^|5`(MQ;4j`LI|Q;#ODj-5>s}l-o@q7XJBjPMqq%w>@_4 zt7CDE|9>dn@o#H0Mz-YjgIE%3&(7;(S*Vru$D{A}%$^|i zLB|!bE)EEH)j>>b(oqcSL;mcm8Czlyi+5Pv1b`EQ=q<+3~jcM7Rfe=^sAmWNjG5Dh_#gFKD69)PWD07Q@)S9Q32Ra^D=Y z;hr-?ee9{oo;%7Uiw2^>O)Io@7MHOwY<6edgst^)-zlBLT${K zP3A*Qhn`n|Ht9wS_r)^wfEdJL&snhVc`nw);&5JkC&ilBKFW3cPPoJ0fgStvH28m- zr_)TDj{p3Wvlz~cJ44)jJ`ncg=3QKw-k5G55`yHKgM6j zO>t{HUTYNF4sWP!-e0QUZH+H_%DMQ)eJ5w}sLi&Mk$*YZqoKJwhld)?#>tbpXXqie z<CVnu9?MImnM7liZddse5_ zVgH8UpH2I;-#++*xqWl??ducIerAJBdHpaB3c4 z<>Q>(t$AjbFFMe2$bZm>&TP5AJeCF@^z@yRw>34uzM($e-{E~=TQAW=esqwtb2RrI z@ng@LeY!YboD1!bic+1nx%J9huDz=X=YKR=9IYBAtB1*&VX}6ZtQ#ilhslOvvT>Mf znoMR^t(?vV+cT>M<1=;ozeO{%YA`>uYOp`EYH%>KYVa_#YH%^LYU*S4)W_SOKH$J(inwNoE!r#{wBeXO1OSU2^tZt7#*)W^E1k9AWY>!v=| zO?|AJ`dByhv2N;P{nW?$sgLzjAM2++)=zz`pZZuo^|5~HWBt^}`l*i%Qy&|qJ~m8! 
zY?%7kF!ixv>SM#y$A+no4O1T*ram@KeQcci*f{mEaq45^)W^oDkBw6w8>c=tPJL{g z`q(t}v1#gK)6~bNsgF%lADgB=HcfqOn)=u@^|5L2v0&B8!AaA>OVhzk)4@;E!BNw} zQ`5m!)4^BM!CBLJZ_`(P!KzjB-li{otMlHbuYIfY-li{ptMlHbuYRlZ-li}Af>qPE zzv+B#)AzsC`P`;ofL7;on|=dYozHFh6!`VAb?1(R4ny>35>l`P`;oidN@yn|>==ozHFh zwOFuf`nhO2pWF0<(dv9|(@#dL^SMnw8m-RfHvMe0I-lG0!?9r1^vlt7KDX(&qt*G` zreBX%=X0BWKU$s7ZTbaSuxk1VX*!?V^dr*hd~VavNUQU?O+O^9&gVA$l(ag{ZT0-O zWcB>FWcB>FWcB>FWcB>FWcB>FWcB>FWcB>FWcB>FWcB>FWcB>FWcB>FWcB>FWcB>F zWOd(?|Ls?tt8c#YuHjdl;rE^$uetK>D_2~1XTSaYZ^w7I^R|0e48Ilm^=;;n{?AW; L`qSqd{uTc}encM1 diff --git a/data/synthetic_breast_true_exposures.rda b/data/synthetic_breast_true_exposures.rda index 09533f0911ea3e11e7cb0bac21250664225c85c2..3099d4ede30ef55a11890cea81da2589e193fd60 100644 GIT binary patch literal 13209 zcmV;KGiJ;miwFP!0000016{jyR98XQJx&XXbSt8CN(h3Wut`B$y1To(K6E$I(jqFN zfQW(uf(j~Pf(jTY3M!y9Dtvi=zx6)rS??cn@7%l2%$%8f=Ip)iT60#-TxzfRULqnQ z5+X8EVj>dK-6|Q$?uD3$jEHLY&J`5l8|>~9?BQhR80g{<6l@n97~*2*67KI86cXqX zL_|c&xNAnaTWEI6Yb3Rq$=oFiK!N0stJ_Rw$Q;bpdv@<1@^oGph~3OU)(NZem#Jw; z7cA#UGq*&N$H`+8UX~ynVU3;P5=8FPv7?cN1RPsyAnQw|LacZIXJY#Wn_K+y&uo7Sr<#HxxgpYZ8fgjQl{K1j@+?q}4A!f7QK#WOQvUY_L@iHyP1_Uk1dgLo{3&wU>y1rz8O3hejsIk8S94cKO{V` z@?-mU6KS^ovTGaSNHV>2xY2wRAwpQ09~c9nHO=iI7ccU;qz*D=&>+n!|^@(Bd7Y766=#`q&b{g&uAe2MHW#x`V>I6SwC=xRK~z&bB7_8V>UG0@r(l5kIGSbGrO3;+D7QzI~)Y ztg52#>zva_V-8=|skB6le(6^hgAPRJ^@uIb97e%0#R*#CXrxnkNZz#>K?Q675LGHS zf=^R1iY_-J?aiwTeMbl=t5(x(Hqk`pm}d9YpL>yVBW{0Rgez(vzuc=N*#g3WeH;RL zq6keMZD#nk2QdlrGJ<@TD7>#)kxXlbg4dfbW6h2tQ6>C{)!P9C#tThUe4RqImfgRW z+ZxE$$>U1d6M*Ej4ZMFg4#MS+?q?pPp)pQM630f7cZ6)Je9!LpX!3kow zxo3~mM@Ga%xBEcocbqyI`x-@Cg9+6G9w=4W4slBRj0}SkeLh`lF-rQkb zy08Q(T<*7ee)%Dl_fan-|Dd8)c0RMN3FXU2{}g6^K^^I27+IMol9Yn-lwG=zrSWX^ znOOspjGx`Sz`BBrwdL{m5sOH9^=4zJ+y=#*)j0$6{}8QN-!ob{i+JT|YxQ$EC{ooV zvg%cVUukKjZI?XG4fjoz@6SPsy_fIbb03g>bK!sdHmqJ>M zKk4Nd0+K`@JTzOE*xgsl*?$h(qtd~+E6HRL^@Yw_UDmnC4OFQ9>Mo0vClu7J#E~d@ z?(XXQofmmlw8hmCfyiLo__y~FBZ@X#lg!RvM3Mn7*gtGUXRRsYTs@B194uLbAN{?dlO2B|3MrCqqG 
zOOHIo6|sT*E6D7BY0CcWA=0CHBd&@CAffz>@jk+S6b*l$S#w!OX^On{3^b9PPnuiT z`5L*Z(Ku-ljtuP+bw*s$NS`jFJ?;1qDXAoGPkfY-=5_9oh5Bwj7p_&2clSc^g@I6k z-F=aj^VxI7R1O(jP8ZEBDR3_RT}BIyAnNXUSWjJP1EE%F(D~;-#H&2W2`q}(&381u zjtyFfqqV+qh@t|CexL8Jf)YtMAV2+J3VBq5;-h<>BA5Q7@~b-!kv^`|<)!-dp4>57X|`Q$;>&$eC4QADp{zWOQZZ zPOZI;oEF>84f4YO;FjrgNk3#t;VDQf}PovTs z2F%&r*GamdN&11V{IUX^8fN!2HA_QBHKZ=ja2&c9Bdc!vn{4=g5DLCE#-Ed%p z7m-Cq2X$WDKtO4w4i&!);^hOY@2iBuDdksrpWuB&zIe(oOL_?URM+mDpyq&$DSzMU z%qs+oJR`Pi(nOfuwf7Fm|DdBidm#3`9NOKeGt1gG;6o`O*pOcYhi*SAgHr_v3J@3D zC-?~=j@8+z%gb;MCu!S||BC|NIeVoa5il&MzuWSY1l@VspTcigf$*rVpjPERuGSnG z>^Ap8VrI5MDp5EB4f9(>a(FSmk{3WIlPdElIu*r``!1zhS(CyY~BYccSEBjmb@zNTpihxb>pPL=ga z1O)9bZycOQd>y}@Ru14|ree>TDGW~`h4jbKPe9Nc>_~L$K|Oh5_1PpUgcLqcJL(?; z_uJHWTCWd7yEx5r)6f|%R|d%6`|d$v;ui`1xaV+ny(U?4kbodB?L9L&{P3=u4O~y` zfSRI7X-jb>eEb%*RmO;+Im2sxj=dF5xljId7#)SBcHgJMLVGyTgt8Md1L5)fXSsHK z0g~Mx7dX?GAXKP<{mw)9xsiBsSYOwOoO-d59gI~DTC5q8n_}_j?8db-O3v^SnNq;{gO}F)1 ze)0(zA1j!o-iQn$Ji`&`%(|A^Mfm;+^@j!*eOZDm=|xK282v{SpEn#m&sT8iuXO&jhD`Zt$F%*5sLBL5sQHciGt+ z@Uk)g6+hC4=JTh163KcYDkJBsse&h*DK&=5^3C91{weIrNj_M+4ExzxU4}cK`GJhx zd8d9q@owfwDgtFCZ<-GEA@tPy&FkypaGLXQQ!nyIZCrq9Im3GdTTk77$#xSRT2;K3 zkEjv*xJ!9`elP5LEGrYXozTg0=*Qk&KJ?&)iP&0|6+(V!@Z2G;MlS34UBcEUn6JAF z&P)7&120`)-@T)-dy(1W-O7se3LjV9OW_~{dmfOyq=+!<>ozvtYe?Zcyv9lrj>-n# zJnrUqNH+YPF#aG3UVrBPnuqV=wFZZ*67veKTI_2%p7$9Rd@ZhBYn*6{rMU4mIRloS znp`zis<3vR{rqZf5KgiQu8ixq;XujMdRy5Kj+{I-Ru?4^@qswJ>53lWo`|^`TQkD- zUk;J#`VV-y+Z+yVpM=}D($3OvfiV1bUq|-Zm0jKNaQWo%9K-~aN`0&SxI5mXt4n7Y zFwy&c`Kt9YoVG{YA7rWh2e;w{*0PFyZX#L5$1u|WHEutqW}Rwjhm-H5IIF=D2uTBN z?vFH)9?$-c+D8~(w>5sq@b@Bl!clzPKMg61OO=PZ^bnHlKwH~9gwW>(yb=NANZv7+ zj%6YT!K~;-NSHBdaX2t1yZ{voy&o@BH0|=MR+}yjIRvfzvGbveLAIHDg(MFh{8b*_ z=YF#cZ-)!!qi>}U>aHasu6+%WxpYP0{fFUKU;Kf&pA8Wjq!gEUM3DOIYR-il^a%VH zerM3H0EF9sw?!N-xlourSJ~BdPHzi@7YGPotf^kEA3zXO5pPoHQ8){`7l%X`;6j-| z%T}lZlI-GrDU9FXntye9FMkn28e%nseQ1#EnNeILw@#%bW)S^V@-9~r!m#!PDIsmPVYa*aflF5O$Co;E?ipMbBPB!g 
z7oh_|R9?vb_eoTq;RjNETvIthcoCFl{=!lG8K9hb!LrFRzg}&jUhv+Vtt>SV^5epPuAIi@;(gUb3-ieIl_0OR z`n1c-o0FLmUZK;f|DWd-5|nyAntc0q5D9VyBkJ5>(vf6)Bev$9_N?!@aovL-f1(EqI!5&>N|@-pQ5rPU4DPLo9pn0BYGD zY?SqiqoJ(r;ozzhI%1tN$dbtsa)?u=?8IMmsIWYqj^9Cn+57TQDIg=na$n@fShS~w zZ%!Jl;F_fD>*?M=+%6y`Gr3HJR@)_2lPA>37w>GzjVVID&sqS%Z3zQPhSKZtMQFR< zY_cshf$P?{ZpkQ1AcvC@znp>K`FgCBPPqkKY}fdpC%(KU=1%HdE-C ziZFHF@Woa2Cu@Tw&N$!oIQJM4KiX@5UOO|V3xfX3Pn>6GkzT8F&f|0jN-C}sePy@} z!nN5?TWLoTA^Xcy^w3Lq=6$#L8{dd}e~(8s#Iv}>_;gI6BLp3ZF5YJ{UL$(QCz{-L z0ma-Ov<*)Tp*hyrU1j_UYTi8X`CaiI<$vWr#Mv_<+sBLDLW_S_N9J}Jr~%1(#Gfj~ z22h#ta8>N29r~V#?hNqSq5G>&@G#LkWC^~duQjAW3$cw;6&(|jLUKN?@88DF(3F@J z(=>FuYF*yqG(*0@@0FT!E66<_)X-M!kMdxJ%&e%dDE8_8T*gL<&QFd~@#i_vZo8}z z^v4l-57P9k3XRdG)*h;D5Qfb8P`;ZwEV%Aeb4ppDA2lj_t`0|#dVA3mS=>EIM~vIaOE*4S86$W#!I(SL9NAoR zgNjNH=(=>xP4(eHbktctH#$~_!VLQ7> zs&$cTk{C#n`uVrf7O4v^GMOX|xbmbi<<^!A@~agdgNq$aZKsb%cJ(9sLTGZ;+#BS- znXEJKdyXV3`x^mQHgWDM=UZjU8a$-_O1WGeiZlaq+Vdf28}XmpmU6U03k z5OL-;eM5aT0(|LBt_1WVVs!Y2st*J5N}jClmucM9bGH+Q>XQ(uwRCdbSptQA=ERh5 znh?IhqFT~n1p8GU_h-kY|ASl6hw8-kh8Dn!{{0fq(`eMz`iS4%?}vylp*p9e84%)H zb*XnM7U5RJ3;v582(A+qh;Ad><(GV_GOuqS@??4Fq$v>+MAYgUmpfp^HswTFyMVNN z9CR48GKJCcqIzDBG{zhop0<-_}e_(lW)rjzgr|y!@t*2 z!1=*$Sdj>XZuL)w`#4anGym@I%YpYynEJOaQAA#^+m)Mt;44I=KUeny zHMY_;ud_d(jTGXS*F|+iCcSY!o@0VY2U$YjhXvGRi*=jH z7NII?$u+{e8W}z66tPb!5Xd{F^Xl*bf)}`MEOdo|&c^@V@4Q7cvJ6M4J@ZC%!RCG6 z&Q%mN)$k9F-oWMHyT7xThmjTAU&}V`E_+OV-qjudDs#;a!^v@`S=-7QX|?im zBTi$e4VW%?Ev1Qun|J-pI89KvzDF^~E(t=H`z9PG7U8 z17{KvB+2Y>jXm65Cl&JSLyP-RXcc}Vg&>Wn{HxegtcJKlx6kuPwITcx;&{G3NB%=a zY10^IR3AU&XJXo5&e_ zEAU_<3g=F$N^M#(qp~1csM7c=3fq5JHsYoQ%!{fSxh|9iX$XF(#`M@=V>pdJArP+5- zrAL0DIrl7@@6;7qvPmG~`u;!2*9q3EIBz21lJNV}Os2o?3+#kF!tnLSkSC7P{pApIZ<#dqGCxkwWHhs)uh042<`@h}PMy^6#Nyy77urc(S z)*5|<)6i2*?hDmu{Gev(r+E)ScDhaFM83$@y~mmmyN&#uSt07a>$p-RJn(^$5sk09 zDXjW;c`#AnnHAF%6gZl8KNPA&QvH+CWez#iC^nA|NQ|PPnw}+)qzvI%CBHUW_>ka# z-ob*>98o+%4=c-kQ4%T2yzu)Gq6v=2ZK>IjphtSM{9Y2`G~XW;lL6w&p7SRsQ6SY> zFhtn+Fv{h|%ZMu?k@`YECre5hRYup}{@Ue5ZW1 
zUES&;yk(M!fWbEd(`N$UVJ*CLzH7JrlwE?3atdO3u5&U}2qLk@TmCoO3L?s8vMK&) zK<314bD9-?Bn}ve2M4kLFK#LQ<6k9mk8g#T=8j^|Cs8$J_DYKrK1cS(tXbel9% z76!3TFH#ko9aDmmRO zQtqhq$1_6g;6Ht(jnTo9cLFLPC$eeQRJBt2(IVh66L1Y|4I@?G7}}J6>CoS2@NuT^&1@{ADtW zMrp0vRFZUe-OM7A{|Vkn#a@FR&O^tC;J$t7N?)!5_A)7Z@>5x&>yhO?C#~-gGnn`- z|JoZWDl3mXFN8q8f%~A0h6n6};`iyAc!H_xkF1u}S>&DD!%P^p#qp-Jl4FVo(c>qs zZoM}giSaA-zl#LX2-24U0S6JulIHbCP(x{X;qm=O-R~H@qaKvC=Lno{1wHdIZv}^l zmx}Sw9i`3jJM!My=1TJsZBFYy&fw5%LZ3v33ijPqWh)LShl!PK4{O zU$I}2f&3ISm8DMI^ln0cpY_;Rf?~hK~Uxz@oMcWeyHAH7Y}QrL+PTfqU`MJ*`$gju*#eb zV1G6X!mJDw>bS(ANj($9*$_S2|2Ai+9(4~a zgKY{+P;x#fIB_o}biVnCckXjOK(fLci+#S;Na*^snbG-6X|tEF%Dwj) zZsb)()Ml|Glq)QOtdSdWuezh=ls!@1e|Kw*(OBvC?I{T*^K|eS`x|ugc*Ar(tuU!5 z7Adm?zt-puRQ^L?_~}bM{iX$!A_7-_ncBlk+iIR(%ne>7nvutz zkAPr8k-Kxp7Ws7?Edo9MaPrWj;ISWv`gP8GsT@QIG2177eV8A%)ZK3cSs%dqZ)$|Q zt1)<$b&Ki+uPbdH+zz#TM~uQJL=^r9IS|g{`*pj^4Rs-;@8ce}gYJYk)MwZ=x&16=x`Tu}AA>vTY2op1 zIN7D77LiYzxj(h5qi!i;;pUY?a8f#}Qg~Sv?L+_iLsCZJnQNXoIFbzlUs|cmr5Y$G zoh^H+JPC@02)>PdiZJ%MsOg&Y4W+a9(+(FcA~ld{kPwl!L}w4#qwmKTvE!di2E8NtmVhs$Z(9L+^!oNh#q7j1|SE{P^a9 zgq{l?iwAPy$D(ptAf5r1loOI;zlA_y=2cQy6@cUAkpu2IR4AS>*)~4s437@p{3D%a zh*)yu+s9o4{ttf&o=$&<_mxX5VH%~#6%Q@x(FsQCv`%Wr@PNE=SFaTXxAiBh9dw8J?o*Sdb~3|NDGtZN*VaE>ha zMCX%92uHO&qv@{3Ktp$*Q}j=a{gJ$Nc0dwB^(2AS0XF}^t<`gJ{yEHpU|#<6frxe) z{4-&7dC$afAWb=OO&Z?>g-`#>9YK1%Pfu+sKdaVxzy z2SN6c>Z8srZIFJe9qY?-1?L~e@U+a`{3>-usA*gf!mD1d$KNhOf|uiwAIBn2tB-`} zuXW&{+wt)2g z>?RJmfk^vPx*pdd@Q!+h1oxDJX!2Xc`O^YWocN=$F?13Kn(pQn$$W7daK;%7^ywmgV5N*o<>`wWI5ft8Qyg5XhP9pShu4i>9&GdY)Z zoV1j?OIC9aB%AW4=kGUz((+9xVfh1ys3e1*Chk*O^s~+UbAcZ$XMPc_rj>)2)!}fY zlQD#XzSD53@`BH?pW=ezUogDo`AH$)4(+u$Ha(|g5N`&P&m~fV;c9}!BO^C3+G)Qz z`PL31t{oM(T@OHIV5E$mjsnKlG_S-91tZKV?xvsANyyBz5>Ecr#UUHtQ!6hMKsH%C z>=0H79^LYR8^p9Q463y^8(@IT`AaE{HZ&lyIQ8;Dv?UCSb!VSSoCYu1_Nu4iO9&_Y zAU=CS5zIpez0ZbS!V%Vq_m5Lf!fG{gj<56!j>*nMd-z;KaJ2ULgXw9=uw6WHJWLXj zF5XRf?h;DdUtE%D3b&whYmuMkZVjZ4pS2u4;{pYXIS;4kk2p+3bKpZ>3KTXDiqdI6 z18r^2)0PLMIL#(fJ}p52cZih>2ag08M#iem%{xKEYDyU9kA;c+zWU_QVQ{^Bbfk=l 
z8r0MbHfy<@(0m#%Xn%|aIsvYXRLkF?^=_G-{mcZ?c398#^C?26>)ef}Cgb1>`S&G? zLmY>`Fx@23^jV zh1G8&VCbZux!&WXv=J3oZZ%ne5p9@?X$eURK|yIwP?2PLzdhrTDRLA^KK&GV@7t{;2JduCm*Upl^?bSxBtrbplG>q`c` zaY}5x+Zu=iqe%Ge^1$54=>JYX8hh@K)#)TQfzEfC|9~3{7^T}{Q%G;1et*e6Y&$WK48Z&<&zTTU8m5r$*?<}xANZ5;JCi5~1hfBk+AW`Ns3JM!~L z3zS(n={YPPK)1>+ROoRs_Mf|_DWhiy!{^uTzEQaet1*r}&lNKdmG!1-pqtq|hJ_bo2QRg%;IAXh?6k z{2GJOHd&wEHSakHM5G^({M-gpVLzJ}2}Z8hK8zYd%T5O#>W49k_lpO(J9U{MsG z;*k0awV|E|8;qu46UkjP+Ig(>_r;Ffxr7w3%e;}0b$7;z>#bKg{uwImNarupjEjRw z?!fm)7dJo=vY7fMN)3_(-B)6fEs%{7kTbq}1A%5i_np3q!jiRQ^oY_tR1NNM#*b1W zl*Va?f!qWt9V}OdD>LDrwOYcxc?ZS<{?|p6t3lerK3FhEhdrOp3g6wTLjrN|?-RT6 z6|W?sd*G=Qi2PE>1>H!&?IEd2-jW6#PSMVz*Y1FcI_wlLt02V4qL0x16NlJn^VaHU zC}e04c9)j-gDuLcX1g*Ahvly5-y8V`&()~O;_^T+SvD3NZAu0g-TaoDIUSDpo4HUY zk>iLZ_2KfnHzBAUul8%7JecQ8YlEV5L!zvA?4UcQH#aE%i{EoW%OC`>-3(o2O83WfR@%pwG{n-2St`&<;GibElxAZni zApZPXP9kL($jK_B>>atmoJCFg;e08aY+CPp4~>O>MP*CWy=hp`ScE9A$b<2pNeiGYD5P2mi=6z9v*9^gA{CEu%DX~~R80sPbC-*&4p2dA zEIG-MLj;k-cS>%1ibLmxys$#f6>uM$mzJkG0())+aU%MDxc>M<3MWVKoUQH-#Wr-> z9Q*{&OT&asx2Kj$8VBf=j3yF}L%Q(sgOW=VaMRtsH#QNEgk|#s+Y}A(q+L>dbkG%| z?6R5xgZsfmq_UNkSONc%l>&12M0i-)p1Ng51!I+MQ`+{YkS%9_L*JYZS!uBg6+HVP zuqGL|{yGRsEwXZsKi=a+8L_2@X)uCRn({oWFC#K5XYXiE3635((DFPt2<8D#=3Txy zfqp#u=Wm%67;Y~v3t|?w?B_3L_X#3Uqe<6Joek;^Hf>bj>p^$;PUR)V&oH1hIzMsV z59vkL@3d-VHmu_}5NGQJEOfS>|6XoVs@_Soghi2OVV=q|Oi=_K1!(lroX~#8F zjQIQR>K|KXkS3={5~=$V<=0LNjXI@)V@HZS^42rZ=(JPNe=oqHv2$cg!E$gp%;(Mh zaA9{o9YD;R%TsNtYJ)KdElHQJ>|Hbvwv`GxXM7j7zV8oP zd}IQJqk&l7Hx)4J6fXvy^FqQ=pJPD=-iRsmV>_o-1Glc@$`Ug)kW5+JXrcTJ{lWF8 z#VNjE`ZT(@NuvTORUQR8+V4OD-?u6;OK>+0FjBt@1kYf~you-}7zMPhI9!?C9k-45 z?NSON(jOWXkgr1}UZ4LRQ7@wjc|gn{FS-d_&NV7PS#lBz53f%~4I zYZtVp;d(qq*|01euAGvW1E1Ew`^`Z@UD9WzZT^rK*6)sh``K%ntUVM6C{EWOUz0|X z>+E}%^)0AJ2PA#n69Db+gHFkG7ZAT>P1}(@fUpy~afQ)u;St^#MOQxq3n|enpI<4$ zB5i^1^3C1gNc3?1iIIMn!$wAOy3`2E?p0VWb`ztjVmq+4L^%Ejw||=SV^S7e`>@ zN-jcX*S02JkHX32qqNO51;~OEE;G-H!_h0k$y9;@`mL8Q@(#HKF&u)+$&r~fc_ za`PibzeO-cr3#EDo;Nho7{JAzcHHCjUx<|VaO-OYL-djP6B^N0$Ot`Hn2#}s@nrD% 
zm~dyPC=Hh6E2}{`C3q+D*$~_4VWK&vVPe@51!ILk7limB2sbTcsr}!e{jpa zJUHlbj{;mwS^E1rx)52V%JP!K7B*~rs|Q0#;r!cKR(5K4{yv_%*+pD%p=BgE)p{WC zY9G&Soh1Zv^4X*xTfpgGL6VDxf-oQOG1_*gfQ0}_@(tTHIL`NU`<0Tz`!@&CGT&Jw zcuoIWIy?=}yhdru{e`fo6*+U%a1ma*w!)l*Nodc%{_vUYAx>mm6j;h4f}^77+xwlj zV84~g+IZ3y`boW}{QELsY+Uj{a#c* zrC~AqO;gH}95%;er0;BN!@9`%b@HYQEWdnodGL1uZf7HFXBh?&y0g}17jhjzs<(OR zqix}EDp9hiX$y|U&-aO_J%(NQbNeY;z{xsB?oMPgjC*C6Ooa}@_PO)i0DT<%bpzgg z*4fQtFXfXuTI8Yi)xSCJ_zDg)oNw6?)I}J*`Px3Yw{Wkf;?LCG14oL2Z=-41@Fy;z zp5rTlW?zA6%9lGZRqopyboc?cV=+5Vp1p!!6N`f(mpp934>q;tiNoa1sHJZ#39N5+ zytArgL%3u9f%TsZ2(x!(KATI1;N-f1PL`uEHla<34@;3xR}`e1@zt9_mYO-BXF7VlB^v3 zLc^WEWN^c?$N!4OrBJ1{SH0)W&Tb>5^yBM*UQyU;j|uQcaX@9H;Vav(J|vWdW+W^( z!{01&fE+(Sj?1HPYt5Bs6#G~6^$G}5Ayk`9NZNaV{#P6dR!J|A~7s6m7p)!hTX za}cS%reQpwxtm8OA5rww!RdZbMYc~RlDQo!YVk-)|m%2Y&ulki^F7l#|iR9k=OBP1{{a`*dcB1!FL z>&Mim2>r+=K%~VFmx)mKo}nh_?s@Lc!e;=htMqH?=1&lJr-SFWq9HuZmwDw@3J@3~ zvO`{|21h1)!M}IZ5K`P+*1pR_0~gH9g0~n@7+)~)G_4JpRp!H78Bu$JgWD2|q%XP(nV3~Bl3Kl%rVml?ScFd+ypw=^cT!+kL9 zekRT%nFjmVakfKIUa$*N^qfiQ1EJTZxs`$fMkx_R9;4IncIG7gas3(WSDXvELg;qm zd~a%$KQV&lJ%Sb+lM&=#T6V3Y5FT&RH~Y?4Az+-4=;)pT!Yi7eQM3&p)L)@j{#S`` z$(|oua-|4dVGyNmNq-~0|BdWh&v9o}-k4~VS3CS?6O8-Co<+5D+4h)nK0w%_^| zOafZ}?DL$3-Np8*z5NV0aq{Gj24f5e#fG{Ly?KZ?zV4kl(hV=N-n;r*o(P$_UpA;- z3Ag0Q;FBY_5g>RibWG_OQknHcC^ikC^&{cx2O(jYeV@Ga%iu6#T0E>x$4H=GVq6j8 z@DYwbdsAneA0p(?A<-!VIr#p_6slT#f)MB7Aa3Vx-7+X+gT-K!gK4g2JP% zEhJDjBdpMGc@(ZiKckD*6_9v)qnypo7Qrtb&e(h?h2vr_ap6WYoZX{lHa84#T<3ks zL%t*yZ^1td*|5ZQsUv;_v?c{&#kNx+9?0-M}x0%%O z|LcI0w?okXI`F?Q_c}WSI~;Wl-1Y7Mm{SG%g&qB0pZEXIdA9i7%g)Y@@a}Eox!YEZqdY literal 14050 zcmZ8|cRW|&|F*0s*}FtGQHV(5GE;W;-h1!8_ujIzlO&arLWqhaO)8|6B1twC&*ynw z&-eTKy?%e3&$-X%+~=J8obx{Kab4GaLB-_6K9hY!L_{P+q$GQYNXQ6HA~Hh*HBlKN zB2r?)#vUS4B1*#gK!9JMqqT>(t%IwVvp?Yg2Ev~dgoT>0yhTE*vD5=nU*w4|T;E|d zMtXmy_KS!AkfZrZSM+WgGESL~y-rR>s$eO5iisH#T;)!TyPF~I2usuqry#PQj~)xl zkHd-eI?|qGN<K^S@usV0{lpTeWxhl#zATDZ3CH~v zf2ENj@|W*NlRshA zn2pGtPe(S@DKSmS3?zrRzm$Ev2RS56hf>e+B6&c6EBY@rauVrS9o**;|6ykJ{ID6) 
zE-}){3XDN#+$)Xe@&b|$l(1}i{X^_yb8pt~cadV{BfY+P90^8Ox#~?u5GaJz`My!a zwWK&d;p9awr^F$~G-{-n2QDZ+k3+mr`##^J3W%8=Z+7?RLCPzAQyykv6i3s2R%TpA z?5j3b(}+Zr*R~!y&9{s^Yx(}7-(e`|cIQOfdOZ?n!Kg(+l3v)l}kI>q0Zb=we;(<%CE zh6{No zI?DJM;*~;sdx6x2sc zh~vZva*mKrmhL6Ij|R8rHGBP0|LxQC?n4_SKh@tube~{fy>^pwQEySO-5*=o=YnFT zok07PuSnA^(&5vxK$g0XbWBqeVoYo|B=d`q#OZRc^S3vWd7pMc{13{irRURY8c@1& z>`#9BSJaS91e2DyAwj`EN71nz8EP*^UKrOQ!QjQ+%Pgx%TVEOb82SrIZ{BSVlv<*Y zqcW>+{vRULYdc5EXAz?qVWE043kAyRMCM&e@GdScw`!Ng#lfD*(gRsYvT^tNdvO|> zcgHW7c>F<5E#uS`#WrNR#f8wyIG{|eW^8pYJ5qz{6P*)&B9^iu!t-VZ@(UU-`>k># zwN-AW@9PqBY;~oFcR3Knrk(sSRRSrIJ|xYNaYzt({KR-ejPPBpWd7l@LAk9#dxGI2 zYV#e=wOeE%+wXMcHy0TsJtL=T*%OAMmo83TKX{R2PE%MJ>W4Ij&42rzGN53qCBgX8 z6(s2LlFpTGBEz(LqIHrOxg$+8e}t%!`dpanL(okW$X<$(;qXBI+_|&!CbdX^sPe_O7257oJadr=-Bb}vWwF!E?D%nx0Bi(Ixb8p;dRNWB=aIoa2b#9B6I z4S5TsYLuBNJh|7G9KuBpLjgBZkDq5p|U-QvKyq|Zk zUduk@w33O4J!C}g<@P@s@zW@ASSnO}BY?y^T_#~{fYSAG_epR$D_ZrfBUmLN#c!Jaj-q7o!zKAV7 zZ?He^015_w%&a@Eqc};{Vg~9+%q7XLx&0Q|$`Oz=4MCd5sTzGwNu*Ad(44hYVq;S{bC-udRe! zmca9CdpvRR@{y5=F-FA4w26i-RUsxxeNS@rO=LA$-QFb2|8LwfdPxx%S$GF-<9I0O z><1g~_j@J!Mqq2QL(WIA|1h=VI3Fv7bkWa;_r9}--hFejy8Biz@=7P2&!vWoOrp44 zoiaZ5d<-))+$KDyR(^`>)^J$!?^$!Yi$Zz+upYL*aNuQL3t$t5@ANC1%GK8}2gy0fMZ|C6#;8He0p7qG}G zI}!?mXLSSjTc(hpIj4wca)|RBd3LJ!Eg}oL+P=poAvo;kth^O79Qby>6`p*I9FOtq zZ$jGeiG66XnpX+g`77FfDuVEZsu7ciM3><_2f88z3DvRv*^)!(Wy^Brct+U0 z>U_enB!K=SH0E5&x(KPicDi!?Ey4qSeHngXhOoU#74kBjusb_B(DX?K<{sGz34&Je z4mo#~e549K_n(u5*YM&p?c{92-)W?1wS3P_JOzUjc@tCwdvjr%i)D2@4Xaecm%;H@ z;PW*g?cA+$g#0>|5_mxpp2l(U5kjBgfBj{8+_!zO&A7?2wMzk|GtU>~f&E&qzcCb`+Io=TU6L!rvzL?*&giE8fpWUhrZ2pWj zSt+uhqH4Ub*P#~flB1-?nXiz}si+=8T#44D{(!s{LVoysu~6tHG2Bm1k$q8ZhTqfZ znVC0(urmA^YyZy)Zj)2$Jmbu0GV%T)J$nc4mL|VrhI-L>>C8_eX?KLDWqmU`?FI)5 zwSkgcWB8PQ4!$PG4-3aZZ)@{rxbT@AOe5TP@{dy=W{xDoPg?x0(LfJ^&V1auxp5r! 
zbFR*+1wN>b_BAS{pF)7e`VEA z$FB@U*DEX#xS+<%4Nf8z+#cw;Vh6I05c|=Kh+55aP8OyR-t+ zDz2OEZ#tPX15>^xr}p*3XpJJj^DQwAW^U@7RTavxaG3q_W^MrX(y>kq8~0&L!Ps(N z(HnLgJXPjb#1T5ZC#2z;Hlm-2IvH3n!0BHWk@Ch*xVu<#1>Bl|^Y`N0#ozs)_x+Kk z^o?tTy5Y%++{r9N`W8!kubw7c@9FiW3-lQ8`mu7|Vj1>3LoSaqRQ?;cVg%MR3O(;4 zQOQ#;%;zoc4^y#BHr;}~*Tiua-6g~&^tHM?RYz(J+XpI7VYuH{Tae=KM&h{L@eQ98 zB>h?{Kisa3z(iY`>c#;Cz0~Cu^Cd^(uI^M6BN^h13tj~V8=x9oepw-TC|m6Mbh)g7 zkYBY}wyVj&fAx>GCv7A$ja|ybd1&FI^yCruyH$ADUN#wdFM%MJb4QPB+(1}1Z9zyc z7rbi=rvTRiDKtG?XPSi_)E3T>zp~XoEObZo9#z(XK{OLsy1rqn$B1W zP$A*Yc%=)YE^aE#kNbvf;$|X|gbM>Pq9gejVPS_<6SJ5#o(#liUFp*sl*t(sV|`C*!1uXc-}1{_r|2KPrfl+F<^g zv2|P-WXjBD%0V2{`@Vvm6=f`uJn@9Q_Y8sp;>nwCq9+4PAn{pgbY3NV>`uMfgtWA4c~c zrX%>bfGNIyLjG{?gG_PP1g=@vMX?3+p_=vaW=Yp^)RnY8>0h%$TauHS|smpfSq8MQQ9As@^^JTrQhJ z>0jCDXd4D(db+cjp5rIfk=gCKDnO$4p3mi?eJD?RvL-5Ljh<&ByM4UY==i1?Fi7+P z8G`TWs`aSRw8zrEf|d~pfmxq64(#A=P*UWoQ3^Vo&NXjy7$aAAd9~`|DzZ=d*R>Y< zpfuogdPev+6nb`iDPbkW?ay`+F_+kJ%W6f<|BoGV9;axV=Nq6^RqW$U(XXPh{&{kvdQvXB^@;AxJ zOU@CFPwP8dIh2aTfg_dg41b`N)qbL)K^hmoQf?f*A&$NjiJyP#t&qIvD3wlJhilL3 zlkROxA-D4MFgV%J(0cY{SbGmLF9#)7%)LkMyNMc;o|j0Vw7KJZZ3`E#bG%oisKOJf zZxkz)!AQ|1qq!95gzIA|b`Lyhk<;OuVL-fq1h3`U%d-pUco1cfLZXYi1E1&5kWnM@ z8Ha#nt`+JiMUPS(EkP@l@o(je#ORbLXBaYQL~L<%aLQwC#GkX@A#e2~;1Snpd%izH zqQ)@vZ?Oo6O2ZZ5Lp(#^dthq~w3&R>a{N z(S1RAaHspY#Pd7?)zzNI9~|&T=+_|4Gm`WObgH=8H5r8v^F50`zuFK`BO(ydN=nEt z`IM#J-a(jLY0!ib5n_+3)YPxE!JKu{o}zjYDG%9cUv-SbLj2+h!KNA%+U;D{AO$iS zvxe;N?7;mW@Cj8nPf@RJWpkz{&ig-E%?g2{M2M03|IyZZqb36qu&oWHvWuL*L zefpk|vLFFZ8%S2LD@C}PVuDZd4iXiDIhXG@!rit?n~!Y)ffJkuH>7%zBvWSn;?@gz zosD&m7j!~^VcrL?sJrm7e6ly!iUZ#Fh$RM>*OA9DZ9OPYgt!jX&wBgWQNVe%X)T8s z!8|kn9_`P9$4s#5_jVD4-K-&$n}6UXM5HrUvw$iqN$R(m)2QL-NsRm=kC@)-4|Yuf z$j_jP?5og&s)D2X_beA&Pv?!_T--#($Jub!^+WJYNvi5QvV>SVuBF$tI7)KNGkO%4GXF5%R^5p5<4k zA0c~yMhd(8Q64e)xiOdv`G)(6FBK&sS6yPr)~f}cm$cNG_x(n4e1Fjt_Yi`f+%EjZ zmx1sLT@~$DR1lW%&f#R1A;N5><9enSQI#p$VJuyMitr_;P>%|vb*hp_Jtv1B@1*7% zu08}Ta^6{N4+gEJ&%@=MU#Mpu3{`pIj)=UiM_#wrkk?Sf-#>B(%>fUVGnfXE5!GAG 
zNJ7Y$SrVFEC%zEsj(_FZ#$2$sn`zWAC*U*HvU5ZBqp0?s%6luJj=H-Kyp1^wk-xE5 zKFc}*Ld`wncH>KM+TS{Fx-o+??sviUJF5uWB&~06r+_!@V2{n$1{96HWhAFGL24Z7 zwO!HgNIFd+b&ID2IsHzSEg{B8Ng7ISHlTvN-{r>+#3V?N+T|R5LU>L}WSNJ5?MJ?O z$epA(NrdNK$KFB}L?6C?iASOpAy*O2^X(;apU6uZMLMAJ&uG;WFNb~}japLdQ*8$eD-29ZN}1H!BW$^K~au`u!rmu7O$x-o^Kk;&R4;pA&e(nk9 zLJVolq}1VZghl8U6_9Hxo@~lOB-GEoD>C5v>$do zaY4%6thgW%FG5@^6_?se32*LgUzg)bZ$)1;TyP4D_gf(?M?DOUQ$X{LZf$B_YqwsN;!HITC80 z6|b<%ph~`RtWRtNb(M6?e#9jR$te20*~E`npG&r;6ebAg5qeTy>V={(5vIlEr-+EN zJ84D5idb!uyQL2k5Uu|4n5YyGUGkDYF@X%p4uXNg23#nW87tXS7KY?kI$0SKim1@P z`TjQ{PmJE!ETOcOhs9GhZ9c0M_}sbV$X3J)-A~t3EOU+^=11ACS^sB5M?W1udU*r| z)TXlu{YLP4LF&S*MuFPYE)%_+MkK_)Oe+}FMNF%~kLTtg$S=FjEfs8qve_N!PXs$j zT+J6PJI#u+2A$K#zE7cQ?W?bpyb3xrqRK=+vLb{k=~_~TIQ;GzY}wDSBTk8vu_Ep& z{MXjLmDZ{uMzu53`^z}og?W6X4TDhIH1+(Be*~d!brjwVm}c~P!SVR<~Tr8LYO`m5RF{ui5(!Skcb>xqrRuW`>K`=YZH zej4q|4ZSrDMS03W)>C45Q~aA)PIeLDSD)S|36q9y)bnNvCMQIz{l4?)^6_hf6)g4C6EzW zHmWO|D=ah#xX$?A0BhY)#drEvh2NTVkLbOfAR}GqEz*1l#FLFcA0b1m{o_f-+_>&}=Cee#0cT;kV>3~Gh-4kK|A!gDhYP5dX=oq~P3og9Zx z4!~vS>b0Ki)7ZzT=*CZJhW4js`|Z#DfT-^Hvg}(AC@HNzb-Nr0**fk+Qfe-+@sHWB zW#|UR_CM0+%r7A4;$Eh>5i6W*NGUoYe+Zr4$5k!%g%I%C)!O9(LDYlfwXg3XgfXYM z{}EJESXmr?RIjyw{%%$OjJ;fNxaa@E)1(FLN8ObS2D%lt#=2!aGEEfbLtE`P7S7}F z+qfRFHYMzTpv+q6TM9#Sc?+Q`Li{c9%Boph2VL0#d3v%lP*;>VbJwE*zTOriq#lou zqpMv$yk-RVuM~z?sE#UZH6m@1BL|A?mt47e`V@Y!Y%)?kzKb~G>T{BN+h9s8!3?W23;49nqSR?=^-xL^>mE_dqyCO!)FTsh|p4$plapQJw_KJh?A%7GgAsr<6q z)gjnj#LL6Ax`cRG9T7wuejOzj-p(eJC4fchf-l>PS?D!=vd>6VMD+db=zD^cNGWSP z8A<&Bu0=Cx+V(1NTp@9bJ2-N1GT7mY!+Zi;E9SZ`3J=BqkvYh11qbF1VU+< z<900xIHYU&uzrpm3`H7FPtXCU+X7x^zo3PG*sajt3JJK`e)8}mw<36oOg(T95# z$@FsAd4!2LvK0#+fVUQfufPBW4C(qPR5PwX_`bqMwrD1@Llw4Tx+kG^>22iBk@q;X zgP*M&4#4f(^@&jWEtM_S}=E92*Aw3xA8mxvw z#hu+p98ZvV`km>1uS&$Wf8I*F{aazHi?70^YZP~KDnhF>*bu}S97|fyjp#QW;d6>^ zsO){Pz0P2uuzY_~Ou-}-JO)0x9XuW|+DOSyD2PJRY@ByXL>qGMa_3Nt=OO2WdzX_{ zEcDsyf5y=#Abj1_gz+vNctzUdNao40C+i4vFg1a%iN2UI%-aFGvY_Hhs&$zA7^)@n z9|rwTFRH0`O`td`aP7B|4cs-%=jlXU;7+U_cH-p_;ta{Nce|~STf^QY(CGtvS8Z}0 
zn=z=~p*C^j2I{T@>i-cyE5QF@uNh6z6y|0neI~ zI%VwE4CUGMQO$5;}OD%JRu;SVFiO`a|&By6Khn%U4`{Wm&Y z?0nwoi@-L$py+h?7g#qq|14zeMr`+|fZKF5aQ!ft=vY*Zu;-23pKqz6W+`;>?zO|P zSGb^*->i&V1OIvhlSbf{ZIa$Ulz}+Dlwzr?RXDA1q2#&Z1jrXd`8M~3x>Le; z6wf|N;VM`{xc7%!!8$cCWvAG(s_?^UHNzLXibkk6>{n-wZbHFE+HI>(SoR?h3~G2?Y!*z>tHs#nU&58#L&ZxVqAQ5 zSrFvL?nU_(zBpML*5{H%iNbNi9fOMwaBbtwJ#yO^p-Xmr`?;&YKm8}~`P2eDu3cpg zRx3vK@t~qk%>X1%X(qQdr=jsM$E(Ds9MnANy;09u0N+@F$7J<{daWR!wKqXpVZn9D zNR6!x$9`EkdMe5(?E3u>Ih5EAk_)PFF`}i=TqXY%)x@i?^PAW-l1#3XM|YZowfV+oE>;JXiueEvoDkaFI0N)a_>d~GICvf~> z2uVpN_*aSZLJebr5MFbCJNEt;#CX}Cdb9t+S=FIHo%J>x5+8imBJBeDNy(W-+oZZ4<7Z7QDPSxf-4Bip9z<|yY z5KVj!y>wOp^5cKhHV5QzsA0nY^aptm>r`v()N3oOr{2v7tk=Z><|)Q&RTDTeB*etU zV*v6`6H>bBG*FWdH#`?Sh2z$(P6@7r_u?>Iwg1XDoXD&rDYUr&$1hXIJ}~J(A^wXQ zV`~nM){7k*aQ*`N0fE&|s)FE=XBlFDAO>dhQezp%RLGghJRq%l2;wbSqf3t(L1FeT zC~jpMM3mwI&*S$g{PMO+|8toS%;$d-t)-NLm&KMV%-#S({y(TWlzG8t*Gqm`{x9g? z^ZX>2y$Ox=IaY1^MC{oLAe)P)0R8n?v8VdZV6fJBC->eON1fWr?mHcXQr}Ps9W6Nw zZm3_25eh)CdGuXx2{}m3v&6~$)xu#*uQRK!V?jDmIA|MO4j!%2zB_wppyyw0W86m% z$4gg}>Mf~3Y7~s7ds1H(w#Lo`PUGRUD$KslsuRQ4tZP%zKSC(;~$5U zilhJ-uuN* z{h$gGCohYiBh)>`mqCPmClZ4ZoheT*KUVx@L>v_{-Vw`0?S~?{b2kt;~ zM|K`D&<~APnwZ=MHH%T)Ab$)DW%t)61`UGq!_y-rj8veaqPJYn=79S17(ts8%+U08 zVxV04fpZ^L=-AE^bUnK)7kl~SA=Q5I&U3@J;0yfsHJtqz4u7TZez8jD!aupn#323=prafxgY!35Wy*Wm zp)0)9FeiQwe5{?Xmbvdi>ghPGkdiKlJK~34+9m$_oJY{O*eD1(_UdSJi63; zA`e3vj#&n$^P$kGM?7-(b-)41m|Bw2KnNNgd$+$Q5p)JgQMJzN*y9&Y%x|3org{dS z4>}Rp`)IUAGrj?|UMu_uoteQP*&3BZatE~s!iO~;wJH2P8IeB1#sP_|XXV2_FJS+m zPD_W3Bv?cBM)%L9L%72r{BHs+*!=%`|Lji#x2{Imm!T#oGIP+en>~hBg>{h7a3T&| ze5fv^tqZ-EHy*rGx(oAB_PsApi$HOJ^3)l@L@2+i+WqVk1<_vnU*j8Gtm+J2Xw%n(X#%-06X(+NDm zS`qhFHw*-PZXQ*v1W6NHf8HD|_I|z~{9wBVv3mlRPZ8`ZM&YQ|!RHbn@=hWXbS43} ztGGH@QwlUWL~b9u(G5nb;4{1|f)FK*I70jHI7CMpx7S7jAw_ejqqwvOtl{QWJLMVR zlDVeyaOgYS*1{(WOZ~uTR-bpQArYLk^V`lQv^e5p>_|nZ7mt`xag{!}3qg$-mEZei z!8Bi7?H{oX#nG&?(|e4-U9-^RWk#*A>b}LYCt(y6H|besn!0g{fR!#Qh~rT7L1TX+ zM-Z*+iTjwef<`#WiZ7NKv~N_Re*4NnCE5PbA)%=M#4YTZC`xWuG=a3om}VbE0C)z& 
zo3?~~;He{RB7edKQe^3MW9^S{;&7|T=hkE>6}z}AvlN3~@tzY8izFB~U2n`5UWXFX z2dc`i<+#8mIH&bz6r7)r*S5au!@i#n%$YSCL4EFHQ&)pHVlJ&`#Zv@>jI=!5#*Q0I z8B`?Gmx^I;+0y+ZC<;1dKdan7eE7s1 zxOoJ9E$$F-E2>9p*HTGg)4Cz#IMxWGUncF}(w1>tBK7&D!~@7>sHP9K;>&++INNgZ_*Xl@kowx78PP7y4zNACqIZ_m0&YG`x4vGu28pxh|IH<|z(Cdd!^!V2VIOt! zQ@L^tPA8iSAC+6cg^;Z6qh-T5<56~wgrI*=n`>2&{`oZ@dkFLX)(N-NHwl zD%oS^O5hLtl^Sx~Dw`3Ok+p9ms|d#q9&CDO1SgvtVY{1h-qA%F{^ik%o4@HHYvxc@@hK6^gSqGIC9@#ld5m=F>=H63ZkSX2 zJ_WN5_b;=z^?{+}*)D-7q>SD-S==dyLUMmlnesgB?sswL3XZ^|gq|y!+73Rgxv!nA zXAoO>vyo1y35JS|Eo2Y3!3Oo#K87x^Y!pcLl!w4-PTZPvrT{UITvR_b%^*cao;XbF zHA-)s6&kTm0sF25S=hZ7pw_%aPWK}Rhet1xE(OTIk&DlR`^h5Ve%g*XRjxy|K=E1F zdnJgd{5I>}I{{(Kis}ILuOg}M#+)h_C=er`9Gz)+E zUvx+8G0zkJx*mwk_h!AQQU&MslZs+9)DTblwb?}R1v>p3&kK{h!1#IO*A}%BB$Rnh z)6&cXv3%bvM9sk6(8oaa!4EwBN%Mvx6JQWHcg^EHuVdm}b^a41Y=a@zXHWAi!#Fm+12%@JY zwq}LYq5r^G*6ipa^!a$4H?xAlzM%b=o#3aOZ~Kv`DpJ5@um6q9=ceFvGEz~mBm_~`~tKjkOP+U#I7lj@Ez*iO@j)42cTk4Fx?`tB*&yEsJN>k>+SYyuW*Ya2TBJi?D?Y3a%5u=jM+$}Hf7BMn2GeYGq6uJ`cV*Ia@h2cKo? 
ziA9|K?JxdIPY@=3p87j3WH1#VPP}8a4!ilz4)0Z9k6=`F!s%SxCdZUP$fZ>PVoKEbK9D*{UyM6i<=dH?A4J=kogv((F3K_{Wh zh<|??3=E2fV`>IrKXvrO)kR$x=r zMoM z?we0z^vP9l(O+uX71Tm7oyq!snfGw1q~uT6qJSNF-uID|O!(|6qMG9?f_hJ$QPS6L z7%BE__1iAM`9$RIvlnmR-N0ofL6SnWq9H5r0Fjow@E^+$T}Xupy;9!=l}GY%W_ zYu$jy%Cx*!GasBhHEu@`cqqTG+;8_y#vqTbW9MN)KRgAR8Lmppz&j|!;cFT<+&X=( znO+T2Sbx)X$@sz!0*gPr_3aXYmBy$5e>fYIhU&ht{_a6+Nl;oWLD%kMoVarIrwCkv zj?Vl3kbx)L;g{6hG?3RnM=2p00<)uGuj@JF5aRT5%wD$&p(>OQ4lZXQOm$t&piiCP zM<$+p5;j7wp0J@#s{w%mP>Q$maviEXY%+lL5FSo+VJ7=AcTvIF-i_B z!(|{^?<*O>e@Yf_J*pfB7GX#ew_Jf+8A<$UX)t=KF0znple zNtpppmOi&^etvip32c6)NFnIr#5`7X#Nf<6+1g9cRmU>@;_mfQM}o@TmQTsg5%h^w zfQX=1a2yYE=^SW)*4~#c%zU~qzfQNVYVr)x-EBO}@_KMHS>csg&4XXy(Ot5971%M_ z2>$I>L11BH$t^-2>bGcY9I#D~{FuD)=P9j7uP_Q1*sMC_a|Y59?0jExxX&K= z&%65ns!xQ!tx?I1wgR}mOWo?ZQ~}?yxOh94B*eX;{uxeFhq&5nbc+87`bP22g>9K) z_^r~5P_+{DdFb=2ml=eG#|I|bZ-a5)Fv0e8}_2Ra0NCvfIbNxy12oD(Mk zSB}fTrL8@L~s4ZOmLn16p#ZWRM ze7e`{7@P`zMigwEM*RKFQdVm#1iX4OV>w+6yI Date: Tue, 21 May 2024 12:50:15 -0400 Subject: [PATCH 09/17] create_musica_from_counts compatible with DBS --- R/load_data.R | 61 ++++++++++++++++++++++++++++++++++++++------- inst/shiny/server.R | 2 +- 2 files changed, 53 insertions(+), 10 deletions(-) diff --git a/R/load_data.R b/R/load_data.R index a3d52b78..aab50d77 100644 --- a/R/load_data.R +++ b/R/load_data.R @@ -808,14 +808,14 @@ create_musica_from_variants <- function(x, genome, #' @param variant_class Mutations are SBS, DBS, or Indel. 
#' @return Returns a musica object #' @examples -#' maf_file <- system.file("extdata", "public_TCGA.LUSC.maf", -#' package = "musicatk") -#' musica <- create_musica_from_counts(x = count_table, variant_class = "SBS96") +#' #maf_file <- system.file("extdata", "public_TCGA.LUSC.maf", +#' #package = "musicatk") +#' #musica <- create_musica_from_counts(x = count_table, variant_class = "SBS96") #' @export create_musica_from_counts <- function(x, variant_class) { - if (canCoerce(count_table, "matrix")) { - count_table <- as.matrix(count_table) + if (canCoerce(x, "matrix")) { + x <- as.matrix(x) } else { stop("'count_table' needs to be an object which can be coerced to a matrix. ") } @@ -825,7 +825,7 @@ create_musica_from_counts <- function(x, variant_class) { if (variant_class %in% c("snv", "SNV", "SNV96", "SBS", "SBS96")) { - if (nrow(count_table) != 96){ + if (nrow(x) != 96){ stop("SBS96 'count_table' must have 96 rows.") } @@ -850,10 +850,10 @@ create_musica_from_counts <- function(x, variant_class) { "T>G" = "#E7C9C6FF") # update count table rownames with SBS96 standard naming - rownames(count_table) <- annotation$motif + rownames(x) <- annotation$motif # create count table object - tab <- new("count_table", name = "SBS96", count_table = count_table, + tab <- new("count_table", name = "SBS96", count_table = x, annotation = annotation, features = as.data.frame(annotation$motif[1]), type = S4Vectors::Rle("SBS"), color_variable = "mutation", color_mapping = color_mapping, description = paste0("Single Base Substitution table with", @@ -863,7 +863,50 @@ create_musica_from_counts <- function(x, variant_class) { tables(musica)[["SBS96"]] <- tab } else if (variant_class %in% c("DBS", "dbs", "doublet")) { - stop("Not yet supproted.") + if (nrow(x) != 78){ + stop("DBS78 'count_table' must have 78 rows.") + } + + full_motif <- c(paste0("AC>NN", "_", c("CA", "CG", "CT", "GA", "GG", "GT", + "TA", "TG", "TT")), + paste0("AT>NN", "_", c("CA", "CC", "CG", "GA", "GC", "TA")), + 
paste0("CC>NN", "_", c("AA", "AG", "AT", "GA", "GG", "GT", "TA", + "TG", "TT")), + paste0("CG>NN", "_", c("AT", "GC", "GT", "TA", "TC", "TT")), + paste0("CT>NN", "_", c("AA", "AC", "AG", "GA", "GC", "GG", "TA", + "TC", "TG")), + paste0("GC>NN", "_", c("AA", "AG", "AT", "CA", "CG", "TA")), + paste0("TA>NN", "_", c("AT", "CG", "CT", "GC", "GG", "GT")), + paste0("TC>NN", "_", c("AA", "AG", "AT", "CA", "CG", "CT", "GA", + "GG", "GT")), + paste0("TG>NN", "_", c("AA", "AC", "AT", "CA", "CC", "CT", "GA", + "GC", "GT")), + paste0("TT>NN", "_", c("AA", "AC", "AG", "CA", "CC", "CG", "GA", + "GC", "GG"))) + + annotation <- data.frame(motif = full_motif, mutation = + unlist(lapply(strsplit(full_motif, "_"), "[[", 1)), + context = unlist(lapply(strsplit(full_motif, "_"), + "[[", 2)), + row.names = full_motif) + + color_mapping <- .gg_color_hue(length(unique(annotation$mutation))) + names(color_mapping) <- unique(annotation$mutation) + + rownames(x) <- annotation$motif + + # create count table object + tab <- new("count_table", name = "DBS78", count_table = x, + annotation = annotation, features = as.data.frame(annotation$motif[1]), + type = S4Vectors::Rle("DBS"), color_variable = "mutation", + color_mapping = color_mapping, description = paste0("Standard count table for ", + "double base substitutions", + "using COSMIC v3 schema")) + + # add count table to musica object + tables(musica)[["DBS78"]] <- tab + + } else if (variant_class %in% c("INDEL", "Indel", "indel", "ind", "IND", "ID")) { stop("Not yet supported.") diff --git a/inst/shiny/server.R b/inst/shiny/server.R index dfdeb03e..19c738c2 100644 --- a/inst/shiny/server.R +++ b/inst/shiny/server.R @@ -1052,7 +1052,7 @@ parse_delete_event <- function(idstr) { input$combined_table_name) }, error = function(cond) { shinyalert::shinyalert(title = "Error", text = cond$message) - shinybuys::hide_spinner() + shinybusy::hide_spinner() }) shinybusy::hide_spinner() showNotification("Table created.") From 
e76b9f0f1c75c54f241ef096b0fc026e8a72af5f Mon Sep 17 00:00:00 2001 From: Natasha Gurevich Date: Tue, 11 Jun 2024 16:24:06 -0400 Subject: [PATCH 10/17] Removing deconstructSigs --- R/discovery_prediction.R | 279 +++++++++++++++---------------- man/auto_predict_grid.Rd | 2 +- man/auto_subset_sigs.Rd | 2 +- man/predict_exposure.Rd | 12 +- tests/testthat/test-signatures.R | 4 +- 5 files changed, 144 insertions(+), 155 deletions(-) diff --git a/R/discovery_prediction.R b/R/discovery_prediction.R index 1ca7fc5d..155f0007 100644 --- a/R/discovery_prediction.R +++ b/R/discovery_prediction.R @@ -111,19 +111,15 @@ discover_signatures <- function(musica, table_name, num_signatures, #' @description Exposures for samples will be predicted using an existing set #' of signatures stored in a \code{\linkS4class{musica_result}} object. #' Algorithms available for prediction include a modify version of \code{"lda"}, -#' \code{"decompTumor2Sig"}, and \code{"deconstructSigs"}. +#' and \code{"decompTumor2Sig"}. #' @param musica A \code{\linkS4class{musica}} object. -#' @param g A \linkS4class{BSgenome} object indicating which genome -#' reference the variants and their coordinates were derived from. Only used -#' if \code{algorithm = "deconstructSigs"} #' @param table_name Name of table used for posterior prediction. #' Must match the table type used to generate the prediction signatures #' @param signature_res Signatures used to predict exposures for the samples #' \code{musica} object. Existing signatures need to stored in a #' \code{\linkS4class{musica_result}} object. #' @param algorithm Algorithm to use for prediction of exposures. One of -#' \code{"lda"}, \code{"decompTumor2Sig"}, or -#' \code{"deconstructSigs"}. +#' \code{"lda"} or \code{"decompTumor2Sig"}. #' @param signatures_to_use Which signatures in the \code{signature_res} result #' object to use. Default is to use all signatures. #' @param verbose If \code{TRUE}, progress will be printing. 
Only used if @@ -144,9 +140,8 @@ discover_signatures <- function(musica, table_name, num_signatures, #' predict_exposure(musica = musica, table_name = "SBS96", #' signature_res = cosmic_v2_sigs, algorithm = "lda") #' @export -predict_exposure <- function(musica, g, table_name, signature_res, - algorithm = c("lda", "decompTumor2Sig", - "deconstructSigs"), +predict_exposure <- function(musica, table_name, signature_res, + algorithm = c("lda", "decompTumor2Sig"), signatures_to_use = seq_len(ncol( signatures(signature_res))), verbose = FALSE) { algorithm <- match.arg(algorithm) @@ -166,30 +161,30 @@ predict_exposure <- function(musica, g, table_name, signature_res, colnames(exposures) <- colnames(counts_table) rownames(exposures) <- colnames(signature) algorithm_name <- "decompTumor2Sig" - }else if (algorithm %in% c("ds", "deconstruct", "deconstructSigs")) { - sigs.input <- deconstructSigs::mut.to.sigs.input(mut.ref = variants(musica), - sample.id = "sample", chr = "chr", pos = "start", - ref = "ref", alt = "alt", bsg = g) - sig_all <- t(signature) - middle <- unlist(lapply(strsplit(colnames(sig_all), "_"), "[", 1)) - context <- lapply(strsplit(colnames(sig_all), "_"), "[", 2) - first <- unlist(lapply(context, substr, 1, 1)) - last <- unlist(lapply(context, substr, 3, 3)) - new_cols <- paste(first, "[", middle, "]", last, sep = "") - colnames(sig_all) <- new_cols - - ds_res <- vapply(rownames(sigs.input), function(x) { - ds_result <- whichSignatures(tumor_ref = sigs.input, - contexts_needed = TRUE, - signatures_limit = ncol(signature), - tri_counts_method = "default", - sample_id = x, signatures_ref = sig_all) - return(as.matrix(ds_result$weights)) - }, FUN.VALUE = rep(0, ncol(signature))) - exposures <- ds_res - colnames(exposures) <- colnames(counts_table) - rownames(exposures) <- colnames(signature) - algorithm_name <- "deconstructSigs" + #}else if (algorithm %in% c("ds", "deconstruct", "deconstructSigs")) { + #sigs.input <- 
deconstructSigs::mut.to.sigs.input(mut.ref = variants(musica), + # sample.id = "sample", chr = "chr", pos = "start", + # ref = "ref", alt = "alt", bsg = g) + #sig_all <- t(signature) + #middle <- unlist(lapply(strsplit(colnames(sig_all), "_"), "[", 1)) + #context <- lapply(strsplit(colnames(sig_all), "_"), "[", 2) + #first <- unlist(lapply(context, substr, 1, 1)) + #last <- unlist(lapply(context, substr, 3, 3)) + #new_cols <- paste(first, "[", middle, "]", last, sep = "") + #colnames(sig_all) <- new_cols +# + # ds_res <- vapply(rownames(sigs.input), function(x) { + # ds_result <- whichSignatures(tumor_ref = sigs.input, + # contexts_needed = TRUE, + # signatures_limit = ncol(signature), + # tri_counts_method = "default", + # sample_id = x, signatures_ref = sig_all) + #return(as.matrix(ds_result$weights)) + #}, FUN.VALUE = rep(0, ncol(signature))) + #exposures <- ds_res + #colnames(exposures) <- colnames(counts_table) + #rownames(exposures) <- colnames(signature) + #algorithm_name <- "deconstructSigs" } else { stop("Type must be lda or decomp") } @@ -301,127 +296,127 @@ predict_decompTumor2Sig <- function(sample_mat, signature_mat) { message(dim(lflank)) } -whichSignatures <- function(tumor_ref = NA, - sample_id, - signatures_ref, - associated = c(), - signatures_limit = NA, - signature_cutoff = 0.06, - contexts_needed = FALSE, - tri_counts_method = "default") { - if (is(tumor_ref, 'matrix')) { - stop(paste("Input tumor.ref needs to be a data frame or location of ", - "input text file", sep = "")) - } - - if (exists("tumor.ref", mode = "list") | is(tumor_ref, "data.frame")) { - tumor <- tumor_ref - if(contexts_needed == TRUE) { - tumor <- deconstructSigs::getTriContextFraction(mut.counts.ref = tumor, - trimer.counts.method = - tri_counts_method) - } - } else { - if (file.exists(tumor_ref)) { - tumor <- utils::read.table(tumor_ref, sep = "\t", header = TRUE, - as.is = TRUE, check.names = FALSE) - if (contexts_needed == TRUE) { - tumor <- 
deconstructSigs::getTriContextFraction(tumor, - trimer.counts.method = - tri_counts_method) - } - } else { - message("tumor.ref is neither a file nor a loaded data frame") - } - } - - if (missing(sample_id) && nrow(tumor) == 1) { - sample_id <- rownames(tumor)[1] - } - # Take patient id given - tumor <- as.matrix(tumor) - if (!sample_id %in% rownames(tumor)) { - stop(paste(sample_id, " not found in rownames of tumor.ref", sep = "")) - } - tumor <- subset(tumor, rownames(tumor) == sample_id) - if (round(rowSums(tumor), digits = 1) != 1) { - stop(paste0("Sample: ", sample_id, " is not normalized. Consider using ", - "contexts.needed = TRUE", sep = " ")) - } - signatures <- signatures_ref - - signatures <- as.matrix(signatures) - original_sigs <- signatures +#whichSignatures <- function(tumor_ref = NA, +# sample_id, +# signatures_ref, +# associated = c(), +# signatures_limit = NA, +# signature_cutoff = 0.06, +# contexts_needed = FALSE, +# tri_counts_method = "default") { +# if (is(tumor_ref, 'matrix')) { +# stop(paste("Input tumor.ref needs to be a data frame or location of ", +# "input text file", sep = "")) +# } + +# if (exists("tumor.ref", mode = "list") | is(tumor_ref, "data.frame")) { +# tumor <- tumor_ref +# if(contexts_needed == TRUE) { +# tumor <- deconstructSigs::getTriContextFraction(mut.counts.ref = tumor, +# trimer.counts.method = +# tri_counts_method) +# } +# } else { +# if (file.exists(tumor_ref)) { +# tumor <- utils::read.table(tumor_ref, sep = "\t", header = TRUE, +# as.is = TRUE, check.names = FALSE) +# if (contexts_needed == TRUE) { +# tumor <- deconstructSigs::getTriContextFraction(tumor, +# trimer.counts.method = +# tri_counts_method) +# } +# } else { +# message("tumor.ref is neither a file nor a loaded data frame") +# } +# } + +# if (missing(sample_id) && nrow(tumor) == 1) { +# sample_id <- rownames(tumor)[1] +# } +# # Take patient id given +# tumor <- as.matrix(tumor) +# if (!sample_id %in% rownames(tumor)) { +# stop(paste(sample_id, " not found 
in rownames of tumor.ref", sep = "")) +# } +# tumor <- subset(tumor, rownames(tumor) == sample_id) +# if (round(rowSums(tumor), digits = 1) != 1) { +# stop(paste0("Sample: ", sample_id, " is not normalized. Consider using ", +# "contexts.needed = TRUE", sep = " ")) +# } +# signatures <- signatures_ref + +# signatures <- as.matrix(signatures) +# original_sigs <- signatures # Check column names are formatted correctly - if (length(colnames(tumor)[colnames(tumor) %in% colnames(signatures)]) < - length(colnames(signatures))) { - colnames(tumor) <- deconstructSigs::changeColumnNames(colnames(tumor)) - if (length(colnames(tumor)[colnames(tumor) %in% colnames(signatures)]) < - length(colnames(signatures))) { - stop("Check column names on input file") - } - } +# if (length(colnames(tumor)[colnames(tumor) %in% colnames(signatures)]) < +# length(colnames(signatures))) { +# colnames(tumor) <- deconstructSigs::changeColumnNames(colnames(tumor)) +# if (length(colnames(tumor)[colnames(tumor) %in% colnames(signatures)]) < +# length(colnames(signatures))) { +# stop("Check column names on input file") +# } +# } # Ensure that columns in tumor match the order of those in signatures - tumor <- tumor[, colnames(signatures), drop = FALSE] +# tumor <- tumor[, colnames(signatures), drop = FALSE] #Take a subset of the signatures - if (!is.null(associated)) { - signatures <- signatures[rownames(signatures) %in% associated, ] - } +# if (!is.null(associated)) { +# signatures <- signatures[rownames(signatures) %in% associated, ] +# } - if (is.na(signatures_limit)) { - signatures_limit <- nrow(signatures) - } +# if (is.na(signatures_limit)) { +# signatures_limit <- nrow(signatures) +# } #Set the weights matrix to 0 - weights <- matrix(0, nrow = nrow(tumor), ncol = nrow(signatures), - dimnames = list(rownames(tumor), - rownames(signatures))) - - seed <- deconstructSigs::findSeed(tumor, signatures) - weights[seed] <- 1 - w <- weights * 10 - - error_diff <- Inf - error_threshold <- 1e-3 - - num <- 
0 - while (error_diff > error_threshold) { - num <- num + 1 - #message(num) - error_pre <- deconstructSigs::getError(tumor, signatures, w) - if (error_pre == 0) { - break - } - w <- deconstructSigs::updateW_GR(tumor, signatures, w, - signatures.limit = - signatures_limit) - error_post <- deconstructSigs::getError(tumor, signatures, w) - error_diff <- (error_pre - error_post) / error_pre - } - - weights <- w / sum(w) - unknown <- 0 +# weights <- matrix(0, nrow = nrow(tumor), ncol = nrow(signatures), +# dimnames = list(rownames(tumor), +# rownames(signatures))) + +# seed <- deconstructSigs::findSeed(tumor, signatures) +# weights[seed] <- 1 +# w <- weights * 10 + +# error_diff <- Inf +# error_threshold <- 1e-3 + +# num <- 0 +# while (error_diff > error_threshold) { +# num <- num + 1 +# #message(num) +# error_pre <- deconstructSigs::getError(tumor, signatures, w) +# if (error_pre == 0) { +# break +# } +# w <- deconstructSigs::updateW_GR(tumor, signatures, w, +# signatures.limit = +# signatures_limit) +# error_post <- deconstructSigs::getError(tumor, signatures, w) +# error_diff <- (error_pre - error_post) / error_pre +# } + +# weights <- w / sum(w) +# unknown <- 0 ## filtering on a given threshold value (0.06 default) - weights[weights < signature_cutoff] <- 0 - unknown <- 1 - sum(weights) +# weights[weights < signature_cutoff] <- 0 +# unknown <- 1 - sum(weights) - product <- weights %*% signatures - diff <- tumor - product +# product <- weights %*% signatures +# diff <- tumor - product - x <- matrix(data = 0, nrow = 1, ncol = nrow(original_sigs), - dimnames = list(rownames(weights), rownames(original_sigs))) - x <- data.frame(x) - x[colnames(weights)] <- weights - weights <- x +# x <- matrix(data = 0, nrow = 1, ncol = nrow(original_sigs), +# dimnames = list(rownames(weights), rownames(original_sigs))) +# x <- data.frame(x) +# x[colnames(weights)] <- weights +# weights <- x - out <- list(weights, tumor, product, diff, unknown) - names(out) <- c("weights", "tumor", 
"product", "diff", "unknown") - return(out) -} +# out <- list(weights, tumor, product, diff, unknown) +# names(out) <- c("weights", "tumor", "product", "diff", "unknown") +# return(out) +#} #' Generate result_grid from musica based on annotation and range of k #' @@ -539,7 +534,7 @@ reconstruct_sample <- function(result, sample_number) { #' @param table_name Name of table used for posterior prediction (e.g. SBS96) #' @param signature_res Signatures to automatically subset from for prediction #' @param algorithm Algorithm to use for prediction. Choose from -#' "lda_posterior", decompTumor2Sig, and deconstructSigs +#' "lda_posterior" and decompTumor2Sig #' @param sample_annotation Annotation to grid across, if none given, #' prediction subsetting on all samples together #' @param min_exists Threshold to consider a signature active in a sample @@ -612,7 +607,7 @@ auto_predict_grid <- function(musica, table_name, signature_res, algorithm, #' @param table_name Name of table used for posterior prediction (e.g. SBS96) #' @param signature_res Signatures to automatically subset from for prediction #' @param algorithm Algorithm to use for prediction. Choose from -#' "lda_posterior", decompTumor2Sig, and deconstructSigs +#' "lda_posterior" and decompTumor2Sig #' @param min_exists Threshold to consider a signature active in a sample #' @param proportion_samples Threshold of samples to consider a signature #' active in the cohort diff --git a/man/auto_predict_grid.Rd b/man/auto_predict_grid.Rd index 26673cba..25d79ec8 100644 --- a/man/auto_predict_grid.Rd +++ b/man/auto_predict_grid.Rd @@ -26,7 +26,7 @@ auto_predict_grid( \item{signature_res}{Signatures to automatically subset from for prediction} \item{algorithm}{Algorithm to use for prediction. 
Choose from -"lda_posterior", decompTumor2Sig, and deconstructSigs} +"lda_posterior" and decompTumor2Sig} \item{sample_annotation}{Annotation to grid across, if none given, prediction subsetting on all samples together} diff --git a/man/auto_subset_sigs.Rd b/man/auto_subset_sigs.Rd index 136b1f00..a5518d41 100644 --- a/man/auto_subset_sigs.Rd +++ b/man/auto_subset_sigs.Rd @@ -22,7 +22,7 @@ auto_subset_sigs( \item{signature_res}{Signatures to automatically subset from for prediction} \item{algorithm}{Algorithm to use for prediction. Choose from -"lda_posterior", decompTumor2Sig, and deconstructSigs} +"lda_posterior" and decompTumor2Sig} \item{min_exists}{Threshold to consider a signature active in a sample} diff --git a/man/predict_exposure.Rd b/man/predict_exposure.Rd index 22fb6a9c..c0e0afa6 100644 --- a/man/predict_exposure.Rd +++ b/man/predict_exposure.Rd @@ -6,10 +6,9 @@ \usage{ predict_exposure( musica, - g, table_name, signature_res, - algorithm = c("lda", "decompTumor2Sig", "deconstructSigs"), + algorithm = c("lda", "decompTumor2Sig"), signatures_to_use = seq_len(ncol(signatures(signature_res))), verbose = FALSE ) @@ -17,10 +16,6 @@ predict_exposure( \arguments{ \item{musica}{A \code{\linkS4class{musica}} object.} -\item{g}{A \linkS4class{BSgenome} object indicating which genome -reference the variants and their coordinates were derived from. Only used -if \code{algorithm = "deconstructSigs"}} - \item{table_name}{Name of table used for posterior prediction. Must match the table type used to generate the prediction signatures} @@ -29,8 +24,7 @@ Must match the table type used to generate the prediction signatures} \code{\linkS4class{musica_result}} object.} \item{algorithm}{Algorithm to use for prediction of exposures. One of -\code{"lda"}, \code{"decompTumor2Sig"}, or -\code{"deconstructSigs"}.} +\code{"lda"} or \code{"decompTumor2Sig"}.} \item{signatures_to_use}{Which signatures in the \code{signature_res} result object to use. 
Default is to use all signatures.} @@ -47,7 +41,7 @@ predicted from these signatures. Exposures for samples will be predicted using an existing set of signatures stored in a \code{\linkS4class{musica_result}} object. Algorithms available for prediction include a modify version of \code{"lda"}, -\code{"decompTumor2Sig"}, and \code{"deconstructSigs"}. +and \code{"decompTumor2Sig"}. } \examples{ data(musica) diff --git a/tests/testthat/test-signatures.R b/tests/testthat/test-signatures.R index 4de945a6..cfe58872 100644 --- a/tests/testthat/test-signatures.R +++ b/tests/testthat/test-signatures.R @@ -3,8 +3,8 @@ library("musicatk") test_that(desc = "Inputs are correct", { expect_error(discover_signatures(data.frame(0)), regexp = "must be a") - g <- select_genome("19") + #g <- select_genome("19") expect_error(predict_exposure(methods::new("musica", variants = - data.table::data.table(0)), g, + data.table::data.table(0)), "SBS96", cosmic_v2_sigs), regexp = "malformed") }) From 8938579a773adbbf231503c8432e741ef29ac55b Mon Sep 17 00:00:00 2001 From: Natasha Gurevich Date: Tue, 11 Jun 2024 16:27:58 -0400 Subject: [PATCH 11/17] Documentation updates to correct warnings --- DESCRIPTION | 9 +++++---- R/umap.R | 22 +++++++++++----------- man/built_tables.Rd | 4 ++-- man/create_umap.Rd | 2 +- man/get_musica.Rd | 2 +- man/plot_umap.Rd | 16 ++++++++-------- man/samp_annot.Rd | 10 +++++----- man/sample_names.Rd | 6 +++--- man/tables.Rd | 8 ++++---- man/variants.Rd | 6 +++--- 10 files changed, 43 insertions(+), 42 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 28e9b898..d0960640 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -3,7 +3,7 @@ Type: Package Title: Mutational Signature Comprehensive Analysis Toolkit Version: 1.9.1 Authors@R: c(person("Aaron", "Chevalier", email = "atgc@bu.edu", role = "cre", comment = c(ORCHID = "0000-0002-3968-9250")), person(given=c("Joshua","D."), family="Campbell", email="camp@bu.edu", role=c("aut"), comment = c(ORCID = 
"0000-0003-0780-8662"))) -Description: Mutational signatures are carcinogenic exposures or aberrant cellular processes that can cause alterations to the genome. We created musicatk (MUtational SIgnature Comprehensive Analysis ToolKit) to address shortcomings in versatility and ease of use in other pre-existing computational tools. Although many different types of mutational data have been generated, current software packages do not have a flexible framework to allow users to mix and match different types of mutations in the mutational signature inference process. Musicatk enables users to count and combine multiple mutation types, including SBS, DBS, and indels. Musicatk calculates replication strand, transcription strand and combinations of these features along with discovery from unique and proprietary genomic feature associated with any mutation type. Musicatk also implements several methods for discovery of new signatures as well as methods to infer exposure given an existing set of signatures. Musicatk provides functions for visualization and downstream exploratory analysis including the ability to compare signatures between cohorts and find matching signatures in COSMIC V2 or COSMIC V3. +Description: Mutational signatures are carcinogenic exposures or aberrant cellular processes that can cause alterations to the genome. We created musicatk (MUtational SIgnature Comprehensive Analysis ToolKit) to address shortcomings in versatility and ease of use in other pre-existing computational tools. Although many different types of mutational data have been generated, current software packages do not have a flexible framework to allow users to mix and match different types of mutations in the mutational signature inference process. Musicatk enables users to count and combine multiple mutation types, including SBS, DBS, and indels. 
Musicatk calculates replication strand, transcription strand and combinations of these features along with discovery from unique and proprietary genomic feature associated with any mutation type. Musicatk also implements several methods for discovery of new signatures as well as methods to infer exposure given an existing set of signatures. Musicatk provides functions for visualization and downstream exploratory analysis including the ability to compare signatures between cohorts and find matching signatures in COSMIC V2 or COSMIC V3. License: LGPL-3 BugReports: https://github.com/campbio/musicatk/issues Encoding: UTF-8 @@ -14,7 +14,7 @@ biocViews: SomaticMutation, VariantAnnotation Depends: - R (>= 4.0.0), + R (>= 4.0.0), NMF Imports: SummarizedExperiment, @@ -48,7 +48,6 @@ Imports: BSgenome.Hsapiens.UCSC.hg38, BSgenome.Mmusculus.UCSC.mm9, BSgenome.Mmusculus.UCSC.mm10, - deconstructSigs, decompTumor2Sig, topicmodels, ggrepel, @@ -63,7 +62,9 @@ Imports: stringi, tidyverse, ggpubr, - Matrix (>= 1.6.4) + Matrix (>= 1.6.4), + scales, + lsei Suggests: TCGAbiolinks, shinyBS, diff --git a/R/umap.R b/R/umap.R index ebb56074..462e33f4 100644 --- a/R/umap.R +++ b/R/umap.R @@ -1,6 +1,6 @@ #' @title Create a UMAP from a musica result -#' @description Proportional sample exposures will be used as input into the +#' @description Proportional sample exposures will be used as input into the #' \code{\link[uwot]{umap}} function to generate a two dimensional UMAP. #' @param result A \code{\linkS4class{musica_result}} object generated by #' a mutational discovery or prediction tool. @@ -21,7 +21,7 @@ #' stored in the \code{UMAP} slot. #' @seealso See \link{plot_umap} to display the UMAP and #' \code{\link[uwot]{umap}} for more information on the individual parameters -#' for generating UMAPs. +#' for generating UMAPs. 
#' @examples #' #data(res_annot) #' #create_umap(result = res_annot) @@ -53,15 +53,15 @@ create_umap <- function(result, n_neighbors = 30, #' @title Plot a UMAP from a musica result #' @description Plots samples on a UMAP scatterplot. Samples can be colored by -#' the levels of mutational signatures or by a annotation variable. +#' the levels of mutational signatures or by a annotation variable. #' #' @param result A \code{\linkS4class{musica_result}} object generated by #' a mutational discovery or prediction tool. #' @param color_by One of \code{"signatures"}, \code{"annotation"}, or #' \code{"none"}. If \code{"signatures"}, then one UMAP scatterplot will be -#' generated for each signature and points will be colored by the level of -#' that signature in each sample. If \code{annotation}, a single UMAP will -#' be generated colored by the annotation selected using the parameter +#' generated for each signature and points will be colored by the level of +#' that signature in each sample. If \code{annotation}, a single UMAP will +#' be generated colored by the annotation selected using the parameter #' \code{annotation}. If \code{"none"}, a single UMAP scatterplot will be #' generated with no coloring. Default \code{"signature"}. #' @param proportional If \code{TRUE}, then the exposures will be normalized @@ -74,18 +74,18 @@ create_umap <- function(result, n_neighbors = 30, #' scale in each signature subplot. If \code{FALSE}, then each signature subplot #' will be colored by a different scale with different maximum values. Only #' used when \code{color_by = "signature"}. Setting to \code{FALSE} is most -#' useful when the maximum value of various signatures are vastly different +#' useful when the maximum value of various signatures are vastly different #' from one another. Default \code{TRUE}. #' @param add_annotation_labels If \code{TRUE}, labels for each group in the #' \code{annotation} variable will be displayed. Only used if -#' code{color_by = "annotation"}. 
This not recommended if the annotation is +#' \code{color_by = "annotation"}. This not recommended if the annotation is #' a continuous variable. The label is plotting using the centriod of each #' group within the \code{annotation} variable. Default \code{FALSE}. -#' @param annotation_label_size Size of annotation labels. Only used if -#' code{color_by = "annotation"} and \code{add_annotation_labels = TRUE}. +#' @param annotation_label_size Size of annotation labels. Only used if +#' \code{color_by = "annotation"} and \code{add_annotation_labels = TRUE}. #' Default \code{3}. #' @param annotation_text_box Place a white box around the annotation labels -#' to improve readability. Only used if code{color_by = "annotation"} and +#' to improve readability. Only used if \code{color_by = "annotation"} and #' \code{add_annotation_labels = TRUE}. Default \code{TRUE}. #' @param plotly If \code{TRUE}, the the plot will be made interactive #' using \code{\link[plotly]{plotly}}. Not used if \code{color_by = "signature"} diff --git a/man/built_tables.Rd b/man/built_tables.Rd index 8e9689f3..8bbdd130 100644 --- a/man/built_tables.Rd +++ b/man/built_tables.Rd @@ -4,7 +4,7 @@ \alias{built_tables} \alias{built_tables,musica-method} \alias{built_tables,musica_result-method} -\title{Retrieve the names of count_tables from a musica or musica_result +\title{Retrieve the names of count_tables from a musica or musica_result object} \usage{ built_tables(object) @@ -23,7 +23,7 @@ object generated by a mutational discovery or prediction tool.} The names of created count_tables } \description{ -The \code{count_tables} contains standard and/or custom +The \code{count_tables} contains standard and/or custom count tables created from variants } \examples{ diff --git a/man/create_umap.Rd b/man/create_umap.Rd index 66770fcf..981cf312 100644 --- a/man/create_umap.Rd +++ b/man/create_umap.Rd @@ -31,7 +31,7 @@ A \code{\linkS4class{musica_result}} object with a new UMAP stored in the \code{UMAP} 
slot. } \description{ -Proportional sample exposures will be used as input into the +Proportional sample exposures will be used as input into the \code{\link[uwot]{umap}} function to generate a two dimensional UMAP. } \examples{ diff --git a/man/get_musica.Rd b/man/get_musica.Rd index 3d786d55..a4a7bb19 100644 --- a/man/get_musica.Rd +++ b/man/get_musica.Rd @@ -17,7 +17,7 @@ a mutational discovery or prediction tool.} A \code{\linkS4class{musica}} musica object } \description{ -The \code{\linkS4class{musica}} musica contains variants, +The \code{\linkS4class{musica}} musica contains variants, count tables, and sample annotations } \examples{ diff --git a/man/plot_umap.Rd b/man/plot_umap.Rd index eb6a3974..20b6d092 100644 --- a/man/plot_umap.Rd +++ b/man/plot_umap.Rd @@ -26,9 +26,9 @@ a mutational discovery or prediction tool.} \item{color_by}{One of \code{"signatures"}, \code{"annotation"}, or \code{"none"}. If \code{"signatures"}, then one UMAP scatterplot will be -generated for each signature and points will be colored by the level of -that signature in each sample. If \code{annotation}, a single UMAP will -be generated colored by the annotation selected using the parameter +generated for each signature and points will be colored by the level of +that signature in each sample. If \code{annotation}, a single UMAP will +be generated colored by the annotation selected using the parameter \code{annotation}. If \code{"none"}, a single UMAP scatterplot will be generated with no coloring. Default \code{"signature"}.} @@ -45,21 +45,21 @@ when \code{color_by = "annotation"}. Default \code{NULL}.} scale in each signature subplot. If \code{FALSE}, then each signature subplot will be colored by a different scale with different maximum values. Only used when \code{color_by = "signature"}. 
Setting to \code{FALSE} is most -useful when the maximum value of various signatures are vastly different +useful when the maximum value of various signatures are vastly different from one another. Default \code{TRUE}.} \item{add_annotation_labels}{If \code{TRUE}, labels for each group in the \code{annotation} variable will be displayed. Only used if -code{color_by = "annotation"}. This not recommended if the annotation is +\code{color_by = "annotation"}. This not recommended if the annotation is a continuous variable. The label is plotting using the centriod of each group within the \code{annotation} variable. Default \code{FALSE}.} -\item{annotation_label_size}{Size of annotation labels. Only used if -code{color_by = "annotation"} and \code{add_annotation_labels = TRUE}. +\item{annotation_label_size}{Size of annotation labels. Only used if +\code{color_by = "annotation"} and \code{add_annotation_labels = TRUE}. Default \code{3}.} \item{annotation_text_box}{Place a white box around the annotation labels -to improve readability. Only used if code{color_by = "annotation"} and +to improve readability. Only used if \code{color_by = "annotation"} and \code{add_annotation_labels = TRUE}. 
Default \code{TRUE}.} \item{plotly}{If \code{TRUE}, the the plot will be made interactive diff --git a/man/samp_annot.Rd b/man/samp_annot.Rd index 08b1810d..1ed9b1ea 100644 --- a/man/samp_annot.Rd +++ b/man/samp_annot.Rd @@ -23,7 +23,7 @@ samp_annot(object, name) <- value } \arguments{ \item{object}{A \code{\linkS4class{musica}} object generated by -the \link{create_musica_from_variants} or \link{create_musica_from_counts} function, +the \link{create_musica_from_variants} or \link{create_musica_from_counts} function, or a \code{\linkS4class{musica_result}} object generated by a mutational discovery or prediction tool.} @@ -33,13 +33,13 @@ object generated by a mutational discovery or prediction tool.} the same length as the number of samples in the object.} } \value{ -A new object with the sample annotations added to the table in the +A new object with the sample annotations added to the table in the \code{sample_annotations} slot. } \description{ Sample annotations can be used to store information about -each sample such as tumor type or treatment status. These are used in -downstream plotting functions such as \code{\link{plot_exposures}} or +each sample such as tumor type or treatment status. These are used in +downstream plotting functions such as \code{\link{plot_exposures}} or \code{\link{plot_umap}} to group or color samples by a particular annotation. } \examples{ @@ -53,6 +53,6 @@ data(musica) samp_annot(musica, "example") <- rep("ex", 7) } \seealso{ -See \code{\link{sample_names}} to get a vector of sample names in +See \code{\link{sample_names}} to get a vector of sample names in the \code{\linkS4class{musica}} or \code{\linkS4class{musica_result}} object. 
} diff --git a/man/sample_names.Rd b/man/sample_names.Rd index 8ec754db..ca7c04e8 100644 --- a/man/sample_names.Rd +++ b/man/sample_names.Rd @@ -14,7 +14,7 @@ sample_names(object) } \arguments{ \item{object}{A \code{\linkS4class{musica}} object generated by -the \link{create_musica_from_variants} or \link{create_musica_from_counts} function, +the \link{create_musica_from_variants} or \link{create_musica_from_counts} function, or a \code{\linkS4class{musica_result}} object generated by a mutational discovery or prediction tool.} } @@ -24,9 +24,9 @@ A character vector of sample names \description{ Sample names were included in the \code{sample} column in the variant object passed to \code{\link{create_musica_from_variants}}, or in -the colnames of the count table object passed to +the colnames of the count table object passed to \code{\link{create_musica_from_counts}}. This returns -a unique list of samples names in the order they are inside the +a unique list of samples names in the order they are inside the \code{\linkS4class{musica}} object. 
} \examples{ diff --git a/man/tables.Rd b/man/tables.Rd index 4b8dab75..2bcec70c 100644 --- a/man/tables.Rd +++ b/man/tables.Rd @@ -6,7 +6,7 @@ \alias{tables,musica_result-method} \alias{tables<-} \alias{tables<-,musica,list-method} -\title{Retrieve the list of count_tables from a musica or musica_result +\title{Retrieve the list of count_tables from a musica or musica_result object} \usage{ tables(object) @@ -26,18 +26,18 @@ or a \code{\linkS4class{musica_result}} object generated by a mutational discovery or prediction tool.} \item{musica}{A \code{\linkS4class{musica}} object generated by -the \link{create_musica_from_variants} or \link{create_musica_from_counts} function, +the \link{create_musica_from_variants} or \link{create_musica_from_counts} function, or a \code{\linkS4class{musica_result}} object generated by a mutational discovery or prediction tool.} -\item{value}{A list of \code{\linkS4class{count_table}} objects representing +\item{value}{A list of \code{\linkS4class{count_table}} objects representing counts of motifs in samples} } \value{ A list of count_tables } \description{ -The \code{count_tables} contains standard and/or custom +The \code{count_tables} contains standard and/or custom count tables created from variants } \examples{ diff --git a/man/variants.Rd b/man/variants.Rd index e29e4ef8..a7c91863 100644 --- a/man/variants.Rd +++ b/man/variants.Rd @@ -20,21 +20,21 @@ variants(musica) <- value } \arguments{ \item{object}{A \code{\linkS4class{musica}} object generated by -the \link{create_musica_from_variants} or \link {create_musica_from_counts} function, +the \link{create_musica_from_variants} or \link{create_musica_from_counts} function, or a \code{\linkS4class{musica_result}} object generated by a mutational discovery or prediction tool.} \item{musica}{A \code{\linkS4class{musica}} object generated by the \link{create_musica_from_variants} or \link{create_musica_from_counts} function} -\item{value}{A \code{\linkS4class{data.table}} of 
mutational variants and +\item{value}{A \code{\linkS4class{data.table}} of mutational variants and variant-level annotations} } \value{ A data.table of variants } \description{ -The \code{variants} \code{data.table} contains the variants +The \code{variants} \code{data.table} contains the variants and variant-level annotations } \examples{ From f31d59182c91668383ba320aeaf10b2e852c5099 Mon Sep 17 00:00:00 2001 From: Natasha Gurevich Date: Mon, 17 Jun 2024 17:52:18 -0400 Subject: [PATCH 12/17] Benchmarking functions, sample data, and documentation --- NAMESPACE | 41 + R/benchmarking.R | 1505 +++++++++++++++++++++ R/load_data.R | 27 +- R/main_class.R | 68 + R/methods.R | 767 ++++++++++- R/test_data.R | 30 + data/example_predicted_exp.rda | Bin 0 -> 13798 bytes data/example_predicted_sigs.rda | Bin 0 -> 6320 bytes man/adjustment_threshold.Rd | 32 + man/benchmark.Rd | 57 + man/benchmark_compare_results.Rd | 29 + man/benchmark_get_entry.Rd | 22 + man/benchmark_get_prediction.Rd | 25 + man/benchmark_plot_comparison.Rd | 41 + man/benchmark_plot_composite_exposures.Rd | 24 + man/benchmark_plot_duplicate_exposures.Rd | 24 + man/benchmark_plot_exposures.Rd | 28 + man/benchmark_plot_signatures.Rd | 83 ++ man/create_benchmark.Rd | 19 + man/create_musica_result.Rd | 24 + man/description.Rd | 30 + man/example_predicted_exp.Rd | 17 + man/example_predicted_sigs.Rd | 17 + man/final_comparison.Rd | 34 + man/final_pred.Rd | 33 + man/full_benchmark-class.Rd | 23 + man/ground_truth.Rd | 32 + man/indv_benchmarks.Rd | 32 + man/initial_comparison.Rd | 32 + man/initial_pred.Rd | 33 + man/intermediate_comparison.Rd | 35 + man/intermediate_pred.Rd | 34 + man/method_id.Rd | 31 + man/method_view_summary.Rd | 31 + man/sig_view_summary.Rd | 32 + man/single_benchmark-class.Rd | 37 + man/threshold.Rd | 30 + 37 files changed, 3335 insertions(+), 24 deletions(-) create mode 100644 R/benchmarking.R create mode 100644 data/example_predicted_exp.rda create mode 100644 data/example_predicted_sigs.rda 
create mode 100644 man/adjustment_threshold.Rd create mode 100644 man/benchmark.Rd create mode 100644 man/benchmark_compare_results.Rd create mode 100644 man/benchmark_get_entry.Rd create mode 100644 man/benchmark_get_prediction.Rd create mode 100644 man/benchmark_plot_comparison.Rd create mode 100644 man/benchmark_plot_composite_exposures.Rd create mode 100644 man/benchmark_plot_duplicate_exposures.Rd create mode 100644 man/benchmark_plot_exposures.Rd create mode 100644 man/benchmark_plot_signatures.Rd create mode 100644 man/create_benchmark.Rd create mode 100644 man/create_musica_result.Rd create mode 100644 man/description.Rd create mode 100644 man/example_predicted_exp.Rd create mode 100644 man/example_predicted_sigs.Rd create mode 100644 man/final_comparison.Rd create mode 100644 man/final_pred.Rd create mode 100644 man/full_benchmark-class.Rd create mode 100644 man/ground_truth.Rd create mode 100644 man/indv_benchmarks.Rd create mode 100644 man/initial_comparison.Rd create mode 100644 man/initial_pred.Rd create mode 100644 man/intermediate_comparison.Rd create mode 100644 man/intermediate_pred.Rd create mode 100644 man/method_id.Rd create mode 100644 man/method_view_summary.Rd create mode 100644 man/sig_view_summary.Rd create mode 100644 man/single_benchmark-class.Rd create mode 100644 man/threshold.Rd diff --git a/NAMESPACE b/NAMESPACE index 462b642e..d6071d0a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,18 +1,42 @@ # Generated by roxygen2: do not edit by hand export("%>%") +export("adjustment_threshold<-") +export("description<-") export("exposures<-") +export("final_comparison<-") +export("final_pred<-") +export("ground_truth<-") +export("indv_benchmarks<-") +export("initial_comparison<-") +export("initial_pred<-") +export("intermediate_comparison<-") +export("intermediate_pred<-") +export("method_id<-") +export("method_view_summary<-") export("samp_annot<-") +export("sig_view_summary<-") export("signatures<-") export("tables<-") +export("threshold<-") 
export("umap<-") export("variants<-") export(add_flank_to_variants) +export(adjustment_threshold) export(annotate_replication_strand) export(annotate_transcript_strand) export(annotate_variant_length) export(annotate_variant_type) export(auto_predict_grid) +export(benchmark) +export(benchmark_compare_results) +export(benchmark_get_entry) +export(benchmark_get_prediction) +export(benchmark_plot_comparison) +export(benchmark_plot_composite_exposures) +export(benchmark_plot_duplicate_exposures) +export(benchmark_plot_exposures) +export(benchmark_plot_signatures) export(build_custom_table) export(build_standard_table) export(built_tables) @@ -23,9 +47,12 @@ export(compare_cosmic_v2) export(compare_cosmic_v3) export(compare_results) export(cosmic_v2_subtype_map) +export(create_benchmark) export(create_musica_from_counts) export(create_musica_from_variants) +export(create_musica_result) export(create_umap) +export(description) export(discover_signatures) export(drop_annotation) export(exposure_differential_analysis) @@ -37,9 +64,19 @@ export(extract_variants_from_maf_file) export(extract_variants_from_matrix) export(extract_variants_from_vcf) export(extract_variants_from_vcf_file) +export(final_comparison) +export(final_pred) export(generate_result_grid) export(get_musica) +export(ground_truth) +export(indv_benchmarks) +export(initial_comparison) +export(initial_pred) +export(intermediate_comparison) +export(intermediate_pred) export(k_select) +export(method_id) +export(method_view_summary) export(musicatk) export(name_signatures) export(plot_cluster) @@ -56,6 +93,7 @@ export(rc) export(samp_annot) export(sample_names) export(select_genome) +export(sig_view_summary) export(signatures) export(subset_musica_by_annotation) export(subset_musica_by_counts) @@ -63,12 +101,15 @@ export(subset_variant_by_type) export(subset_variants_by_samples) export(table_selected) export(tables) +export(threshold) export(umap) export(variants) exportClasses(count_table) 
+exportClasses(full_benchmark) exportClasses(musica) exportClasses(musica_result) exportClasses(musica_result_grid) +exportClasses(single_benchmark) import(dplyr) import(ggplot2) import(tidyr) diff --git a/R/benchmarking.R b/R/benchmarking.R new file mode 100644 index 00000000..57bb903f --- /dev/null +++ b/R/benchmarking.R @@ -0,0 +1,1505 @@ +#' Create a full_benchmark object +#' +#' Initialize a \code{\linkS4class{full_benchmark}} object for benchmarking +#' +#' @param true_signatures A matrix of true signatures by mutational motifs +#' @param true_exposures A matrix of samples by true signature weights +#' +#' @return A \code{\linkS4class{full_benchmark}} object +#' @export +create_benchmark <- function(true_signatures, true_exposures){ + + # create musica result object to hold true exposures and signatures + truth <- create_musica_result(true_signatures, true_exposures) + + # create benchmark object + full_benchmark <- new("full_benchmark", ground_truth = truth) + + return(full_benchmark) + +} + + +#' Run the benchmark framework on a prediction +#' +#' Perform benchmarking on a signature discovery prediction compared +#' to a ground truth. Potential errors in the predicted signatures, such as +#' composite or duplicate signatures are adjusted, and a summary of the accuracy +#' of the prediction is given. +#' +#' @param full_benchmark An object of class \code{\linkS4class{full_benchmark}} +#' created with the \link{create_benchmark} function or returned from a previous +#' \code{benchmark} run. +#' @param prediction An object of class \code{\linkS4class{musica_result}} +#' containing the predicted signatures and exposures to benchmark. +#' @param method_id An identifier for the prediction being benchmarked. If not +#' supplied, it will be automatically set to the variable name of the prediction +#' provided. Default \code{NULL}. +#' @param threshold Cosine similarity cutoff for comparing predicted and true +#' signatures. Default \code{0.8}.
+#' @param adjustment_threshold Cosine similarity value of high confidence. +#' Comparisons that meet this cutoff are assumed to be likely, +#' while those that fall below the cutoff will be disregarded if the predicted +#' signature is already captured above the threshold. Default \code{0.9}. +#' @param description Further details about the prediction being benchmarked. +#' Default \code{NULL}. +#' @param plot If \code{FALSE}, plots will be suppressed. Default \code{TRUE}. +#' @param make_copy If \code{TRUE}, the \code{full_benchmark} object provided +#' will not be modified and a new object will be returned. If \code{FALSE}, the +#' object provided will be modified and nothing will be returned. Default +#' \code{FALSE}. +#' +#' @return If \code{make_copy == TRUE}, a new \code{full_benchmark} object is +#' returned. If \code{make_copy == FALSE}, nothing is returned. +#' @export +benchmark <- function(full_benchmark, prediction, method_id = NULL, + threshold = 0.8, adjustment_threshold = 0.9, description = NULL, + plot = TRUE, make_copy = FALSE){ + + + if (make_copy == FALSE){ + var_name <- deparse(substitute(full_benchmark)) + } + + # check that full_benchmark is a full_benchmark class object + if (class(full_benchmark)[1] != "full_benchmark"){ + stop(deparse(substitute(full_benchmark)), " is not a 'full_benchmark' object. 
", + "Use function 'create_benchmark' to initialize a 'full_benchmark' object") + } + + # check that prediction is a musica result object + if (class(prediction)[1] != "musica_result"){ + stop("'prediction' must be a 'musica_result' object.") + } + + # if no ID provided, try to make one automatically + if (is.null(method_id)){ + + # create ID + method_id <- deparse(substitute(prediction)) + + # display message that ID was automatically generated + message("No method_id provided, automatically generated method_id: ", method_id) + } + + # check if ID is unique + if (method_id %in% names(indv_benchmarks(full_benchmark))){ + original_id <- method_id + + # update id to be unique + tag <- 1 + while (method_id %in% names(indv_benchmarks(full_benchmark))){ + if (tag > 1){ + method_id <- substr(method_id, 1, nchar(method_id)-2) + } + method_id <- paste(method_id, ".", tag, sep = "") + tag <- tag + 1 + } + + # display message that ID was not unique and was updated + message("method_id ", original_id, " already exists. method_id updated to ", method_id) + + } + + + if (threshold != 0.8){ + warning("Default threshold overriden. Interpret results with caution if + comparing benchmark runs with inconsistent thresholds.") + } + + if (adjustment_threshold != 0.9){ + warning("Default adjustment threshold overriden. 
Interpret results with caution if + comparing benchmark runs with inconsistent thresholds.") + } + + full_benchmark@ground_truth@musica <- get_musica(prediction) + + truth <- ground_truth(full_benchmark) + + message("\nComparing to true signatures (initial)...") + + initial_comparison <- compare_results(prediction, truth, threshold) + + # remove anything below 0.9 that already appears with at least 0.05 difference + initial_comparison <- .benchmark_comp_adj(initial_comparison, adjustment_threshold) + + message("Correcting duplicates...") + + duplicates_corrected <- .correct_duplicates(prediction, initial_comparison, truth) + + message("Comparing to true signatures (post duplicates corrected)...") + + duplicates_corrected_comparison <- compare_results(duplicates_corrected, truth, + threshold = threshold) + + + # remove anything below 0.9 that already appears with at least 0.05 difference + duplicates_corrected_comparison <- .benchmark_comp_adj(duplicates_corrected_comparison, adjustment_threshold) + + message("Correcting composites...") + + composites_corrected <- .correct_composites(duplicates_corrected, duplicates_corrected_comparison, get_musica(truth), truth) + + message("Comparing to true signatures (post composites corrected)...") + + composites_corrected_comparison <- compare_results(composites_corrected, truth, threshold = threshold) + + # remove anything below 0.9 that already appears with at least 0.05 difference + composites_corrected_comparison <- .benchmark_comp_adj(composites_corrected_comparison, adjustment_threshold) + + # extract count table + count_table <- extract_count_tables(get_musica(prediction)) + count_table <- count_table$SBS96@count_table + + message("Creating summary...") + + single_summary <- as.matrix(.generate_summary(method_id, prediction, truth, initial_comparison, + count_table, composites_corrected, composites_corrected_comparison)) + + message("Creating individual benchmark object...") + + # create single benchmark object + 
indv_benchmark <- new("single_benchmark", initial_pred = prediction, intermediate_pred = duplicates_corrected, + final_pred = composites_corrected, initial_comparison = initial_comparison, + intermediate_comparison = duplicates_corrected_comparison, + final_comparison = composites_corrected_comparison, + single_summary = single_summary, method_id = method_id, threshold = threshold, + adjustment_threshold = adjustment_threshold, description = description) + + # update full benchmark object + message("Updating full benchmark object...") + full_benchmark <- .update_benchmark(full_benchmark, indv_benchmark, single_summary) + + # update global variable + if (make_copy == FALSE){ + assign(var_name, full_benchmark, envir = parent.frame()) + } + + if (plot == TRUE){ + + # plots + message("Generating plots...") + + #message("\nInitial Signatures...\n") + initial_sig_plot <- benchmark_plot_signatures(full_benchmark, method_id, prediction = "Initial", same_scale = FALSE) + print(initial_sig_plot) + #message("\nInitial Comparison...\n") + benchmark_plot_comparison(full_benchmark, method_id, prediction = "Initial", same_scale = FALSE) + #message("\nInitial Exposure Comparison...\n") + #benchmark_plot_exposures(full_benchmark, method_id, prediction = "Initial") + + #message("\nDuplicate signature exposures before/afters...\n") + benchmark_plot_duplicate_exposures(full_benchmark, method_id) + #print(duplicate_plot) + + #message("\nIntermediate Signatures...\n") + intermediate_sig_plot <- benchmark_plot_signatures(full_benchmark, method_id, prediction = "Intermediate", same_scale = FALSE) + print(intermediate_sig_plot) + #message("\nIntermediate Comparison...\n") + benchmark_plot_comparison(full_benchmark, method_id, prediction = "Intermediate", same_scale = FALSE) + #message("\nIntermediate Exposure Comparison...\n") + #benchmark_plot_exposures(full_benchmark, method_id, prediction = "Intermediate") + + #message("\nComposite signature exposures before/afters...\n") + 
benchmark_plot_composite_exposures(full_benchmark, method_id) + #print(composite_plot) + + #message("\nFinal Signatures...\n") + final_sig_plot <- benchmark_plot_signatures(full_benchmark, method_id, prediction = "Final", same_scale = FALSE) + print(final_sig_plot) + #message("\nFinal Comparison...\n") + benchmark_plot_comparison(full_benchmark, method_id, prediction = "Final", same_scale = FALSE) + #message("\nFinal Exposure Comparison...\n") + exposure_plot <- benchmark_plot_exposures(full_benchmark, method_id, prediction = "Final") + print(exposure_plot) + + } + + print(single_summary) + + message("\nDone.\n") + + if (make_copy == TRUE){ + return(full_benchmark) + } + + + +} + + +#' Get a single_benchmark object +#' +#' Access a \code{\linkS4class{single_benchmark}} object containing information from +#' an individual benchmark run from a \code{\linkS4class{full_benchmark}} object. +#' +#' @param full_benchmark The \code{\linkS4class{full_benchmark}} object that contains +#' the desired \code{\linkS4class{single_benchmark}} object +#' @param method_id The identifier for the desired \code{\linkS4class{single_benchmark}} +#' object +#' +#' @return A \code{\linkS4class{single_benchmark}} object +#' @export +benchmark_get_entry <- function(full_benchmark, method_id){ + + # check that full_benchmark is a full_benchmark class object + if (class(full_benchmark)[1] != "full_benchmark"){ + stop(deparse(substitute(full_benchmark)), " is not a 'full_benchmark' object.") + } + + # check if this method_id exists + if (!(method_id %in% names(indv_benchmarks(full_benchmark)))){ + stop("'method_id' ", deparse(substitute(method_id)), " not found in ", deparse(substitute(full_benchmark))) + } + + benchmark <- indv_benchmarks(full_benchmark)[[method_id]] + + return(benchmark) +} + + +#' Get a benchmark prediction +#' +#' Access a \code{\linkS4class{musica_result}} object containing a particular prediction +#' from a benchmarking analysis. 
+#' +#' @param indv_benchmark A \code{\linkS4class{single_benchmark}} object containing the +#' desired prediction. This can be accessed using the \link{benchmark_get_entry} +#' function. +#' @param prediction \code{Initial} for the prediction before any benchmarking +#' adjustments have been made, \code{Intermediate} for the prediction after +#' duplicates have been adjusted but before composites are adjusted, or +#' \code{Final} for the prediction at the end of the benchmarking adjustments. +#' +#' @return A \code{\linkS4class{musica_result}} object +#' @export +benchmark_get_prediction <- function(indv_benchmark, prediction){ + + # check if prediction is one of Initial, Intermediate, or Final + valid <- c("Initial", "initial", "Init", "init", "Intermediate", "intermediate", + "Inter", "inter", "Final", "final", "Fin", "fin") + if (!(prediction %in% valid)){ + stop("'prediction' must be one of: 'Initial', 'Intermediate', or 'Final'.") + } + + # access musica object for desired prediction + if (prediction %in% c("Initial", "initial", "Init", "init")){ + result <- initial_pred(indv_benchmark) + } + else if (prediction %in% c("Intermediate", "intermediate", "Inter", "inter")){ + result <- intermediate_pred(indv_benchmark) + } + else if (prediction %in% c("Final", "final", "Fin", "fin")){ + result <- final_pred(indv_benchmark) + } + return(result) + +} + + +#' Plot signatures from a benchmarking analysis +#' +#' After a prediction has been benchmarked with the \link{benchmark} function, +#' this function can be used to plot signatures from any step in the benchmarking +#' process. Comparable to the \code{plot_signatures} function but compatible with +#' benchmarking objects. 
+#' +#' @param full_benchmark The \code{\linkS4class{full_benchmark}} object for the +#' benchmarking analysis +#' @param method_id The identifier for the \code{\linkS4class{single_benchmark}} +#' object containing the signatures to be plotted +#' @param prediction \code{Initial} for the signatures before any benchmarking +#' adjustments have been made, \code{Intermediate} for the signatures after +#' duplicates have been adjusted but before composites are adjusted, or +#' \code{Final} for the signatures at the end of the benchmarking adjustments. +#' @param plotly If \code{TRUE}, the plot will be made interactive +#' using \code{\link[plotly]{plotly}}. Default \code{FALSE}. +#' @param color_variable Name of the column in the variant annotation data.frame +#' to use for coloring the mutation type bars. The variant annotation data.frame +#' can be found within the count table of the \code{\linkS4class{musica}} +#' object. If \code{NULL}, then the default column specified in the count +#' table will be used. Default \code{NULL}. +#' @param color_mapping A character vector used to map items in the +#' \code{color_variable} to a color. The items in \code{color_mapping} +#' correspond to the colors. The names of the items in \code{color_mapping} +#' should correspond to the unique items in \code{color_variable}. If +#' \code{NULL}, then the default \code{color_mapping} specified in the count +#' table will be used. Default \code{NULL}. +#' @param text_size Size of axis text. Default \code{10}. +#' @param show_x_labels If \code{TRUE}, the labels for the mutation types +#' on the x-axis will be shown. Default \code{TRUE}. +#' @param show_y_labels If \code{TRUE}, the y-axis ticks and labels will be +#' shown. Default \code{TRUE}. +#' @param same_scale If \code{TRUE}, the scale of the probability for each +#' signature will be the same. If \code{FALSE}, then the scale of the y-axis +#' will be adjusted for each signature. Default \code{TRUE}.
+#' @param y_max Vector of maximum y-axis limits for each signature. One value +#' may also be provided to specify a constant y-axis limit for all signatures. +#' Vector length must be 1 or equivalent to the number of signatures. Default +#' \code{NULL}. +#' @param annotation Vector of annotations to be displayed in the top right +#' corner of each signature. Vector length must be equivalent to the number of +#' signatures. Default \code{NULL}. +#' @param percent If \code{TRUE}, the y-axis will be represented in percent +#' format instead of mutation counts. Default \code{TRUE}. +#' +#' @return Generates a ggplot or plotly object +#' @export +benchmark_plot_signatures <- function(full_benchmark, method_id, prediction, + plotly = FALSE, color_variable = NULL, color_mapping = NULL, text_size = 10, + show_x_labels = TRUE, show_y_labels = TRUE, same_scale = TRUE, y_max = NULL, + annotation = NULL, percent = TRUE){ + + # check that full_benchmark is a full_benchmark class object + if (class(full_benchmark)[1] != "full_benchmark"){ + stop(deparse(substitute(full_benchmark)), " is not a 'full_benchmark' object.") + } + + # check if this method_id exists + if (!(method_id %in% names(indv_benchmarks(full_benchmark)))){ + stop("'method_id' ", deparse(substitute(method_id)), " not found in ", deparse(substitute(full_benchmark))) + } + + # check if prediction is one of Initial, Intermediate, or Final + valid <- c("Initial", "initial", "Init", "init", "Intermediate", "intermediate", + "Inter", "inter", "Final", "final", "Fin", "fin") + if (!(prediction %in% valid)){ + stop("'prediction' must be one of: 'Initial', 'Intermediate', or 'Final'.") + } + + # access individual benchmark object + indv_benchmark <- benchmark_get_entry(full_benchmark, method_id) + + # access musica object for desired prediction + result <- benchmark_get_prediction(indv_benchmark, prediction) + + signatures_plot <- plot_signatures(result, plotly = plotly, color_variable = color_variable, color_mapping = 
color_mapping,
+    text_size = text_size, show_x_labels = show_x_labels, show_y_labels = show_y_labels,
+    same_scale = same_scale, y_max = y_max, annotation = annotation, percent = percent)
+
+  return(signatures_plot)
+
+}
+
+
+#' Get benchmark comparison table
+#'
+#' After a prediction has been benchmarked with the \link{benchmark} function,
+#' this function can be used to generate the comparison table between true and
+#' predicted signatures from any step in the benchmarking process.
+#'
+#' @param full_benchmark The \code{\linkS4class{full_benchmark}} object for the
+#' benchmarking analysis
+#' @param method_id The identifier for the \code{\linkS4class{single_benchmark}}
+#' object containing the comparison of interest
+#' @param prediction \code{Initial} for the comparison before any benchmarking
+#' adjustments have been made, \code{Intermediate} for the comparison after
+#' duplicates have been adjusted but before composites are adjusted, or
+#' \code{Final} for the comparison at the end of the benchmarking adjustments.
+#'
+#' @return A data.frame containing the comparison between true and predicted
+#' signatures
+#' @export
+benchmark_compare_results <- function(full_benchmark, method_id, prediction){
+
+  # check that full_benchmark is a full_benchmark class object
+  if (class(full_benchmark)[1] != "full_benchmark"){
+    stop(deparse(substitute(full_benchmark)), " is not a 'full_benchmark' object.")
+  }
+
+  # check if this method_id exists
+  if (!(method_id %in% names(indv_benchmarks(full_benchmark)))){
+    stop("'method_id' ", deparse(substitute(method_id)), " not found in ", deparse(substitute(full_benchmark)))
+  }
+
+  # check if prediction is one of Initial, Intermediate, or Final
+  valid <- c("Initial", "initial", "Init", "init", "Intermediate", "intermediate",
+             "Inter", "inter", "Final", "final", "Fin", "fin")
+  if (!(prediction %in% valid)){
+    stop("'prediction' must be one of: 'Initial', 'Intermediate', or 'Final'.")
+  }
+
+  # access individual benchmark object
+  indv_benchmark <- benchmark_get_entry(full_benchmark, method_id)
+
+  # pick the stored comparison; each validated alias maps to one stage
+  comparison <- switch(tolower(prediction),
+    initial = ,
+    init = initial_comparison(indv_benchmark),
+    intermediate = ,
+    inter = intermediate_comparison(indv_benchmark),
+    final = ,
+    fin = final_comparison(indv_benchmark)
+  )
+
+  return(comparison)
+
+}
+
+
+#' Plot a signature comparison from a benchmarking analysis
+#'
+#' After a prediction has been benchmarked with the \link{benchmark} function,
+#' the comparison between the true and predicted signatures at any step of the
+#' benchmarking process can be plotted.
#'
#' @param full_benchmark The \code{\linkS4class{full_benchmark}} object for the
#' benchmarking analysis
#' @param method_id The identifier for the \code{\linkS4class{single_benchmark}}
#' object containing the comparison of interest
#' @param prediction \code{Initial} for the comparison before any benchmarking
#' adjustments have been made, \code{Intermediate} for the comparison after
#' duplicates have been adjusted but before composites are adjusted, or
#' \code{Final} for the comparison at the end of the benchmarking adjustments.
#' The lower-case and abbreviated spellings (\code{initial}, \code{Init},
#' \code{init}, \code{intermediate}, \code{Inter}, \code{inter}, \code{final},
#' \code{Fin}, \code{fin}) are accepted as well.
#' @param decimals Specifies rounding for similarity metric displayed. Default
#' \code{2}.
#' @param same_scale If \code{TRUE}, the scale of the probability for each
#' comparison will be the same. If \code{FALSE}, then the scale of the y-axis
#' will be adjusted for each comparison. Default \code{FALSE}.
#'
#' @return Returns the comparison plot, invisibly
#' @export
benchmark_plot_comparison <- function(full_benchmark, method_id, prediction,
                                      decimals = 2, same_scale = FALSE){

  # check that full_benchmark is a full_benchmark class object
  if (!inherits(full_benchmark, "full_benchmark")){
    stop(deparse(substitute(full_benchmark)), " is not a 'full_benchmark' object.")
  }

  # check if this method_id exists
  if (!(method_id %in% names(indv_benchmarks(full_benchmark)))){
    stop("'method_id' ", deparse(substitute(method_id)), " not found in ", deparse(substitute(full_benchmark)))
  }

  # Map every accepted spelling onto its canonical stage name, so a spelling
  # that passes validation always selects a prediction below.
  stage_aliases <- c(Initial = "Initial", initial = "Initial",
                     Init = "Initial", init = "Initial",
                     Intermediate = "Intermediate", intermediate = "Intermediate",
                     Inter = "Intermediate", inter = "Intermediate",
                     Final = "Final", final = "Final",
                     Fin = "Final", fin = "Final")
  if (!is.character(prediction) || length(prediction) != 1L ||
      !(prediction %in% names(stage_aliases))){
    stop("'prediction' must be one of: 'Initial', 'Intermediate', or 'Final'.")
  }
  stage <- stage_aliases[[prediction]]

  # The canonical name is forwarded (not the caller's spelling) so the lookup
  # succeeds even if the helper only recognises the three full names.
  comparison <- benchmark_compare_results(full_benchmark, method_id, stage)
  indv_benchmark <- benchmark_get_entry(full_benchmark, method_id)

  # access musica result object for desired prediction
  pred_res <- switch(stage,
                     Initial = initial_pred(indv_benchmark),
                     Intermediate = intermediate_pred(indv_benchmark),
                     Final = final_pred(indv_benchmark))
  res_name <- paste(stage, "Predicted Signatures")

  comparison_plot <- plot_comparison(comparison, pred_res, ground_truth(full_benchmark),
                                     result_name = res_name, other_result_name = "True Signatures",
                                     decimals = decimals, same_scale = same_scale)

  # The value has always been handed back invisibly (the function used to end
  # on the assignment above); this makes that explicit without auto-printing.
  invisible(comparison_plot)

}


#' Plot exposure comparison from a benchmarking analysis
#'
#' After a prediction has been benchmarked with the \link{benchmark} function,
#' the comparison between the true and predicted exposures at any stage of the
#' benchmarking process can be plotted.
#'
#' @param full_benchmark The \code{\linkS4class{full_benchmark}} object for the
#' benchmarking analysis
#' @param method_id The identifier for the \code{\linkS4class{single_benchmark}}
#' object of interest
#' @param prediction \code{Initial} for the exposures before any benchmarking
#' adjustments have been made, \code{Intermediate} for the exposures after
#' duplicates have been adjusted but before composites are adjusted, or
#' \code{Final} for the exposures at the end of the benchmarking adjustments.
#'
#' @return Generates a ggplot object
#' @details One panel is drawn per true signature. Every matched
#' (predicted, true) pair listed in the comparison table contributes one point
#' per sample, so a true signature matched by several predicted signatures
#' shows the points of all of them in its panel. The lower-case and abbreviated
#' spellings of \code{prediction} (\code{initial}, \code{Init}, \code{init},
#' \code{intermediate}, \code{Inter}, \code{inter}, \code{final}, \code{Fin},
#' \code{fin}) are accepted as well.
#' @export
benchmark_plot_exposures <- function(full_benchmark, method_id, prediction){

  # Column names used inside aes() are bound locally; presumably this is to
  # keep R CMD check from reporting them as undefined global variables.
  Predicted <- NULL
  True <- NULL

  # check that full_benchmark is a full_benchmark class object
  if (!inherits(full_benchmark, "full_benchmark")){
    stop(deparse(substitute(full_benchmark)), " is not a 'full_benchmark' object.")
  }

  # check if this method_id exists
  if (!(method_id %in% names(indv_benchmarks(full_benchmark)))){
    stop("'method_id' ", deparse(substitute(method_id)), " not found in ", deparse(substitute(full_benchmark)))
  }

  # Map every accepted spelling onto its canonical stage name; the helpers
  # below are always called with one of the three full names.
  stage_aliases <- c(Initial = "Initial", initial = "Initial",
                     Init = "Initial", init = "Initial",
                     Intermediate = "Intermediate", intermediate = "Intermediate",
                     Inter = "Intermediate", inter = "Intermediate",
                     Final = "Final", final = "Final",
                     Fin = "Final", fin = "Final")
  if (!is.character(prediction) || length(prediction) != 1L ||
      !(prediction %in% names(stage_aliases))){
    stop("'prediction' must be one of: 'Initial', 'Intermediate', or 'Final'.")
  }
  stage <- stage_aliases[[prediction]]

  # access individual benchmark object
  indv_benchmark <- benchmark_get_entry(full_benchmark, method_id)

  # access comparison for desired prediction
  comparison <- benchmark_compare_results(full_benchmark, method_id, stage)

  # true exposures are indexed [sample, signature] below,
  # predicted exposures [signature, sample]
  true_expos <- exposures(ground_truth(full_benchmark))
  pred_expos <- exposures(benchmark_get_prediction(indv_benchmark, stage))

  # Column 4 of the comparison table holds the predicted signature name that
  # is paired with `y_sig_name` on the same row. Walking the table row by row
  # keeps each predicted signature aligned with its own true exposures, also
  # when one true signature is matched by more than one predicted signature.
  predicted_names <- as.character(comparison[, 4])
  true_names <- as.character(comparison$y_sig_name)
  matches <- seq_len(nrow(comparison))

  plot_df <- data.frame(
    Predicted = unlist(lapply(matches, function(i) as.vector(pred_expos[predicted_names[i], ]))),
    True = unlist(lapply(matches, function(i) as.vector(true_expos[, true_names[i]]))),
    Sig = rep(true_names, each = dim(true_expos)[1])
  )

  compare_exposure_plot <- ggplot(plot_df, aes(x = Predicted, y = True)) +
    geom_point(size = 3) +
    facet_wrap(~Sig, scales = "free") +
    scale_x_continuous(labels = scales::comma) +
    scale_y_continuous(labels = scales::comma) +
    theme_classic() +
    labs(title = "Predicted vs True Activity Levels for Matched Signatures",
         x="Predicted Activity", y = "True Activity") +
    theme(text = element_text(size=15), axis.text = element_text(size = 15)) +
    geom_abline() +
    geom_smooth(method = "lm") +
    theme(legend.title=element_blank())

  return(compare_exposure_plot)

}


#' Plot effect of duplicate correction
#'
#' After a prediction has been benchmarked with the \link{benchmark} function,
#' the true and predicted exposures can be plotted both before and after the
#' duplicate signature adjustment. The effect of the adjustment can then be
#' observed.
#'
#' @param full_benchmark The \code{\linkS4class{full_benchmark}} object for the
#' benchmarking analysis
#' @param method_id The identifier for the \code{\linkS4class{single_benchmark}}
#' object of interest
#'
#' @return Two ggplot images are displayed for every duplicated true signature
#' (one before and one after merging); \code{NULL} is returned invisibly
#' @export
benchmark_plot_duplicate_exposures <- function(full_benchmark, method_id){

  # Column names used inside aes() are bound locally; presumably this is to
  # keep R CMD check from reporting them as undefined global variables.
  Predicted <- NULL
  True <- NULL

  # check that full_benchmark is a full_benchmark class object
  if (!inherits(full_benchmark, "full_benchmark")){
    stop(deparse(substitute(full_benchmark)), " is not a 'full_benchmark' object.")
  }

  # check if this method_id exists
  if (!(method_id %in% names(indv_benchmarks(full_benchmark)))){
    stop("'method_id' ", deparse(substitute(method_id)), " not found in ", deparse(substitute(full_benchmark)))
  }

  indv_benchmark <- benchmark_get_entry(full_benchmark, method_id)

  # true exposures are indexed [sample, signature] below,
  # predicted exposures [signature, sample]
  true_expos <- exposures(ground_truth(full_benchmark))
  before_expos <- exposures(initial_pred(indv_benchmark))
  after_expos <- exposures(intermediate_pred(indv_benchmark))

  comparison <- initial_comparison(indv_benchmark)

  # a true signature is "duplicated" when more than one comparison row names it
  freq <- table(comparison$y_sig_name)
  duplicated_signatures <- names(freq[freq > 1])

  for (duplicated_sig in duplicated_signatures){

    # predicted signatures (column 4) matched to this true signature, leaving
    # out any whose name contains "like"
    is_mergeable <- comparison$y_sig_name == duplicated_sig &
      !grepl("like", comparison$x_sig_name)
    sigs_to_merge <- as.character(comparison[is_mergeable, 4])

    # Nothing left to merge once the "like" signatures are excluded: there is
    # no before/after pair to draw, so move on instead of failing on an empty
    # table.
    if (length(sigs_to_merge) == 0){
      next
    }

    true_activity <- as.numeric(true_expos[, duplicated_sig])

    # exposure plot before merging: one facet per predicted signature
    before_exposures <- do.call(rbind, lapply(sigs_to_merge, function(signature){
      data.frame(Predicted = as.vector(before_expos[signature, ]),
                 True = true_activity,
                 Source = signature,
                 stringsAsFactors = FALSE)
    }))
    rownames(before_exposures) <- seq_len(nrow(before_exposures))

    # both axes share one upper limit so the identity line is comparable
    before_max <- max(before_exposures$Predicted, before_exposures$True)

    before_plot <- ggplot(before_exposures, aes(x = Predicted, y = True)) +
      geom_point(size = 3) +
      facet_wrap(~Source, scales = "fixed") +
      scale_x_continuous(labels = scales::comma, limits = c(0, before_max)) +
      scale_y_continuous(labels = scales::comma, limits = c(0, before_max)) +
      theme_classic() +
      labs(title=paste("Exposures of duplicated signature, ", duplicated_sig, sep = ""),
           subtitle = "Before merging",
           x="Predicted Activity", y = "True Activity") +
      theme(text = element_text(size=15), axis.text = element_text(size = 15)) +
      geom_abline() +
      geom_smooth(method = "lm") +
      theme(legend.title=element_blank())

    print(before_plot)

    # row name under which the merged signature is looked up in the
    # intermediate prediction: "Merged Signature (<sig1>.<sig2>...)"
    new_sig_name <- paste0("Merged Signature (", paste(sigs_to_merge, collapse = "."), ")")

    # exposure plot after merging

    after_exposures <- data.frame(Predicted = as.vector(after_expos[new_sig_name, ]),
                                  True = true_activity)

    after_max <- max(after_exposures$Predicted, after_exposures$True)

    after_plot <- ggplot(after_exposures, aes(x = Predicted, y = True)) +
      geom_point(size = 3) +
      scale_x_continuous(labels = scales::comma, limits = c(0, after_max)) +
      scale_y_continuous(labels = scales::comma, limits = c(0, after_max)) +
      theme_classic() +
      labs(title=paste("Exposures of duplicated signature, ", duplicated_sig, sep = ""),
           subtitle = "After merging",
           x="Predicted Activity (Merged)", y = "True Activity") +
      theme(text = element_text(size=15), axis.text = element_text(size = 15)) +
      geom_abline() +
      geom_smooth(method = "lm") +
      theme(legend.title=element_blank())

    print(after_plot)

  }

  # called for its plots only
  invisible(NULL)

}


#' Plot effect of composite correction
#'
#' After a prediction has been benchmarked with the \link{benchmark} function,
#' the true and predicted exposures can be plotted both before and after the
#' composite signature adjustment. The effect of the adjustment can then be
#' observed.
+#' +#' @param full_benchmark The \code{\linkS4class{full_benchmark}} object for the +#' benchmarking analysis +#' @param method_id The identifier for the \code{\linkS4class{single_benchmark}} +#' object of interest +#' +#' @return Two ggplot images are displayed +#' @export +benchmark_plot_composite_exposures <- function(full_benchmark, method_id){ + + Predicted <- NULL + True <- NULL + + # check that full_benchmark is a full_benchmark class object + if (class(full_benchmark)[1] != "full_benchmark"){ + stop(deparse(substitute(full_benchmark)), " is not a 'full_benchmark' object.") + } + + # check if this method_id exists + if (!(method_id %in% names(indv_benchmarks(full_benchmark)))){ + stop("'method_id' ", deparse(substitute(method_id)), " not found in ", deparse(substitute(full_benchmark))) + } + + indv_benchmark <- benchmark_get_entry(full_benchmark, method_id) + + result_true <- ground_truth(full_benchmark) + + before <- intermediate_pred(indv_benchmark) + after <- final_pred(indv_benchmark) + + comparison <- intermediate_comparison(indv_benchmark) + + num_samples <- dim(exposures(result_true))[1] + + freq <- table(comparison$x_sig_name) + composite_signatures <- names(freq[freq > 1]) + + for (composite_sig in composite_signatures){ + + before_exposures <- NULL + + # list of SBS signatures that are the components of this composite signature + sig_components <- comparison[comparison$x_sig_name == composite_sig, 5] + + for (component_index in 1:length(sig_components)){ + + # for plotting + tmp_exp <- exposures(before)[composite_sig,] + tmp_exp_list <- as.data.frame(as.vector(tmp_exp)) + tmp_exp_all <- cbind(tmp_exp_list, + as.data.frame(as.numeric(exposures(result_true)[,sig_components[component_index]]))) + colnames(tmp_exp_all) <- c("Predicted", "True") + tmp_exp_all$Source <- sig_components[component_index] + + before_exposures <- rbind(before_exposures, tmp_exp_all) + + } + + rownames(before_exposures) <- c(1: (num_samples * length(sig_components))) + + 
before_plot <- ggplot(before_exposures, aes(x = Predicted, y = True)) + + geom_point(size = 3) + + facet_wrap(~Source, scales = "fixed") + + scale_x_continuous(labels = scales::comma, limits = c(0, max(max(before_exposures$Predicted), max(before_exposures$True)))) + + scale_y_continuous(labels = scales::comma, limits = c(0, max(max(before_exposures$Predicted), max(before_exposures$True)))) + + theme_classic() + + labs(title=paste("Exposures of composite signature, ", composite_sig, sep = ""), + subtitle = "Before decomposing", + x="Predicted Activity", y = "True Activity") + + theme(text = element_text(size=15), axis.text = element_text(size = 15)) + + geom_abline() + + geom_smooth(method = "lm") + + theme(legend.title=element_blank()) + + print(before_plot) + + colnames <- NULL + for (component in sig_components){ + #colnames <- c(colnames, paste("Signature", component, "_like", sep = "")) + colnames <- c(colnames, paste(component, "_like", sep = "")) + + } + colnames <- colnames[colnames %in% rownames(exposures(after))] + sig_components <- gsub("_like", "", colnames) + + after_exposures <- NULL + + for (component_index in 1:length(sig_components)){ + + # for plotting + tmp_exp <- exposures(after)[colnames[component_index],] + tmp_exp_list <- as.data.frame(as.vector(tmp_exp)) + tmp_exp_all <- cbind(tmp_exp_list, + as.data.frame(as.numeric(exposures(result_true)[,sig_components[component_index]]))) + colnames(tmp_exp_all) <- c("Predicted", "True") + tmp_exp_all$Source <- sig_components[component_index] + + after_exposures <- rbind(after_exposures, tmp_exp_all) + + } + + after_plot <- ggplot(after_exposures, aes(x = Predicted, y = True)) + + geom_point(size = 3) + + facet_wrap(~Source, scales = "fixed") + + scale_x_continuous(labels = scales::comma, limits = c(0, max(max(after_exposures$Predicted), max(after_exposures$True)))) + + scale_y_continuous(labels = scales::comma, limits = c(0, max(max(after_exposures$Predicted), max(after_exposures$True)))) + + 
theme_classic() + + labs(title=paste("Exposures of composite signature, ", composite_sig, sep = ""), + subtitle = "After decomposing", + x="Predicted Activity", y = "True Activity") + + theme(text = element_text(size=15), axis.text = element_text(size = 15)) + + geom_abline() + + geom_smooth(method = "lm") + + theme(legend.title=element_blank()) + + print(after_plot) + + } + + #combined_plot<- gridExtra::grid.arrange(before_plot, after_plot, ncol = 2) + #return(combined_plot) + + +} + + +# Function for addressing duplicates +.correct_duplicates <- function(result, compare_cosmic_result, result_true){ + + Sum <- NULL + Predicted <- NULL + True <- NULL + + exposures <- exposures(result) + signatures <- signatures(result) + num_samples <- dim(exposures)[2] + + freq <- table(compare_cosmic_result$y_sig_name) + duplicated_signatures <- names(freq[freq > 1]) + all_sigs_to_merge <- compare_cosmic_result[compare_cosmic_result$y_sig_name %in% duplicated_signatures & !grepl("like", compare_cosmic_result$x_sig_name), 4] + + corrected_sigs <- NULL + corrected_exposures <- NULL + + for (duplicated_sig in duplicated_signatures){ + + duplicate_exposures_all <- NULL + + sigs_to_merge <- compare_cosmic_result[compare_cosmic_result$y_sig_name == duplicated_sig & !grepl("like", compare_cosmic_result$x_sig_name), 4] + + # add when removed the below + full_sig_names_to_merge <- sigs_to_merge + #sigs_to_merge <- str_remove(sigs_to_merge, "Signature") + + # convert sig numbers to full names + #full_sig_names_to_merge <- NULL + #for (sig_number in sigs_to_merge){ + # sig_name <- paste("Signature", sig_number, sep = "") + # full_sig_names_to_merge <- c(full_sig_names_to_merge, sig_name) + #} + + sum <- 0 + for (signature in full_sig_names_to_merge){ + for (sample_index in 1:num_samples){ + temp <- signatures[, signature] * exposures[signature, sample_index] + sum <- sum + temp + } + + tmp_exposures <- exposures[signature,] + tmp_exposures_list <- as.data.frame(as.vector(tmp_exposures)) + 
tmp_exposures_all <- cbind(tmp_exposures_list, + as.data.frame(as.numeric(exposures(result_true)[,duplicated_sig]))) + colnames(tmp_exposures_all) <- c("Predicted", "True") + tmp_exposures_all$Source <- signature + + duplicate_exposures_all <- rbind(duplicate_exposures_all, tmp_exposures_all) + } + + ## DOESNT WORK WHEN NOT ACTUALLY DUPICATE + rownames(duplicate_exposures_all) <- c(1: (num_samples * length(sigs_to_merge))) + + plot <- ggplot(duplicate_exposures_all, aes(x = Predicted, y = True)) + + geom_point(size = 3) + + facet_wrap(~Source, scales = "fixed") + + scale_x_continuous(labels = scales::comma, limits = c(0, max(max(duplicate_exposures_all$Predicted), max(duplicate_exposures_all$True)))) + + scale_y_continuous(labels = scales::comma,limits = c(0, max(max(duplicate_exposures_all$Predicted), max(duplicate_exposures_all$True)))) + + theme_classic() + + labs(title=paste("Exposures of duplicated signature, ", duplicated_sig, sep = ""), + subtitle = "Before merging", + x="Predicted Activity", y = "True Activity") + + theme(text = element_text(size=15), axis.text = element_text(size = 15)) + + geom_abline() + + geom_smooth(method = "lm") + + theme(legend.title=element_blank()) + + #print(plot) + + + total_sum <- sum(sum) + + normalized <- sum / total_sum + + merged_signature <- as.matrix(normalized) + + new_sig_name <- "Merged Signature (" + for (sig in sigs_to_merge){ + if (sig == sigs_to_merge[1]){ + new_sig_name <- paste(new_sig_name, sig, sep = "") + } + else{ + new_sig_name <- paste(new_sig_name, ".", sig, sep = "") + } + } + new_sig_name <- paste(new_sig_name, ")", sep = "") + + colnames(merged_signature) <- new_sig_name + + corrected_sigs <- cbind(corrected_sigs, merged_signature) + + # update exposures + + merged_exposures <- 0 + for (signature in full_sig_names_to_merge){ + merged_exposures <- merged_exposures + exposures[signature,] + } + + merged_exposures <- t(as.matrix(merged_exposures)) + + rownames(merged_exposures) <- new_sig_name + + 
corrected_exposures <- rbind(corrected_exposures, merged_exposures) + + # exposure plot after merging + + tmp_exposures_list <- as.data.frame(as.vector(merged_exposures)) + tmp_exposures_all <- cbind(tmp_exposures_list, as.data.frame(as.numeric(exposures(result_true)[,duplicated_sig]))) + colnames(tmp_exposures_all) <- c("Predicted", "True") + + rownames(tmp_exposures_all) <- c(1:num_samples) + + plot <- ggplot(tmp_exposures_all, aes(x = Predicted, y = True)) + + geom_point(size = 3) + + scale_x_continuous(labels = scales::comma, limits = c(0, max(max(tmp_exposures_all$Predicted), max(tmp_exposures_all$True)))) + + scale_y_continuous(labels = scales::comma, limits = c(0, max(max(tmp_exposures_all$Predicted), max(tmp_exposures_all$True)))) + + theme_classic() + + labs(title=paste("Exposures of duplicated signature, ", duplicated_sig, sep = ""), + subtitle = "After merging", + x="Predicted Activity (Merged)", y = "True Activity") + + theme(text = element_text(size=15), axis.text = element_text(size = 15)) + + geom_abline() + + geom_smooth(method = "lm") + + theme(legend.title=element_blank()) + + #print(plot) + + } + + if (is.null(corrected_sigs) == FALSE){ + + unchanged_signatures <- as.data.frame(signatures)[, !colnames(signatures) %in% all_sigs_to_merge] + corrected_sigs <- cbind(unchanged_signatures, corrected_sigs) + + unchanged_exposures <- as.data.frame(exposures)[!rownames(exposures) %in% all_sigs_to_merge,] + corrected_exposures <- rbind(unchanged_exposures, corrected_exposures) + + } + + else{ + + unchanged_signatures <- as.data.frame(signatures)[, !colnames(signatures) %in% all_sigs_to_merge] + corrected_sigs <- unchanged_signatures + + unchanged_exposures <- as.data.frame(exposures)[!rownames(exposures) %in% all_sigs_to_merge,] + corrected_exposures <- unchanged_exposures + + } + + duplicates_corrected <- result + signatures(duplicates_corrected) <- as.matrix(corrected_sigs) + exposures(duplicates_corrected) <- as.matrix(corrected_exposures) + + 
return(duplicates_corrected) + +} + +# Function for addressing composites +.correct_composites <- function(result, compare_cosmic_result, musica, result_true){ + + Sum <- NULL + Predicted <- NULL + True <- NULL + + exposures <- exposures(result) + signatures <- signatures(result) + num_samples <- dim(exposures)[2] + + freq <- table(compare_cosmic_result$x_sig_name) + composite_signatures <- names(freq[freq > 1]) + #all_sigs_to_merge <- compare_cosmic_result[compare_cosmic_result$y_sig_name == duplicated_signatures, 4] + + corrected_sigs <- NULL + corrected_exposures <- NULL + + for (composite_sig in composite_signatures){ + + composite_exp_all <- NULL + + # the full signature to separate + sig_to_separate <- signatures(result)[, composite_sig] + exposures_to_separate <- exposures(result)[composite_sig,] + + # list of SBS signatures that are the componenets of this composite signature + sig_components <- compare_cosmic_result[compare_cosmic_result$x_sig_name == composite_sig, 5] + + # number of components in this composite sig + num_components <- length(sig_components) + + # data frame of full signatures of components + component_signatures <- as.data.frame(signatures(result_true)[,sig_components]) # GENERALIZE + + separated_sigs <- matrix(ncol = 0, nrow = 96) + separated_exposures <- matrix(ncol = num_samples, nrow = 0) + + # perform nnls + nnls_result <- lsei::nnls(as.matrix(component_signatures), as.vector(sig_to_separate)) + + for (component_index in 1:num_components){ + + new_signature <- component_signatures[ , component_index] * nnls_result$x[component_index] + if (sum(new_signature) != 0){ + separated_sigs <- cbind(separated_sigs, new_signature) + } + else{ + num_components <- num_components - 1 + sig_components <- sig_components[-component_index] + } + + } + + # renormalize + separated_sigs <- prop.table(separated_sigs,2) + + # update exposures + + num_samples <- dim(musica@count_tables$SBS96@count_table)[2] + + nnls_exposure_results <- data.frame(factor1 = 
numeric(), factor2 = numeric()) + + for (sample_index in 1:num_samples){ + + #A <- as.matrix(signatures(result_true)) + A <- as.matrix(separated_sigs) + #b <- as.vector(sig_to_separate * exposures_to_separate[sample_index]) + b <- as.vector(musica@count_tables$SBS96@count_table[,sample_index]) + + nnls_exposure_result <- lsei::nnls(A, b) + + nnls_exposure_results[sample_index,1] <- nnls_exposure_result$x[1] + nnls_exposure_results[sample_index,2] <- nnls_exposure_result$x[2] + + + } + + #sig_component_result <- predict_exposure(musica = musica, g = "hg38", table_name = "SBS96", + #signature_res = cosmic_v2_sigs, + #signatures_to_use = sig_components, algorithm = "lda") + + #tmp_exposures <- exposures(sig_component_result) + #tmp_exposures <- as.data.frame(t(tmp_exposures)) + + #tmp_exposures$Sum <- rowSums(tmp_exposures) + + # added + #nnls_exposure_results <- nnls_exposure_results[,c(6,7)] + #nnls_exposure_results <- nnls_exposure_results[,c(2,1)] + + nnls_exposure_results$Sum <- rowSums(nnls_exposure_results) + + for (component_index in 1:num_components){ + + # calculate the percent of the sum that each of the 96 channels contributes (reword this i know it doesnt make sense) + #tmp_exposures <- transform(tmp_exposures, percent1 = tmp_exposures[component_index] / Sum) + nnls_exposure_results <- transform(nnls_exposure_results, percent1 = nnls_exposure_results[component_index] / Sum) + + #separated_exposures <- rbind(separated_exposures, + #exposures_to_separate * tmp_exposures[, component_index + num_components + 1]) + separated_exposures <- rbind(separated_exposures, + exposures_to_separate * nnls_exposure_results[, component_index + num_components + 1]) + + + # for plotting + tmp_exp <- exposures[composite_sig,] + tmp_exp_list <- as.data.frame(as.vector(tmp_exp)) + tmp_exp_all <- cbind(tmp_exp_list, + as.data.frame(as.numeric(exposures(result_true)[,sig_components[component_index]]))) # GENERALIZE (was Signature not SBS) + colnames(tmp_exp_all) <- c("Predicted", 
"True") + tmp_exp_all$Source <- sig_components[component_index] + + composite_exp_all <- rbind(composite_exp_all, tmp_exp_all) + + } + + rownames(composite_exp_all) <- c(1: (num_samples * num_components)) + + plot <- ggplot(composite_exp_all, aes(x = Predicted, y = True)) + + geom_point(size = 3) + + facet_wrap(~Source, scales = "fixed") + + scale_x_continuous(labels = scales::comma, limits = c(0, max(max(composite_exp_all$Predicted), max(composite_exp_all$True)))) + + scale_y_continuous(labels = scales::comma, limits = c(0, max(max(composite_exp_all$Predicted), max(composite_exp_all$True)))) + + theme_classic() + + labs(title=paste("Exposures of composite signature, ", composite_sig, sep = ""), + subtitle = "Before decomposing", + x="Predicted Activity", y = "True Activity") + + theme(text = element_text(size=15), axis.text = element_text(size = 15)) + + geom_abline() + + geom_smooth(method = "lm") + + theme(legend.title=element_blank()) + + #print(plot) + + colnames <- NULL + for (component in sig_components){ + #colnames <- c(colnames, paste("Signature", component, "_like", sep = "")) + colnames <- c(colnames, paste(component, "_like", sep = "")) + + } + + colnames(separated_sigs) <- colnames + rownames(separated_exposures) <- colnames + + corrected_sigs <- cbind(corrected_sigs, separated_sigs) + corrected_exposures <- rbind(corrected_exposures, separated_exposures) + + # plot exposures after separation + + plot_exposures <- composite_exp_all + plot_exposures$Predicted <- c(t(separated_exposures)) + + plot <- ggplot(plot_exposures, aes(x = Predicted, y = True)) + + geom_point(size = 3) + + facet_wrap(~Source, scales = "fixed") + + scale_x_continuous(labels = scales::comma, limits = c(0, max(max(plot_exposures$Predicted), max(plot_exposures$True)))) + + scale_y_continuous(labels = scales::comma, limits = c(0, max(max(plot_exposures$Predicted), max(plot_exposures$True)))) + + theme_classic() + + labs(title=paste("Exposures of composite signature, ", composite_sig, 
sep = ""), + subtitle = "After decomposing", + x="Predicted Activity", y = "True Activity") + + theme(text = element_text(size=15), axis.text = element_text(size = 15)) + + geom_abline() + + geom_smooth(method = "lm") + + theme(legend.title=element_blank()) + + #print(plot) + + + } + + if (is.null(corrected_sigs) == FALSE){ + + unchanged_signatures <- as.data.frame(signatures)[, !colnames(signatures) %in% composite_signatures] + corrected_sigs <- cbind(unchanged_signatures, corrected_sigs) + + unchanged_exposures <- as.data.frame(exposures)[!rownames(exposures) %in% composite_signatures,] + corrected_exposures <- rbind(unchanged_exposures, corrected_exposures) + + } + + else{ + + unchanged_signatures <- as.data.frame(signatures)[, !colnames(signatures) %in% composite_signatures] + corrected_sigs <- unchanged_signatures + + unchanged_exposures <- as.data.frame(exposures)[!rownames(exposures) %in% composite_signatures,] + corrected_exposures <- unchanged_exposures + + } + + composites_corrected <- result + signatures(composites_corrected) <- as.matrix(corrected_sigs) + exposures(composites_corrected) <- as.matrix(corrected_exposures) + + return(composites_corrected) + +} + +# Function for adjusting comparison +.benchmark_comp_adj <- function(comparison, adjustment_threshold){ + + low_threshold_comp <- comparison[comparison$cosine <= adjustment_threshold,] + high_threshold_comp <- comparison[comparison$cosine > adjustment_threshold,] + + indexes_to_keep <- c() + for (index in 1:dim(low_threshold_comp)[1]){ + if (low_threshold_comp[index,4] %in% high_threshold_comp$x_sig_name == FALSE){ + indexes_to_keep <- c(indexes_to_keep, index) + } + else{ + existing_cs <- high_threshold_comp[high_threshold_comp$x_sig_name == low_threshold_comp[index,4], 1][1] + diff <- abs(existing_cs - low_threshold_comp[index,1]) + if (diff < 0.05){ + indexes_to_keep <- c(indexes_to_keep, index) + } + } + } + + comparison_adj <- rbind(high_threshold_comp, low_threshold_comp[indexes_to_keep,]) + 
+ return(comparison_adj) + +} + +# Function for claculating RE +.get_reconstruction_error <- function(result, count_table){ + + # extract exposures and signatures matrices + expos <- exposures(result) + sigs <- signatures(result) + + # convert count table to probabilities + count_table_probs <- prop.table(count_table, 2) + + # convert exposure matrix to probabilities + expos_probs <- prop.table(expos, 2) + + # multiply signature and exposure matrices + predicted_count_probs <- sigs %*% expos_probs + + # calculate sum of differences (reconstruction error) + reconstruction_error <- sum(abs(count_table_probs - predicted_count_probs)) + + # return reconstruction error + return(reconstruction_error) + +} + +# Function to generate single run summary +.generate_summary <- function(title, result_all, result_true, comparison_results, count_table, final_musica, final_comparison){ + + # missing + + true_sig_names <- colnames(signatures(result_true)) + num_true <- length(true_sig_names) + num_missing <- length(true_sig_names[!(true_sig_names %in% comparison_results$y_sig_name)]) + + # spurious + + predicted_sig_names <- colnames(signatures(result_all)) + num_predicted <- length(predicted_sig_names) + num_spurious <- length(predicted_sig_names[!(predicted_sig_names %in% comparison_results$x_sig_name)]) + + # duplicate + + num_duplicates <- 0 + + freq <- table(comparison_results$y_sig_name) + + if(length(names(freq[freq > 1])) != 0){ + duplicated_signatures <- names(freq[freq > 1]) + duplicated_signature_components <- comparison_results[comparison_results$y_sig_name %in% duplicated_signatures, 4] + + for (duplicated_sig in duplicated_signatures){ + sigs_to_merge <- comparison_results[comparison_results$y_sig_name == duplicated_sig + & !grepl("like", comparison_results$x_sig_name), 4] + + num_duplicates <- num_duplicates + length(sigs_to_merge) + } + } + + # composite + + freq <- table(comparison_results$x_sig_name) + composite_sigs <- names(freq[freq > 1]) + num_composites <- 
length(composite_sigs) + + # dupcomp + + dupcomp_count <- 0 + if (exists("duplicated_signature_components")){ + for (sig in composite_sigs){ + if (sig %in% duplicated_signature_components){ + dupcomp_count <- dupcomp_count + 1 + num_composites <- num_composites - 1 + num_duplicates <- num_duplicates - 1 + } + } + } + + # matched + + num_direct_matches <- num_predicted - num_composites - num_duplicates - num_spurious - dupcomp_count + + # reconstruction error + + initial_reconstruction_error <- .get_reconstruction_error(result_all, count_table) + initial_reconstruction_error <- round(initial_reconstruction_error, 3) + end_reconstruction_error <- .get_reconstruction_error(final_musica, count_table) + end_reconstruction_error <- round(end_reconstruction_error, 3) + + # stability + + # average cosine similarity + + cs <- final_comparison$cosine + avg_cs <- mean(cs) + avg_cs <- round(avg_cs, 3) + + # min cosine similarity + + min_cs <- min(cs) + min_cs <- round(min_cs, 3) + + # max cosine similarity + + max_cs <- max(cs) + max_cs <- round(max_cs, 3) + + # signatures found + + sigs_found <- final_comparison$y_sig_name + num_found <- length(sigs_found) + sigs_found <- paste(sigs_found, collapse = ", ") # added when removed below + #sigs_found_ordered <- paste("SBS", sort(as.numeric(str_remove(sigs_found, "Signature"))), sep = "") + #sigs_found_ordered <- paste(sigs_found_ordered, collapse = ", ") + + + # put in table + + summary <- data.frame(matrix(, nrow=13, ncol=1)) + + rownames(summary) <- c("Num Found", "Num Direct Matched", "Num Missing", "Num Spurious", "Num Duplicates", "Num Composites", "Num Dup/Comp", "Initial RE", "Final RE", "Mean CS", "Min CS", "Max CS", "Sigs Found") + + colnames(summary) <- title + + summary[1,1] <- num_found + summary[2,1] <- num_direct_matches + summary[3,1] <- num_missing + summary[4,1] <- num_spurious + summary[5,1] <- num_duplicates + summary[6,1] <- num_composites + summary[7,1] <- dupcomp_count + summary[8,1] <- 
initial_reconstruction_error + summary[9,1] <- end_reconstruction_error + summary[10,1] <- avg_cs + summary[11,1] <- min_cs + summary[12,1] <- max_cs + summary[13,1] <- sigs_found + + + return(summary) + +} + +# Function to make sig view summary +.signature_view_summary <- function(benchmark){ + + indv_benchmarks <- indv_benchmarks(benchmark) + truth <- ground_truth(benchmark) + + final_comparison_list <- list() + method_list <- c() + + for (index in seq_along(indv_benchmarks)){ + + final_comparison_list[[index]] <- final_comparison(indv_benchmarks[[index]]) + method_list <- c(method_list, method_id(indv_benchmarks[[index]])) + + } + + summary_complete <- data.frame(matrix(, nrow=6, ncol=1)) + + sigs_found <- NULL + for (final_comparison in final_comparison_list){ + sigs_found <- c(sigs_found, final_comparison$y_sig_name) + } + + sigs_found <- unique(sigs_found) + sigs_found_ordered <- sigs_found + #sigs_found_ordered <- paste("Signature", sort(as.numeric(str_remove(sigs_found, "Signature"))), sep = "") + + all_sigs <- unique(c(sigs_found_ordered, colnames(signatures(truth)))) + all_sigs_ordered <- all_sigs + #all_sigs_ordered <- paste("Signature", sort(as.numeric(str_remove(all_sigs, "Signature"))), sep = "") + + for (sig in all_sigs_ordered){ + + summary <- data.frame(matrix(, nrow=6, ncol=1)) + rownames(summary) <- c("Times Found", "Times Missed", "Mean CS", "Min CS", "Max CS", "Methods Found") + colnames(summary) <- sig + + # times found + + count_found <- 0 + index_found <- NULL + index <- 1 + for (final_comparison in final_comparison_list){ + if (sig %in% final_comparison$y_sig_name == TRUE){ + count_found <- count_found + 1 + index_found <- c(index_found, index) + } + index <- index + 1 + } + + # times missed + + count_missed <- 0 + for (final_comparison in final_comparison_list){ + if (sig %in% final_comparison$y_sig_name == FALSE){ + count_missed <- count_missed + 1 + } + } + + # mean CS + + cs <- NULL + for (final_comparison in final_comparison_list){ + if 
(sig %in% final_comparison$y_sig_name == TRUE){ + cs <- c(cs, final_comparison[final_comparison$y_sig_name == sig, 1]) + } + } + + if (is.null(cs)){ # signature never found: report NA without calling mean() on NULL (it would only warn) + mean_cs <- NA + }else{ + mean_cs <- round(mean(cs), 3) + } + + # min cs + + if (is.null(cs)){ # min(NULL) would warn and return Inf + min_cs <- NA + }else{ + min_cs <- round(min(cs), 3) + } + + # max cs + + if (is.null(cs)){ # max(NULL) would warn and return -Inf + max_cs <- NA + }else{ + max_cs <- round(max(cs), 3) + } + + # methods + + methods <- method_list[c(index_found)] + methods <- paste(methods, collapse = ", ") + + summary[1,1] <- count_found + summary[2,1] <- count_missed + summary[3,1] <- mean_cs + summary[4,1] <- min_cs + summary[5,1] <- max_cs + summary[6,1] <- methods + + summary_complete <- cbind(summary_complete, summary) + + } + + summary_complete <- summary_complete[, -1, drop = FALSE] # remove placeholder column; stay a data.frame (names kept) even with one signature + summary_complete[sapply(summary_complete, is.infinite)] <- NA + + #summary_complete <- t(summary_complete) + + #print(summary_complete) + return(summary_complete) + +} + +# Function for updating full benchmark object +.update_benchmark <- function(full_benchmark, indv_benchmark, single_summary){ + + # update summary + if (dim(full_benchmark@method_view_summary)[1] == 0 ){ + full_benchmark@method_view_summary <- single_summary + }else{ + full_benchmark@method_view_summary <- cbind(full_benchmark@method_view_summary, single_summary) + } + + # update single benchmark list + indv_benchmarks(full_benchmark) <- append(indv_benchmarks(full_benchmark), list(indv_benchmark)) + names(indv_benchmarks(full_benchmark))[length(indv_benchmarks(full_benchmark))] <- method_id(indv_benchmark) + + # update sig view summary + sig_view_summary(full_benchmark) <- as.matrix(.signature_view_summary(full_benchmark)) + + return(full_benchmark) + +} + + diff --git a/R/load_data.R b/R/load_data.R index aab50d77..9a5f1293 100644 --- a/R/load_data.R +++ b/R/load_data.R @@ -917,7 +917,32 @@ create_musica_from_counts <- function(x, variant_class) { return(musica) } - +#' Create a musica_result object +#' 
+#' This function creates a \linkS4class{musica_result} object from signatures, +#' exposures, and a mutation count table. +#' +#' @param signatures A matrix or data.frame of signatures by mutational motifs +#' @param exposures A matrix or data.frame of samples by signature weights +#' @param count_table Summary table with per-sample unnormalized motif counts +#' @param algorithm Describes how the signatures/weights were generated. If \code{NULL} (the default), the \code{algorithm} slot is not set. +#' +#' @return A \linkS4class{musica_result} object +#' @export +create_musica_result <- function(signatures, exposures, count_table, algorithm = NULL){ + + # create musica result object with given exposures and signatures + musica_result <- new("musica_result", signatures = as.matrix(signatures), + exposures = as.matrix(exposures), table_name = "SBS96", + musica = create_musica_from_counts(count_table, "SBS96")) + + if (!is.null(algorithm)){ # an explicit algorithm = NULL is treated like "not supplied" + musica_result@algorithm <- algorithm + } + + return(musica_result) + +} .check_variant_genome <- function(dt, genome) { diff --git a/R/main_class.R b/R/main_class.R index a8f27ab9..032a757a 100644 --- a/R/main_class.R +++ b/R/main_class.R @@ -314,3 +314,71 @@ get_count_type <- function(count_table) { get_color_mapping <- function(count_table) { return(count_table@color_mapping) } + +# Full Benchmark object/methods ------------------------------- + +#' Object that contains information for a full benchmarking analysis where +#' multiple predictions may be benchmarked +#' +#' @slot ground_truth musica_result +#' @slot method_view_summary matrix +#' @slot sig_view_summary matrix +#' @slot indv_benchmarks list +#' @export +#' @exportClass full_benchmark + +setClass( + "full_benchmark", + slots = list( + ground_truth = "musica_result", + method_view_summary = "matrix", + sig_view_summary = "matrix", + indv_benchmarks = "list" + ) +) + +# Single Benchmark object/methods ------------------------------- + +#' @title character or NULL class union +#' @description class to allow either NULL or character +#' 
@keywords internal +#' @noRd +setClassUnion("CharOrNULL", c("character", "NULL")) + +#' Object that contains the benchmarking information from a single benchmark +#' run, i.e. the benchmarking of one prediction +#' +#' @slot initial_pred musica_result +#' @slot intermediate_pred musica_result +#' @slot final_pred musica_result +#' @slot initial_comparison data.frame +#' @slot intermediate_comparison data.frame +#' @slot final_comparison data.frame +#' @slot single_summary matrix +#' @slot method_id character +#' @slot threshold numeric +#' @slot adjustment_threshold numeric +#' @slot description CharOrNULL +#' @export +#' @exportClass single_benchmark + +setClass( + "single_benchmark", + slots = list( + initial_pred = "musica_result", + intermediate_pred = "musica_result", + final_pred = "musica_result", + initial_comparison = "data.frame", + intermediate_comparison = "data.frame", + final_comparison = "data.frame", + single_summary = "matrix", + method_id = "character", + threshold = "numeric", + adjustment_threshold = "numeric", + description = "CharOrNULL" + ) +) + + + + diff --git a/R/methods.R b/R/methods.R index c1838b1f..6d140aea 100644 --- a/R/methods.R +++ b/R/methods.R @@ -1,5 +1,5 @@ #' @title Retrieve musica from a musica_result object -#' @description The \code{\linkS4class{musica}} musica contains variants, +#' @description The \code{\linkS4class{musica}} musica contains variants, #' count tables, and sample annotations #' @param result A \code{\linkS4class{musica_result}} object generated by #' a mutational discovery or prediction tool. 
@@ -222,13 +222,13 @@ setReplaceMethod( ) #' @title Retrieve variants from a musica or musica_result object -#' @description The \code{variants} \code{data.table} contains the variants +#' @description The \code{variants} \code{data.table} contains the variants #' and variant-level annotations #' @param object A \code{\linkS4class{musica}} object generated by -#' the \link{create_musica_from_variants} or \link {create_musica_from_counts} function, +#' the \link{create_musica_from_variants} or \link{create_musica_from_counts} function, #' or a \code{\linkS4class{musica_result}} #' object generated by a mutational discovery or prediction tool. -#' @rdname variants +#' @rdname variants #' @return A data.table of variants #' @export #' @examples @@ -263,7 +263,7 @@ setMethod( #' @rdname variants #' @param musica A \code{\linkS4class{musica}} object generated by #' the \link{create_musica_from_variants} or \link{create_musica_from_counts} function -#' @param value A \code{\linkS4class{data.table}} of mutational variants and +#' @param value A \code{\linkS4class{data.table}} of mutational variants and #' variant-level annotations #' @export #' @examples @@ -288,15 +288,15 @@ setReplaceMethod( } ) -#' @title Retrieve the list of count_tables from a musica or musica_result +#' @title Retrieve the list of count_tables from a musica or musica_result #' object -#' @description The \code{count_tables} contains standard and/or custom +#' @description The \code{count_tables} contains standard and/or custom #' count tables created from variants #' @param object A \code{\linkS4class{musica}} object generated by #' the \link{create_musica_from_variants} or \link{create_musica_from_counts} function, #' or a \code{\linkS4class{musica_result}} #' object generated by a mutational discovery or prediction tool. 
-#' @rdname tables +#' @rdname tables #' @return A list of count_tables #' @export #' @examples @@ -330,10 +330,10 @@ setMethod( #' @rdname tables #' @param musica A \code{\linkS4class{musica}} object generated by -#' the \link{create_musica_from_variants} or \link{create_musica_from_counts} function, +#' the \link{create_musica_from_variants} or \link{create_musica_from_counts} function, #' or a \code{\linkS4class{musica_result}} #' object generated by a mutational discovery or prediction tool. -#' @param value A list of \code{\linkS4class{count_table}} objects representing +#' @param value A list of \code{\linkS4class{count_table}} objects representing #' counts of motifs in samples #' @examples #' data(musica) @@ -365,20 +365,20 @@ setReplaceMethod( #' @title Get or set sample annotations from a musica or musica_result object #' @description Sample annotations can be used to store information about -#' each sample such as tumor type or treatment status. These are used in -#' downstream plotting functions such as \code{\link{plot_exposures}} or -#' \code{\link{plot_umap}} to group or color samples by a particular annotation. +#' each sample such as tumor type or treatment status. These are used in +#' downstream plotting functions such as \code{\link{plot_exposures}} or +#' \code{\link{plot_umap}} to group or color samples by a particular annotation. #' @param object A \code{\linkS4class{musica}} object generated by -#' the \link{create_musica_from_variants} or \link{create_musica_from_counts} function, +#' the \link{create_musica_from_variants} or \link{create_musica_from_counts} function, #' or a \code{\linkS4class{musica_result}} #' object generated by a mutational discovery or prediction tool. #' @param name The name of the new annotation to add. #' @param value A vector containing the new sample annotations. Needs to be -#' the same length as the number of samples in the object. +#' the same length as the number of samples in the object. 
#' @rdname samp_annot -#' @return A new object with the sample annotations added to the table in the +#' @return A new object with the sample annotations added to the table in the #' \code{sample_annotations} slot. -#' @seealso See \code{\link{sample_names}} to get a vector of sample names in +#' @seealso See \code{\link{sample_names}} to get a vector of sample names in #' the \code{\linkS4class{musica}} or \code{\linkS4class{musica_result}} object. #' @export #' @examples @@ -470,12 +470,12 @@ setReplaceMethod( #' @title Retrieve sample names from a musica or musica_result object #' @description Sample names were included in the \code{sample} column #' in the variant object passed to \code{\link{create_musica_from_variants}}, or in -#' the colnames of the count table object passed to +#' the colnames of the count table object passed to #' \code{\link{create_musica_from_counts}}. This returns -#' a unique list of samples names in the order they are inside the +#' a unique list of samples names in the order they are inside the #' \code{\linkS4class{musica}} object. #' @param object A \code{\linkS4class{musica}} object generated by -#' the \link{create_musica_from_variants} or \link{create_musica_from_counts} function, +#' the \link{create_musica_from_variants} or \link{create_musica_from_counts} function, #' or a \code{\linkS4class{musica_result}} #' object generated by a mutational discovery or prediction tool. 
#' @rdname sample_names @@ -520,15 +520,15 @@ setMethod( ################################################33 -#' @title Retrieve the names of count_tables from a musica or musica_result +#' @title Retrieve the names of count_tables from a musica or musica_result #' object -#' @description The \code{count_tables} contains standard and/or custom +#' @description The \code{count_tables} contains standard and/or custom #' count tables created from variants #' @param object A \code{\linkS4class{musica}} object generated by #' the \link{create_musica_from_variants} or \link{create_musica_from_counts} function, #' or a \code{\linkS4class{musica_result}} #' object generated by a mutational discovery or prediction tool. -#' @rdname built_tables +#' @rdname built_tables #' @return The names of created count_tables #' @export #' @examples @@ -559,3 +559,724 @@ setMethod( return(names(object@musica@count_tables)) } ) + +################################################ + +#' @title Retrieve ground_truth musica_result object from a full_benchmark object +#' @description The \code{ground_truth} musica_result object contains true +#' signatures and exposures for the benchmarking analysis. +#' @param result A \code{\linkS4class{full_benchmark}} object generated by +#' the \link{create_benchmark} function. +#' @rdname ground_truth +#' @return A \code{\linkS4class{musica_result}} object with the ground truth +#' signatures and exposures +#' @export +setGeneric( + name = "ground_truth", + def = function(result) + { + standardGeneric("ground_truth") + } +) + +#' @rdname ground_truth +setMethod( + f = "ground_truth", + signature = "full_benchmark", + definition = function(result) { + return(result@ground_truth) + } +) + +#' @rdname ground_truth +#' @param result A \code{\linkS4class{full_benchmark}} object generated by +#' the \link{create_benchmark} function. 
+#' @param value A \code{\linkS4class{musica_result}} object with the ground truth +#' signatures and exposures +#' @export +setGeneric( + name = "ground_truth<-", + def = function(result, value) + { + standardGeneric("ground_truth<-") + } +) + +#' @rdname ground_truth +setReplaceMethod( + f = "ground_truth", + signature = c("full_benchmark", "musica_result"), + definition = function(result, value) + { + result@ground_truth <- value + return(result) + } +) + +#' @title Retrieve method_view_summary matrix from a full_benchmark object +#' @description The \code{method_view_summary} matrix contains a summary of all +#' the individual benchmarks completed in the analysis. +#' @param result A \code{\linkS4class{full_benchmark}} object generated by +#' the \link{create_benchmark} function. +#' @rdname method_view_summary +#' @return A matrix containing the summary of all individual benchmarks +#' @export +setGeneric( + name = "method_view_summary", + def = function(result) + { + standardGeneric("method_view_summary") + } +) + +#' @rdname method_view_summary +setMethod( + f = "method_view_summary", + signature = "full_benchmark", + definition = function(result) { + return(result@method_view_summary) + } +) + +#' @rdname method_view_summary +#' @param result A \code{\linkS4class{full_benchmark}} object generated by +#' the \link{create_benchmark} function. 
+#' @param value A matrix containing summary information for all individual +#' benchmarks run in the analysis +#' @export +setGeneric( + name = "method_view_summary<-", + def = function(result, value) + { + standardGeneric("method_view_summary<-") + } +) + +#' @rdname method_view_summary +setReplaceMethod( + f = "method_view_summary", + signature = c("full_benchmark", "matrix"), + definition = function(result, value) + { + result@method_view_summary <- value + return(result) + } +) + +#' @title Retrieve sig_view_summary matrix from a full_benchmark object +#' @description The \code{sig_view_summary} matrix contains a summary of all +#' the individual benchmarks completed in the analysis, organized by signature +#' @param result A \code{\linkS4class{full_benchmark}} object generated by +#' the \link{create_benchmark} function. +#' @rdname sig_view_summary +#' @return A matrix containing the summary of all individual benchmarks, +#' organized by signature +#' @export +setGeneric( + name = "sig_view_summary", + def = function(result) + { + standardGeneric("sig_view_summary") + } +) + +#' @rdname sig_view_summary +setMethod( + f = "sig_view_summary", + signature = "full_benchmark", + definition = function(result) { + return(result@sig_view_summary) + } +) + +#' @rdname sig_view_summary +#' @param result A \code{\linkS4class{full_benchmark}} object generated by +#' the \link{create_benchmark} function. 
+#' @param value A matrix containing summary information for all individual +#' benchmarks run in the analysis, organized by signature +#' @export +setGeneric( + name = "sig_view_summary<-", + def = function(result, value) + { + standardGeneric("sig_view_summary<-") + } +) + +#' @rdname sig_view_summary +setReplaceMethod( + f = "sig_view_summary", + signature = c("full_benchmark", "matrix"), + definition = function(result, value) + { + result@sig_view_summary <- value + return(result) + } +) + +#' @title Retrieve indv_benchmarks list from a full_benchmark object +#' @description The \code{indv_benchmarks} list contains +#' \code{\linkS4class{single_benchmark}} objects that each contain the benchmarking +#' information from a single benchmark run. +#' @param result A \code{\linkS4class{full_benchmark}} object generated by +#' the \link{create_benchmark} function. +#' @rdname indv_benchmarks +#' @return A list of \code{\linkS4class{single_benchmark}} objects +#' @export +setGeneric( + name = "indv_benchmarks", + def = function(result) + { + standardGeneric("indv_benchmarks") + } +) + +#' @rdname indv_benchmarks +setMethod( + f = "indv_benchmarks", + signature = "full_benchmark", + definition = function(result) { + return(result@indv_benchmarks) + } +) + +#' @rdname indv_benchmarks +#' @param result A \code{\linkS4class{full_benchmark}} object generated by +#' the \link{create_benchmark} function. 
+#' @param value A list of \code{\linkS4class{single_benchmark}} objects, each +#' containing information from a single benchmark run +#' @export +setGeneric( + name = "indv_benchmarks<-", + def = function(result, value) + { + standardGeneric("indv_benchmarks<-") + } +) + +#' @rdname indv_benchmarks +setReplaceMethod( + f = "indv_benchmarks", + signature = c("full_benchmark", "list"), + definition = function(result, value) + { + result@indv_benchmarks <- value + return(result) + } +) + +###################################### + +#' @title Retrieve the initial prediction from a single_benchmark object +#' @description The \code{initial_pred} object is a +#' \code{\linkS4class{musica_result}} object containing the initial signature +#' and exposure predictions inputted for benchmarking +#' @param result A \code{\linkS4class{single_benchmark}} object extracted from +#' a \code{\linkS4class{full_benchmark}} object +#' @rdname initial_pred +#' @return A \code{\linkS4class{musica_result}} object containing the initial +#' predictions +#' @export +setGeneric( + name = "initial_pred", + def = function(result) + { + standardGeneric("initial_pred") + } +) + +#' @rdname initial_pred +setMethod( + f = "initial_pred", + signature = "single_benchmark", + definition = function(result) { + return(result@initial_pred) + } +) + +#' @rdname initial_pred +#' @param result A \code{\linkS4class{single_benchmark}} object extracted from +#' a \code{\linkS4class{full_benchmark}} object +#' @param value A \code{\linkS4class{musica_result}} object containing the +#' initial signature and exposure predictions +#' @export +setGeneric( + name = "initial_pred<-", + def = function(result, value) + { + standardGeneric("initial_pred<-") + } +) + +#' @rdname initial_pred +setReplaceMethod( + f = "initial_pred", + signature = c("single_benchmark", "musica_result"), + definition = function(result, value) + { + result@initial_pred <- value + return(result) + } +) + +#' @title Retrieve the intermediate 
prediction from a single_benchmark object +#' @description The \code{intermediate_pred} object is a +#' \code{\linkS4class{musica_result}} object containing the signature and +#' exposure predictions after duplicate signatures are adjusted but before +#' composite signatures are adjusted +#' @param result A \code{\linkS4class{single_benchmark}} object extracted from +#' a \code{\linkS4class{full_benchmark}} object +#' @rdname intermediate_pred +#' @return A \code{\linkS4class{musica_result}} object containing the intermediate +#' predictions +#' @export +setGeneric( + name = "intermediate_pred", + def = function(result) + { + standardGeneric("intermediate_pred") + } +) + +#' @rdname intermediate_pred +setMethod( + f = "intermediate_pred", + signature = "single_benchmark", + definition = function(result) { + return(result@intermediate_pred) + } +) + +#' @rdname intermediate_pred +#' @param result A \code{\linkS4class{single_benchmark}} object extracted from +#' a \code{\linkS4class{full_benchmark}} object +#' @param value A \code{\linkS4class{musica_result}} object containing the +#' intermediate signature and exposure predictions +#' @export +setGeneric( + name = "intermediate_pred<-", + def = function(result, value) + { + standardGeneric("intermediate_pred<-") + } +) + +#' @rdname intermediate_pred +setReplaceMethod( + f = "intermediate_pred", + signature = c("single_benchmark", "musica_result"), + definition = function(result, value) + { + result@intermediate_pred <- value + return(result) + } +) + +#' @title Retrieve the final prediction from a single_benchmark object +#' @description The \code{final_pred} object is a +#' \code{\linkS4class{musica_result}} object containing the signature and +#' exposure predictions after benchmarking is complete +#' @param result A \code{\linkS4class{single_benchmark}} object extracted from +#' a \code{\linkS4class{full_benchmark}} object +#' @rdname final_pred +#' @return A \code{\linkS4class{musica_result}} object containing 
the final +#' predictions +#' @export +setGeneric( + name = "final_pred", + def = function(result) + { + standardGeneric("final_pred") + } +) + +#' @rdname final_pred +setMethod( + f = "final_pred", + signature = "single_benchmark", + definition = function(result) { + return(result@final_pred) + } +) + +#' @rdname final_pred +#' @param result A \code{\linkS4class{single_benchmark}} object extracted from +#' a \code{\linkS4class{full_benchmark}} object +#' @param value A \code{\linkS4class{musica_result}} object containing the +#' final signature and exposure predictions +#' @export +setGeneric( + name = "final_pred<-", + def = function(result, value) + { + standardGeneric("final_pred<-") + } +) + +#' @rdname final_pred +setReplaceMethod( + f = "final_pred", + signature = c("single_benchmark", "musica_result"), + definition = function(result, value) + { + result@final_pred <- value + return(result) + } +) + +#' @title Retrieve the initial comparison data.frame from a single_benchmark object +#' @description The \code{initial_comparison} data.frame contains the comparison +#' between the initial predicted signatures and the true signatures. 
+#' @param result A \code{\linkS4class{single_benchmark}} object extracted from +#' a \code{\linkS4class{full_benchmark}} object +#' @rdname initial_comparison +#' @return A data.frame object containing the initial comparison between predicted +#' and true signatures +#' @export +setGeneric( + name = "initial_comparison", + def = function(result) + { + standardGeneric("initial_comparison") + } +) + +#' @rdname initial_comparison +setMethod( + f = "initial_comparison", + signature = "single_benchmark", + definition = function(result) { + return(result@initial_comparison) + } +) + +#' @rdname initial_comparison +#' @param result A \code{\linkS4class{single_benchmark}} object extracted from +#' a \code{\linkS4class{full_benchmark}} object +#' @param value A data.frame containing the initial comparison between predicted +#' and true signatures, generated by the \link{benchmark_compare_results} function +#' @export +setGeneric( + name = "initial_comparison<-", + def = function(result, value) + { + standardGeneric("initial_comparison<-") + } +) + +#' @rdname initial_comparison +setReplaceMethod( + f = "initial_comparison", + signature = c("single_benchmark", "data.frame"), + definition = function(result, value) + { + result@initial_comparison <- value + return(result) + } +) + +#' @title Retrieve the intermediate comparison data.frame from a single_benchmark object +#' @description The \code{intermediate_comparison} data.frame contains the comparison +#' between the predicted signatures and the true signatures after duplicate +#' signatures have been adjusted but before composite signatures have been +#' adjusted. 
+#' @param result A \code{\linkS4class{single_benchmark}} object extracted from +#' a \code{\linkS4class{full_benchmark}} object +#' @rdname intermediate_comparison +#' @return A data.frame object containing the intermediate comparison between +#' predicted and true signatures +#' @export +setGeneric( + name = "intermediate_comparison", + def = function(result) + { + standardGeneric("intermediate_comparison") + } +) + +#' @rdname intermediate_comparison +setMethod( + f = "intermediate_comparison", + signature = "single_benchmark", + definition = function(result) { + return(result@intermediate_comparison) + } +) + +#' @rdname intermediate_comparison +#' @param result A \code{\linkS4class{single_benchmark}} object extracted from +#' a \code{\linkS4class{full_benchmark}} object +#' @param value A data.frame containing the intermediate comparison between +#' predicted and true signatures, generated by the +#' \link{benchmark_compare_results} function +#' @export +setGeneric( + name = "intermediate_comparison<-", + def = function(result, value) + { + standardGeneric("intermediate_comparison<-") + } +) + +#' @rdname intermediate_comparison +setReplaceMethod( + f = "intermediate_comparison", + signature = c("single_benchmark", "data.frame"), + definition = function(result, value) + { + result@intermediate_comparison <- value + return(result) + } +) + +#' @title Retrieve the final comparison data.frame from a single_benchmark object +#' @description The \code{final_comparison} data.frame contains the comparison +#' between the predicted signatures and the true signatures after benchmarking +#' is complete. 
+#' @param result A \code{\linkS4class{single_benchmark}} object extracted from +#' a \code{\linkS4class{full_benchmark}} object +#' @rdname final_comparison +#' @return A data.frame object containing the final comparison between predicted +#' and true signatures +#' @export +setGeneric( + name = "final_comparison", + def = function(result) + { + standardGeneric("final_comparison") + } +) + +#' @rdname final_comparison +setMethod( + f = "final_comparison", + signature = "single_benchmark", + definition = function(result) { + return(result@final_comparison) + } +) + +#' @rdname final_comparison +#' @param result A \code{\linkS4class{single_benchmark}} object extracted from +#' a \code{\linkS4class{full_benchmark}} object +#' @param value A data.frame containing the final comparison between predicted +#' and true signatures, generated by the \link{benchmark_compare_results} +#' function +#' @export +setGeneric( + name = "final_comparison<-", + def = function(result, value) + { + standardGeneric("final_comparison<-") + } +) + +#' @rdname final_comparison +setReplaceMethod( + f = "final_comparison", + signature = c("single_benchmark", "data.frame"), + definition = function(result, value) + { + result@final_comparison <- value + return(result) + } +) + +#' @title Retrieve the method_id from a single_benchmark object +#' @description The \code{method_id} is the identifier for a single +#' benchmark run. 
+#' @param result A \code{\linkS4class{single_benchmark}} object extracted from +#' a \code{\linkS4class{full_benchmark}} object +#' @rdname method_id +#' @return The identifier for the single benchmark run +#' @export +setGeneric( + name = "method_id", + def = function(result) + { + standardGeneric("method_id") + } +) + +#' @rdname method_id +setMethod( + f = "method_id", + signature = "single_benchmark", + definition = function(result) { + return(result@method_id) + } +) + +#' @rdname method_id +#' @param result A \code{\linkS4class{single_benchmark}} object extracted from +#' a \code{\linkS4class{full_benchmark}} object +#' @param value A character containing the identifier for the single benchmark +#' run +#' @export +setGeneric( + name = "method_id<-", + def = function(result, value) + { + standardGeneric("method_id<-") + } +) + +#' @rdname method_id +setReplaceMethod( + f = "method_id", + signature = c("single_benchmark", "character"), + definition = function(result, value) + { + result@method_id <- value + return(result) + } +) + +#' @title Retrieve the threshold from a single_benchmark object +#' @description The \code{threshold} is the cosine similarity cutoff for +#' comparisons between predicted and true signatures +#' @param result A \code{\linkS4class{single_benchmark}} object extracted from +#' a \code{\linkS4class{full_benchmark}} object +#' @rdname threshold +#' @return The threshold for the single benchmark run +#' @export +setGeneric( + name = "threshold", + def = function(result) + { + standardGeneric("threshold") + } +) + +#' @rdname threshold +setMethod( + f = "threshold", + signature = "single_benchmark", + definition = function(result) { + return(result@threshold) + } +) + +#' @rdname threshold +#' @param result A \code{\linkS4class{single_benchmark}} object extracted from +#' a \code{\linkS4class{full_benchmark}} object +#' @param value The threshold for the single benchmark run +#' @export +setGeneric( + name = "threshold<-", + def = 
function(result, value) + { + standardGeneric("threshold<-") + } +) + +#' @rdname threshold +setReplaceMethod( + f = "threshold", + signature = c("single_benchmark", "numeric"), + definition = function(result, value) + { + result@threshold <- value + return(result) + } +) + +#' @title Retrieve the adjustment_threshold from a single_benchmark object +#' @description The \code{adjustment_threshold} is the cosine similarity value +#' of high confidence. Comparisons that meet this cutoff are assumed to be likely, +#' while those that fall below the cutoff will be disregarded if the predicted +#' signature is already captured above the threshold. +#' @param result A \code{\linkS4class{single_benchmark}} object extracted from +#' a \code{\linkS4class{full_benchmark}} object +#' @rdname adjustment_threshold +#' @return The adjustment_threshold for the single benchmark run +#' @export +setGeneric( + name = "adjustment_threshold", + def = function(result) + { + standardGeneric("adjustment_threshold") + } +) + +#' @rdname adjustment_threshold +setMethod( + f = "adjustment_threshold", + signature = "single_benchmark", + definition = function(result) { + return(result@adjustment_threshold) + } +) + +#' @rdname adjustment_threshold +#' @param result A \code{\linkS4class{single_benchmark}} object extracted from +#' a \code{\linkS4class{full_benchmark}} object +#' @param value The adjustment_threshold for the single benchmark run +#' @export +setGeneric( + name = "adjustment_threshold<-", + def = function(result, value) + { + standardGeneric("adjustment_threshold<-") + } +) + +#' @rdname adjustment_threshold +setReplaceMethod( + f = "adjustment_threshold", + signature = c("single_benchmark", "numeric"), + definition = function(result, value) + { + result@adjustment_threshold <- value + return(result) + } +) + +#' @title Retrieve the description from a single_benchmark object +#' @description The \code{description} contains details about the prediction +#' that is benchmarked. 
+#' @param result A \code{\linkS4class{single_benchmark}} object extracted from +#' a \code{\linkS4class{full_benchmark}} object +#' @rdname description +#' @return The description for the single benchmark run +#' @export +setGeneric( + name = "description", + def = function(result) + { + standardGeneric("description") + } +) + +#' @rdname description +setMethod( + f = "description", + signature = "single_benchmark", + definition = function(result) { + return(result@description) + } +) + +#' @rdname description +#' @param result A \code{\linkS4class{single_benchmark}} object extracted from +#' a \code{\linkS4class{full_benchmark}} object +#' @param value The description for the single benchmark run +#' @export +setGeneric( + name = "description<-", + def = function(result, value) + { + standardGeneric("description<-") + } +) + +#' @rdname description +setReplaceMethod( + f = "description", + signature = c("single_benchmark", "character"), + definition = function(result, value) + { + result@description <- value + return(result) + } +) diff --git a/R/test_data.R b/R/test_data.R index 23289c64..6f59e683 100644 --- a/R/test_data.R +++ b/R/test_data.R @@ -152,3 +152,33 @@ #' @keywords datasets #' "synthetic_breast_true_exposures" + +#' example_predicted_sigs +#' +#' A matrix containing the predicted signatures for a synthetic breast cancer +#' dataset with 214 samples, generated using NMF and k=8 signatures. +#' +#' @docType data +#' +#' @usage data(example_predicted_sigs) +#' +#' @format An object of class \code{matrix} +#' +#' @keywords datasets +#' +"example_predicted_sigs" + +#' example_predicted_exp +#' +#' A matrix containing the predicted signature exposure levels for a synthetic +#' breast cancer dataset with 214 samples, generated using NMF and k=8 signatures. 
+#' +#' @docType data +#' +#' @usage data(example_predicted_exp) +#' +#' @format An object of class \code{matrix} +#' +#' @keywords datasets +#' +"example_predicted_exp" diff --git a/data/example_predicted_exp.rda b/data/example_predicted_exp.rda new file mode 100644 index 0000000000000000000000000000000000000000..3d7a129557c842051e2e972ca41b9f1d4b51972e GIT binary patch literal 13798 zcma*tLwF_N5+>m8*tTuk>Daby+c`-(wr$(i>7YAC$4*Y{q@xpO?%cbb#nhte`QKe_ z>;FjO;J*F0Agwm7G}FyCSxnWT+36r5p*1Dd@btx$?KFF#&#wP(p{1iiF-an6s{Wvd zmP15iqZ|5WHrE9P&(?SBl8zX9%(Wf+uGII;z4Zncvy?IZXi`uS6BAPslaP2O{I^GEyhZRY27%aq$+xV7NOxNRURN<$FVl9(FYjCIkcr8oLY;@ym9%wxBq zIJRj=7RuzkmjR~4z3EHhO;~9$>t)=`phnSqOfmo5O8*ZUvQw5ey)%b$yiS=c6^)VQ z02z`XXF4SO>@dKD&(NVOkB{UagR`X_&}85f8>R4FL+)p*tV^27+o8%2OVKy$u1K12 zAB4+lt76F+YVS;*GkM8*J4+J`hX^vGH?V@&0vqf8IKAK?Oxo$d)m?5wGjDc)&s`aj zI2dUwYn?yrnclCVH>&L9-M#t_@v?#Jig@aAA`tJDVyKH{1!p8z#f+yZ4|!)}G9|sM z5}oQsteN)%NkTv60C`a)`UT^8uq+-G1uUvCHmeD0U7NO)+7qS<09t)c973^9S?(Ox zNx^Y>W;U#K$A24`ShIxn*3F>l6;8C4*`oVNX)y=YZ^AFW@|fMV%enL&Eb~CLWkND6#$>$>TKgWZQohtm)CZqtvO-)z%!+ejwJ6iRzAH=kvf$vEhbY8{!=n{bKrzEaOMrR+s`$RX&Bx5f9!4ei zCQT)BRe^Wk)a?B**z($n=dyBM10Lf@S2q@9Q^g6>l_y$kgfTvej|_gsw+S}?2WmaRpn^WEk8hQ@C}P+GNJiB;0Su>P?uT1diACvb1jk#bsvh?dkrEQqp)yyt^4eZ9+R$6P9{U zVL-HqTa36-zeA-9{%E*JLGnD$gN+E^#zNs`J2|(Tw@t7MQLo7%$6|0$kEGd2L96FL z9f7gMlyHZ`CTN+>V9m7_F45eW%ewI=T<1FrBOX4?6x&>o8zhrzKp^9D`vR};ODs@} zcQ&-Pa-(rc14*`-ZajBN80zPc`)}seO>!VhiB&~uLMMN18-RJyS|Am%^fSmoPTdlZct=Q;t?yv7$hFDmQk;7os|2)g0Gjb2V#5@ z)~7RZPL#5ae`mor(P^C2FJ!#%A&N$=w@7n>Fcnp$QDUW$3?~IqpXa#m{Mox?T{tT{g?WMl&koewQAAP&A0L)vvU zsp>O*WGHxGlrm}xH3j6E@du8YW+)NsyI~w9ZzJCDN@V#d!j^o7ryfJZ!ETke>V3h; z{~)($A0lf-72rPsEHdka^@;OO1}NY-!Vor);$B741CdY8BER{nZTSiZb->d!I(dQK zQ{he~_pN=&AeJi^)=xj^fzxeD4|c zXK>ls(Sh0WNa${hEa@uMump{W0};3%Zk@I&RTG^kG3)%Hx$N3f`@tz|I=&ldxgc=_ zOp}Z~!}dZ_Nqks~zIaX>@##ospRfsoB`gAP1vv#^hX+=DwE8OeZORMTX5@I`!7 z{GNcY<(a(4c96vF`&sWoV{wSD*WBKe`hzUTORD&GGZJnlA!Y;ztLqWQTgB^``H6ig 
z2V@kkICZ66mT~iMjS)wF*4Gu5pfzY?g9SF~>yh7pS(^O2=zeNl;TB)w%szOOHh%l$ zJHxHN?y=~1CjjGwuGDbEs%(meX$N0I|FM`W~MUT*!6;His6px(g_L>pr% zxDpX{SRpmyEKG~39^=JFe()tU-Ovk{T*;`KafkV%a&zN5U7k};G2k7E{ai<-!V!?a%c$;rgafUu z1Fw=B{h>;jH1nSEjG$li;d?Y-zqf#OBRV7l`}Lrtuf7|Lmil3StgFI`m58DtRO}5o zS0G4_f&5QZuwXCmrXT-B2tdUpz-Y?+9KF2qj>3+Mjf+HiwfFBjicIE(?w^oeitr{! zjHgfS$BRUUD>K(R&iw~rI&1Dlr?4ml_>n51Jyo*1k?k7aik8gza+@T^#o_iHXLaF2 z2D8O8PKXB31j0DrReS4NbWK0`2kfAS&?a93A$KLH<&!eW$)1JjSYLv3(->rnS$4XE z*`aIB{~U99?Ct@E6>gXcND$IJfncI?ndF?;0$CbrqVB^j_t$B^?&sM|UQwUO4e+iY z_9<}qWZHHm$%$gGY{N-wa*+6Me1I2_ka<|3=y+V)1l}+(N6rW&mlUPn)F*R_69e9E zpg%DULulIqGOTaG?cbvoaNmUX0a9-oOBUfe@q)%PIgnRvGzP}(l*L5djVe=`uD5<{ zHi|ci^Y9fpu0OI~VH=_X;@HnIL!&0&?7a-E=}h#j4m~Es-%$tJih`PkMlyX~WyE0= z$DEO*G1AEj!PZn%CIs37>|0P(63w!fFQ1N6Fk3PsZ^0_jzy7n3e5?U;wD~`Ux@h~b zli@<`p#)cpqV_q*P_(wS()x{V2vGs+w@^VBvjLmZ zC!QjhVcW(FBHvROm4Byoo70XdO#So!)IA^?L%A!-X{6@0-4=^P=0cWktLy^;*HWlI z1#~xpVOo}y(lzf2;P3%8PfA8epK4YI^6he?a~8y}3IU|_+Nd?+1XX*Lk29Of2UNZa zE>)yFsSzt~+k7#^nDf^uE8RXN(6gfSnPDV{3e$yE=Yp16Kk15MQ0B_5e1(_f@@?3{ zkeNg3}P5piXF!3*lt z#)cN=I#LFaL<6u5YS}TuZ}kah&&MUxi`H7Aq@+nhcg8$Ej|Ipp{+@e&Zqi6%lM*pm zbVvNag+J&V)W!mm^jnE`AcI1)bg~t_OYRnLD0y0TO?3ttj2Na9FNgv}%0;kc_C8}A zvW%(GfnWEE5_O~ZnnIL(5_R7)d$;aChwtX!i`o0q8DMTmv1Et|28S%P-T0pCINzss zfmpq4$R=L*Q$au$%CPmMuGk?Avq$@^J}u8Z%u9-rkqfxE#Os03YerQ#O+CB)tZ>6i|ZSqT)G9H1$2`>5QZC^b{;Vu zXu>YKPfN)j#(|SRsAXdbk7*EOMJHjz-71C1$d`5;_ z9ZI)#{xtT>7V)+t9DmVP7!VcD!{rP~ z)f8bC-14E$5+huhj47x0MlYDPxEEI%lgg-QI9nua%oRW`tzIik=efQIv>X z13ovSki%V$)%!Kvrl3xSw28O1(z=R1cOGCls#Wcu&8|FcqAnH z&@Rq9b%1Gro%gTm5k!0rleIYW4mYngbEn6XZ}0X82AKN`DReCceqmAgC{$>b8TVcrIB{Ii8bz+c`+5kvuaFE*}w^QLm^ZDRr(b7LF~d z0x^d%124hO5^!^Z<)><|@G%m@Ju?I1(R)*>2MPLNet8aam6xQqLZA{e@@EKptD>B# zBjaCQDs_XvQ{+(??W4u6-5;mkP6-6O3{(I=!_T#D1fDt+^WjV+w%w6|YCj5sGuXH@ zO3x%p=?&#px(85C18X~T62^YYv)bD6TMb(`Ue7u4TR}p#N}nzILK5RnZq2e8Rx^(I z`-=`YYGZ;RFMY(+FwM}xgxnx(^X4$V@zBE79|6S>VU_`r~n;Z1HE8d4yhTx zV8`j-DN1_q;Ae>j7#0^(6MHQ>sD3jJb1u%a!|aboqH>!Y*2KGDr3A$6u#AX3p-l)+ 
z>7V_=@EfCKLpUU+R`S`mKZr2ELKZ=YiyQy@YFbe35WXJyCQ1Y*1VUnO6Z+)Kd&=6g zRW_9L#jQxKR5qkc`eyVup=DBSpZJ}S4lRtaf=uAvO~O23EPKdEZNTFbsyJLhWAZ$ zPb>J)cCGlA!bVvyPdn9{M9J6EyKkgri1~@ASKT+?(_gBH2eSktlY798K5@n)lAWdp z9?STLQFo)q{aNI*LEt%7m$wk}21HF#d83p$Pm0T%P%PiT<7XL_53Il9zf%RfV91>) zz@b)q2Hn9VWbwcD8JS@{to z8=%s^k)%$!+BH0Bgh-nse?S#h z>WtF#^-;nIbC}GvNi!DHv}V4I->k%Dzom}z4hbKX<8`fXVUlG5)c4urpKvRia{}(6 zM(*1K0|Ec!gvC)Vgz`F0Q=at$LCN#rJT?)uA&OG57?rN0EG<8cSv9&ZrVRJx5!HjC zG@4n3z1$*!>dqGmad$xxc1yJ%ls8I}Lpxg@KrCOxZaiHo54)`^f94$tXl85WnuF?p z(MA4hG?A8-g0#KL1Y|iQk(Dx(NcdYp>MWU{+vDlI?b4-a42`}r%$_9lO2@OaK@~zX z7|}0wYDx04yW6yqP-xpXB*IFB1Tsqj_voqvoPHZFjrGSmP|z7DuIyZlu6JLKu3ee( z8ccj=@vysx*La!TCz~L|@*8zvof{5pUI=WdLm-)8HKM+vMx%){8LMj=8Mq0m>05(7 z40qZVqCJFOXkdF@m4E>IU+t7X#M^jP$cmuSOp@v;iP#gj4w*yeo9ms8(vC$ z^vU&Y!Qd<(NJ;J9o1D6Df9)yxjRtkYgu(eWQan_Mc)b@^cGpu~m{|*p+?%0F z^mh*hg-eB{jSf+N=iwKL6G_w*=}YN*2I9D>s72>dDhYd4sX91UM+}6rIqNn-4Rv46 z%-5Ga98snDW-UiKd_7UKD?)^jrCU@CK*dS!7X(bbeZzSsQp6*bdkkk^w#AGxMozD@ zGA@!&iufaBrXJi+c&smozq1AZv$wGlt7)GxoO$e6)yJLevPYA$tP56%s7u~?(+KHI zB<4tL|82;|sEfyl{K??bK@64ii+sN;NKU_tlgQGfk_)R)dgEohRge>ZoWa=UJ+qsmtK_3h)oqrxXn{#}wT zDz>?4*}1qS?18PU=(4RJ{_>*=r99x;s^-~jtKuqsrDRbIGK&20kVp3#q%S{0asIm8 ziS2&NYDb+I5Pw#~?LapPPS1b#rTN42?M#Qs+xT)dq;FAi;8+$(9Z4|9a=$D^_3nKh z_*;6v|5feXci?-zojbS0i3<7K$xy>!8bLFQJC02oT7-eO$#UKAZ8$Um5>cOfOz|)3 z^>x3#;kFT7b#pJ=h4%iJicbBZfjIVbF;~2yBO7r4ah=ru=q1UBuRdQPX6r_mFbp;@ zpGk{@z^uk2up`g65XU6#3uhi30@%_((3igsI@=+j6|snqc8Qg*S%C!+Y*#HC_#vWf z_ptq?c&C7tp2MNz>4X4tVVpKVkpahnm&(4Y1DnT+(A3adj{a`yDR<#TmXEkJWpk{F zUY*=y=f~JmWLLq|SeqB}`==@O*N)Py2?MJvkPF5}29>%U#H2&`|r5REG{~C-vsDvJsb*{yFzo81Z}ex#pPP$2D91($kZT zu%-;o>Qgn<3#RO63N@d{0bP51{gcSwl`$5HMs|jw0h{0Hao4GpS-9Md2VS9CGahab0}&9h8D{xK4#uE{gF4`L8VI)Y) z*wbsTiHfPwGr4wQKEzPy|^Bnyya?7<0!|2qL34qp2^Y+Pp9$ajY!F7BYzS?BeO zIXkj${SaE+ZYTXkq%9;MX=Hsp!(Nwjp#0_i2xmBIe~?s}JyJ(sdf_Qranv2<<8wzW z|1AxRa$5vU)^<4`G=oQs)m?D_FU7~8 z+mF*5Fe=jAn?Wu;?d7Nv#2pq#ALlnqEQuD1Hr!3kaCNWA`Lx#f;=OapheyugDPq{q*iT2ZWXBI!22!S|y;|qT(_iA959-_AIdWS90 
zvNWs}T`OFVp!g`>4pw-ei&3zz?5ByMIx?XD{;KW@;2{|v?}8dSf|RCgFd7$FEHJwd zAs`BiELG@|L~lA0>KegI8M;fC8nMO_#LPfW)n4ti)peTVtoIU zW<*}Dh3T~Dwr!n`(!JqK0;q@|eRO9y>JWk5Ho`j;4_YC5EV!uhHbElGtZo!xx%xfw z53IASZW?pTiE{7x#JP$36QQ{Ytt3(-9+CnDL7JFd@$<9bs#<(fN-H}n+RZNiX8Va? zWTvU8L=E?w6Ed-JhBt0hEkDeAxr7V(HBmmN7h8J27zno~X@_ATPjdf}1Xf%u&4SSz zg8ur4+S9WCBf}#Li!&Q6GCY;3oxR8eR_n zI;18Sw3DpuBm5gKhP|dN=E@@6&_2pIB zSK?ni4;ARM8aev=q65+QK>vT;lkAtp+5JI=R_NYe3)%90NP7b;tt~};uu_J>%03ZH zrlWrMf2C&d@@)m>E00uU)?{b%-n-J&7m#Jv2^vpB*U{a2+%8fAr z5}6tdsecQtc=^<2tt`Pf(kl~c{F^#HLC*~0<-ZdbpzA0W6&k*dUcI_M;aRtC?Bk(8@vwPDi6yce1Lbii-&#}C zoD6D`iudjk#iaWLy4l-Bc6`n#$b1gwQyj4+FY8&^NK8Yi4?djU3NAoqZ;|#x5*U6X z^2+Itqe8%|;nUkUyf7)KxnO^16tBnCou3c2wy*yLC|OS$M|JFtI^c#3BU&s0lSDPif6da!kS4?38z)T|zzoZpikT5n# z?iL%Jx4)@3ao#5kjH*&wd<&v+dmR*`GFYi`H#wKgaQvMwwM$i7d=nMH8kuRG`u)aj!h>?8$t<`##Bvja)3+Fx#U(dWn6S|WH+uVO6IKDrsM z^-X;{iH8b@t%Y@V#5YWTemQaB=Ff2G~ARh<}kCAR4>Au96o+I<3CZ527 zb&D9&(ZgF)wlD=&c&hb$oMSdDA^r+RP6Y5e3?DE4Rap$U8G2r5`iFOxcZ(uQ)y^4! zqOQEMTK+{O1|_jnGv5=}?3T$R=Uo-P!A)KSv<*4Fb|cR*2)jLJtzpJ}L*&&s58>&F zijMpI(_IjR$FOvt!>NQUt5(_5X846MF;;CfnHv+7HOe~{W8oZqZYyPxi_GB_8*fe3 zLq_^Yas1awU()8%(0Zc=lC-7;H6S-`j|QCmb3c-hBWH5h+39dZ>V4LHIm4LBg_G3@ z{S(fwrMn%q3mRlB5C%Vpi`E!z|8?*QU4k68Do*+lk`$h?bR`pXyVXMt2oPq4G4y|L zlC-6$Np-3^5f0GYDWW^sQx=oeU69!xx}Z}+lvPhQkeWW;Lcg~6m*jOux#;i>Wl$h3 z+#naGY9B}UpQ8~2G=7B?|K_VLytd}pDuw*1v`ecvt3i3xWWMX#E4h|~bms}T{{5MuLYdP_uMYNda6_2eEIDH(i2r zBUe|)pNPAOzc}M*m6C$Lo#;(7ejav_msqI4-s3BEucDRSQ+A12-BeIJr)fS5%O;m? 
zamT1GV~44Ei{=MAJj#>gMV`4I1m0Q`SoW`HS)T33hx25#QwmVrVoa2EXyQd(7r)3g zbZ@e)f0mg3`{(rx_o6y;=soh<-t=ef&T15P>?7g1BYX*lIW}cvDf%^rwQBJa4Jsxr zar;lR>!5o#;hPybJMn@3BVtKOf6=6f@~~6ZVUvu7hU`Dz6gHlF0ySrAYU}HS@nFo< zfimBu!qfbP(q^G)4_<4zB;mwyyEt}K8 zSMN*m876Z-q=-Qje({+rRi}m=)(3akjvUWx^S2$n8WRI7GX^q`5C2E(@wG$-s=3{$ z-@6A~Ph2^!1T~Jz=n7KvIVt=_LI!jv$9AXiSK*#wN^;}v0)g@@jLn~)v9Bwi2xm6P zt{N6|=^I~ICHs9`-TA30!R)Q(UO#d5+;iOuf&@~dCR0%w4vl3@-Ger9V|UrU*dFH7 z2jFWaT4Y=R_o1*L&E@%b=zYJ+x^5Ej_kNR%-9{3S6rBrB$B#M$1UdvLErO@vJDG1z zk26tl#^($kP0nIN8?oa&AGFOq6Mb?w$fyYB120d(>eNp!h3H9*v1D{}>)Kw$5GEx5N zhdvN!WZ%r4kz@$=*`?g=l~TUDaIx(t=|mfTa6r^2xsA!gHF8;zEZ)v7XBT1AdIGDt zT0%b?`-U|M2M*+c9ijf;X-Pw4{$U@tbHUQA%gkFD=tKd|DiNTpSj;E+8OWRc`FdUh zZh05VrmH=_>d(*#O_Q#%-xoFR@xvTxPW#{C`Z$WbyEe$M%?v`GD%c@aLqC`LJ0bHX z+^I%;8O*mgZ|t*~ShjSPoxg2GkX0kQtu|y}*_H<;4kN!;bBMc>3yV5+nYu8vSkGoM$zfn;Yq+t1p7i2OlXB z>!J7&qS*DdcZP>eqUd&jMH%r~s>z*sdlMO*gx2X_K16Y3hzb-Vzp&a?p#LzfI@-%i z4B^c%dbM+Jt%yqntSD@4^0+>+FHpyde`qohPJ5)F(?C2qH^{{(%2`xhysl@5gXI@U zQ`Bl>#vE_)kWuC+mi$}>!o!*zQ(n_`?L7`=AsC5)gGPy*3naPv8$w5tEkKcH)mUO} zX^}AU@%y+Ckr;8sPUJj`(wIm3@=3mxjV|J7t7)t0l*d#xio4rj0C)H|9jlAPBv{tr z?T@72M#Autto4og*6JZqqlYZO4cAkR9!~&IUT96muj4H7+HTO*+X%HDOp5FYF`iId zx)Hk+i_KB;>#U0Xbsjp#oS_-ko8smD*09Ix%w)2vtM$!aa!Ne*UKi)80!ia{`wF`` zw>2wiVI$2B3G#w_!3>qB+r)oGpmP9x9vpyvTM4`ReP6|tu|INc5dlLn_2;~=+@(LE z_psH{_QcO_QN;%tV^`7>$uPg29n&6`XKWJYf%`6mi7s^o0q7-=kKfleq{xUX-j^}9 zYp|drw$7PeR>Gd+=lvDIZ|>|hVuzL!OUhX%g(nuzs2Gog=BFxbc&f#SPWfCwI8aD% z?wWCK@6|kkJk_69Xc}cffYekh`Ag`dCJDEA9jE#JZioHZDeg?rLj#69^mmxfyepP9ym*xPMcS-lb|RH75 zk0!y#c^fIL-`f`qf(Izz9|s~H*Ql;QaOl0fi%{Uk;3o$fSh9t78KJHfk*QmRrT3*! 
zvUeIa<)mmVx%Vs^9QPt|D^Jw7ve^%*BVp+0-HU$je5F_5?gzfXy4&*F{>5k(vZ#gKK=&)(-!k2I@0XLMxTAYa8kPwS& zE9f&cmFF@pIY7)GObear88oFD`I>z%+tw)= zVK~?Dg!hij;dXpY67EL$`e%4lm7Y@n9)N5x2#c{t1%}^|b^Cl$FxfX>?`!-`z;cnG zQ)NJ3>X~xKLKJm+~<2h!M~;<#Q>Usc3at86f8&4nIoim~UUY}7yQPpGpW-=h@kN55{2>s)otH?Z=v z^J90r(JtXrD=2 zoGIx;?9?xFXuadq$O$VQ)zqLcvs5ree>an{GrqbEv(_;LmT*-KR@zHFOEA;PJg85KT!Am;Iek^uj6zvir|UqD70a)`pH7;5ql_s{>459vml zbFlZ!ddFbL!TyJda1uAbQZdd4%7ic{y`nSwgbA# zoae-xUD0qx$IJN|$ODOY{9(6RnUz$prU4QYR624$1@?Hqj-KQ$QS9x)usviR zK}V|;;|DTZxO4gd_nv36n&jO}t$E(jc7DYh-8evUAd?&{f9Q!I zAm1pI#7EZW{*|pCJ5RVXmUrZ}{7J{D=G(LSYYqDyJcTRRK@IpJKl(z&HJcrSOWwrH zLr8Zr(tY`4R0y6K$9APl%9Z_*a>-xE76jS-M27&_cK^06a}9h8azzT=Ru**PH`v42 zVd_{7LRyW|bP!dHqhPx7K9J!4{!EH%f{Mm2!SA7C)LDd`KeDXCYU4$AS_!#(_u(y> z|sQhxwMG}FU6Klz|XYs7SWaJLXh^&!aneA<=1WMzDN?RzZ#TVr?ne$E2OUm`ZKA+^UVV}rSr+t`RS5IHwOu3fXF!FC9?4* zAS=l=%_;sook`|A#P4#em3)g}CrEEEI2P~X3WipbAVnMOLD+~haxwsOA{Glv2RaN+ zsp)`h#Ca8b7;G-CwI($>{N*MoJ+<~BQW_%gh2?)0ZEM(CC@!FtVz$I49!@w0Yy^z2UrZGu=h?;=f)`jKxf7f z?V#GEr-mp6SdpXmc7}ZAb#cf?*wd9szLVB5BgAjBxiUb7zHrM;E)H${$NJvO)GoA3 z=_Tjlcm#&g(yQ{Rmcb0A82dw^Vm?azz4O=ez$cQKg@&=_>|AnR(4I_KspyV0+NCwz zarFLM(8~ERCRfGXg+ifF;_2=_I@4btv$8jtrV(W#@=_z}r7{Y*GQ!K}JZIQG;c@@R z*zchtISAeWQA*p>UEZ43s%Bi(x=c2@ADMH&4<9i7FV6c0pmi_?hT&;L9aK!vvUTu^ zWvtvNX6msJBI>W--4&64fuyep70fo>mgubByJD5x1a7O>pCyL z+vFS-R?20*b>V#r-AGm^KD^|fljrpO>9YaGtZZ$^)V#t%rhnYqC zm#MXA+zCzaLho6^=9_cSCHZla8}Or0Bl_CV4lwnJk<)c%7%XHeEswegTF**KT4<~Mgi&0hm@6M=Gl5CQTJoKy3pS;DLzns+ieJPDfpP9`FNSaJ@;=*f;;S8 zjdGA8GILm$#?F1gedfHSEW(A{oY-mU)funS1KivO&)k$_k}Iw<0_uPaT;-9_W2_x2 zg+-7hfO9LXZ#ykro>wIM*jDt$r8C@y?c*m5$v%;n=A}`{+@D>rAM6Y9y%50`0UZzu=ZFF&159IxTEYJ=uX-^4z^rnI01IyuFl7Jmmh4ChB zCzR{a^*^bCCbYg!DP}$}Rg1~OhvRl}*aa%R1WOY$*~Wb{Fd&q-ly~H4q?DZ@Vt1`$ za6J5vL+Ig*BeyKTXELE@9twVLZKsl7hwQm)O|lV!b26)w)EN0x%GEheac+>a+fB5U z)xHJOm`ix9zY3-7*V<6C^%RUauJhpEV8*EUe$YOvRpaPPsY#IjEJ+ro1s`CL>QkB4 z1F_-?SVj>;F?`_3)H+hJd%tD*>ye~Lr!HK?QrD;(3>u{mhatJn{BVp@_c~s#|3UVl 
z8#Oh}fE$QKTa+Dt8CIutF0tG|8yy5HFZ-hqbb56nkh_l+7hmb0t8e5+)fZ9O2rnEp z$NcuxXohmAS>AXwvGem}=ruQ+5~90f1`sQADMXU#Tt!@ZD$!Hw25Du5Ly^M08|9~LBaQ@}s0tewi@Cy4a9#_wAACh_79LKH&;67u!0L5j1P`q zFbYvi9=!sQm_RuNum9)(c{RA8o{Vd4ZGGo81JLRt6_6)mQMM3qG=s(8H zI$X4ZYASj*ZC(0~d7j+_E21?_o}tPYRQ`=5QZAtXSF|Z=lh{9l3N{Ka8s%3UaQY6h5O!TuE-C{$0PFb3}cUvpY5Aew{oPluP77gU1eQKVXKb{d zy1F+Y7|r@+n|fG$%xvZlMj%Vws!`3)e@f3L?0K* zrIU_vLMRlkqT+RS8vqic9hSjAi);A)slJP^xtB9Ae1TCEe%|(XW(7eJj;pMkm9^RPBsaQ_n39}PNmMv|j@t<>6wojox zUOs-f{+}`q0V{M>q}U{wzC((o#q0IV;*)tWIM!6y1q!!s8c-y(6vOfpzQL1b-^G!s z6MJ*m#c@JGFB*R(qov^$>n0hT%W`cb^({Y+wEqDk0LA1WN^LN&58XJpO)DmoT3bB2oWW=3;sp|f z^h33y2~h|T3vlg%Scqk6`PC|YGRI_XPqM?4Ve9gyWnZoF^1ZwivG&-nuP7JPv>zQ7^_ zkqH@PF?~^W&|iQCv`+`AJ)kz!mxI(PP&@oT;}EDV`2`+?`2-VBer=%~0j|UDMEQ;Y zij!Ze>0jt{U-v`lrC;=;pIycKCzM+6<*`3%p=2}2YA`6YhRfx{HDgj4QpL#dq5hwZ z{09e=Y~;5IG%|b$Dv1zFxMZa7ssBT=2{bDFFe-^;OSn`du~c$d4k)?E|DrgRM6@Mb z3KDrLxgSlrIJXuj7~*TOC^`*kd%x3FPc+H#9KzEAhD;COLL6MM&eDN zQR4pxFGXRvMM+6}4r#NX-06R?Ku=Vl^qL8>VsxxBgLCd7dB;)jObdc(+U_ zs{fbvKZLVP__6Z;YEa|3-}Era93U)}Lh9j@xxcs4{8;%PQaOZKDkay$r*ez6(#Wnr zP2`g4Vg47R>)}(l$y;e8SHj10(eyAW9Ku7pqIvgu*cI1F1VW!J-} z{SPI_bK&$b$sH&xmBQ;|lDW}aX~b9lhtv+GmP+aMF{#{Ytu*p0k`uXfdYJ#k`1+U> zZuV9h>6Ov(Ts%EYN(XaGrTF@kw*UM)V?gjLz#TG?C;xvuMwv>Ab-#+nzP=ZO^cnp{ HzkT~ZGH^>f literal 0 HcmV?d00001 diff --git a/data/example_predicted_sigs.rda b/data/example_predicted_sigs.rda new file mode 100644 index 0000000000000000000000000000000000000000..c5be776ac70d69ded38aa6f8cd70e8d1722e3bb8 GIT binary patch literal 6320 zcmaKnQ&=4Upn$i9Wnzh$e9Rql=Zi<(DLSD;@O|a5gsb_tr7#MDX^5=;T-$ST#kiRfAex zT^;R@;Q(C{d0nS4%=ZqzLzG%hYKFYc$= zL89wHLfkc#7O;#902n+;H75ihY_6q>#=B=g+VazzYD|7KC3jgQG$lI~bU9D~Htp{N zOZ&ZNW0zQ(5rczMH^ZX3es}j~PfKNC)GYY=@O|Xh-2!WN1&<)(zd6x`{l3Ei@;6}r zmFwRs0cnJ}PpOHQU9V5RGV$a?L=LC)BkvkEmQdFi`@GDI95(&YCgRbYKX-PKkhE5? 
zr5IG#V-r`d@^VL>VG=`u+#0(cYPN=TQ=f!p2kF={uA~<7Tj=%w~jb+nkW`$0A{ zH9e2^whVL^<@|BMx4eJJxGjvYKyGBM>WM$UBDX0wFW4t2Kzoh-1`vptQd8*2(7Q(3 z7st6(jw@iLMYUI-w1R-62TZ{@-|lv=zNp9q-gM1OjzV_s=lDFSAn3l-VWJJW$}6-+ ze;ja%-dV`6lf;3IFQALI zH>`8q8pDfFoE!h}(X?#8m8Z&f^4?CRSp&L)9skpS1VgaASYRB%atSFRe#^NJ-eByLd)1WmizRs*tnZ+pUWype;YNVh5HfulWc%4eIa zp@38acHxTFia>wdvuv^9vcTFsxJrA$~L}eh8FB#y%6L-!#_PR6_ZvQL0NQ{UE2j;ACA}q## z#1;*%{`N9P%J1n2by%g8Rrz2-EIW%Fm#@|esZ?ij4q8~Y_}FC*D!P?v->S*+5g1ug z@zMxVXBwN>|9l>t%t+?5Oz!;6th;ZaP+#i~^NtA8OSW7nr}7W_x$ZsQvVU*imJ6C5 zB(JN^4pY*p!2YjC6&F`H`;eHJbx@8`*{K#DX@cj^rb_yf1cL5U!})m)WrBGzN9d=A zZ>|4mi;WJtaGoOr(!dPlGna_SM`f2((Z$f$Pv;DMhN48l^rLkR0KOqS zY6#FEw#DFunCPH>J8FXvL=elQ>2rg-G3@Ujp^D&I6A(HpYfN=xr|!8P>_%=tjQdz$ zjy!{!xh?c`o_M7}Hr1Y2OMk*;BOW;&t_E`O^13E%?JglA~gT$)0O9u4o7c$#gHj zr{IoVdDa)bJ%qqw)s4F5pN~HmvTNeqa7SC57Zcy~bo0)r^RIsgr@7Zi{FUd8;6U6l zAO*-xq`uITT+B`Bfjd*V0XKo)EEz9}Bm`(Y{uUYhDy97hG+FNHXc_g06Y;ai9seMep)D`wkP5qo@lK}s+H`Tq0|Rq57UW4^yQSF zbZ49I@p(Rj9A7O7sFMT&9mhq8Cz_Ibb*($7j(X**n|>f~CLN#Zi*VD`VLHujj+Yex zXnBEm)2NjX8^_pNgyDv7gGM7FTRU0~&+Y;4NEsf8Grt3}BwTAFhu?ITc?6aXrw%(+ zLqoTKLSO%imxC(>#0qXGn(g_dwg&vYcXZ?@HA#Z(k;C-A2Kx~&u5}V&S;EG()T5UM zJi*Wo-uOuh;=pqjI1;J6`Z-xwS7}za3#!TV0y_D zS$GY|k*%y~SNLm#owfY+ZYtgS&%#%c$_35(sH_E-$<RpN}Lk$%2diRBPZrtIi4woM(CdzvtH%OFd^)kcaB}=7=y}_)Q_a**s z`SecJafPI6ferLCJ+!{%)@;4mX&EVze<3_w<+1t_J@4=rJBo2it)1_&MKN-V>(&-D zQ(cZ@$Gt@iSjM&;naUm~3fs@q14`4x$=UU=%Rg3cJRm%v80{VmjT~`q5!!_^|NUIi zKS`JnQTfW9)k&l_bFco8HIlWLs5Z{p2oC@p9PJl?LoazPeah1jTGOR;*nA$4DU@y1 zxMT2Enaw17Egsf3-4nPdVxw{o5!X|C&k3Q?b8-QPyluDW3I>KFw$aE5Xh(U#UF(5X zGZIaLp8ks#9gDc$lsRuZx`9;oZA`Wi;b=;jvBc1BE;1bA?1-`c;-PIrwD5jiTYTo1 zWQ}Lt5_`5-tdt?i%QHeVmG!J+7fg@BOI4S1<)fH%&amrb*kf(p_8HNb_PV5mP5d83STtC-vZ=?V|hl7UWD9!-&s=b;UU#F8sIn zcKKU`OMWUb!MmJIh32eQ%s!yP*9@pJ{s6ipmV(%R_yybRXI^=$ka15>0&=6+5Xi@y zBSTM|lk1J;v)?S$%V_p~Q2?1nv!7YQgMgu2rA*QNv=ELF57b>$;gvPWr>D7@E>PfLWrZhN4sOk=+Y!2BzIA8jsCtd)U%p~Sa1>zC>uflF5XX@O6e$gVf*J-15=V> zLoQ`bxhH#qQGWfM{XUE`(0mTYHVKISQ3qrNkJzDw6_A=%7#L`kgM}d6&9%OSSn=c_jtis;Y 
zWnDREySuhJCJ;M{Tl(ZM;2qL-_ypEvjCAHp+ImiQU6F+TX<}9k@=e_cfp6F{_3Lj_ zEl>#R4Iiq=oh{}MpMsipdzfklnN?fNl)v=QM+R$t8rnhcNeK#EieL8pb>#(0EtVn0-!Qp0pNkhcVvA96Tzk!HM9E z9|i@%DEdIvuW`#>#*g97pR^lHf-@q+8gBl8%ek2FYPMc5Ry`bwLKVFF+L?qh{+LqU zIE@Me$Y%=+f4*TuJDXDx*WHVawGs;aA3u+wzDt9l6uHrAa06V;ilf(ST)!`7zUd@z#;g)}+C zVDO4067M;>qsbE2mae+`dmH5wwb3BkT?Y!xn^WZs%H@_DeYjsuzYBRby(0p0ape;} zt6aZ+%_$)?lA6||Gtec`kA9*q5^TT!l|=i2JiL{0l8nX$(v8}3fEFaqP0wW0Be7FT z8!xVh)W29~y7p4ZEV6uS$hLbYeI-0<*nvkc9p^@P!O0i0?YGcz?Frm*KeSQ53^!h3 zwtTQZl^LwcN-SiLAl|jQEDON6p>d(3;!ymR-@LRhlQJ^tig@frF|&;uH-sIg^vA6d zMLEvlu!d)(p)Aod4T-HJqIte;5k_6`IIkv^bU7h(sk50TqceBoqx^wQh34XA>h+cD zMUFSn3c6;3(~rH{*dYbLJth6L>cj_|)7PjiW~g>Yo58LDV*GwIl2Ggbtm~U)S0>IFfBqLsRB*VvmAI#p z2i{k&<1BuHepgw~@$;uMHTz{x2W&%);ubw??9xq1X~m(vqJ+ejFVmzhfa+Qj!khQ?Q!u}&&oRmX!e9CY>7GBpP>-3Y8f&5oqQO7Db!iLx(%A>If zbeLK3o^U>f>l(lKf@~GW=yr~ew66~-NE`^fS4)&WWDawUEakDZ5qyEXO(3n_CUtJ~ zth?TIw*Q@Ji5$!2i(7D-mwxYXUy7^FP2l@V(ADHZGX0g08_h6wY?qE3aIzB&*Nv=7 zxxlw<_SLD#vytAMB478(`$BitS<|BLjocsjpDaG0y!|sk9%MI`mD9e;s^5-JAQf0% z&D+8PEmoA!qhf`u@_`eyv%jiAeA|$tQOrYp#7%sQ(8A^7{e(=M%_z&@xyrDwinH3wOq2cuHn6I(R3eRCzw-|wK+$X6T9b_jS(Bq z4H_BNQ4)Ws`R@i#uHZ=kH+KYPTfv&2K0PGN4;{O=>%W*ET9z_SV`*Pe<}X%qMa{6` zDVKpaO1V47R|5`-c8n~TG0kY$R-oq3l6@SL;T)9h)kfuXR?fjT!BvUuixWc#0RHF~u*@%Lx4P?6J0}z&>XYgw7)LC|?K^Ep?i#zA_JrkF8$IIBil9l+87pYwuT;D`1{9Vip|6W#TPt3FvhvvB#caDJTJ4? 
zQz$@e09)uw0S|-G^6~pmnUFQ!uxuPC3=| zj7)w@sz%tTrZbl6-~^aU(#WZ46tnuUj;8C)J6_;^_AZ{~fvP7}=9z^y^i$2E6s5HY z4#+6xJg$WjYl=Q&@!QDY*R&e%f)e6pVn4sI2^t)er)7_4P9Zh8@hx}iP5*oiR~m#F z3P$@EeO=v#ubt*99)BSksn$tteFANY@WNTn4H%0HnhhGH$X>y{y}!7&ae9ilFeq7n zdSoy^$m1`MgnwIjcel~tNMzYz<$Dn6O6xkH5ScE9?n*PP)0-4d4SL&JZ&M12V8$`h z>2Ki*)b*$U_29W}Q(}(i8LiMV(5QFP|iaiL0!omFIk5lC(i-l>Zi*l~{ zDZ5U{ebQ;L0h_*yx@msp)i&a6ne~TPru+Ku?6YX&SMPPc-w07(I$TlIa(1|W!@qy9 zCW+nV-75gfrTg2E+x2vX>RUAs&dB<)!O=4MWZ<~GGG@5s-j{*_x5l*lvQMn!^r5D3 zWm==hfe%SDo9V*fZ}{PREJ z&hB3I$xo4uvXh*2NuPy=5C5q=eXi|e+x|KIegC~VB(Soa@2;=QrjbJ_h*Ash>6!GR zgKD|i^zU^AuBDh28lZMw;@^S~1VoIqn@rf3^0bavm{D^kSVsuq2O2MPNmB+af-q|7}?Mfro;9^aEQS2~!j|1;> zKqlGVXIR)l$+QJxJ^AAS8^g~8Y$dI;=ia_3zuG^vjt9O^uiuUi8GME)5|X)CjN%3G zmshYTDWvPf9ywhLNUuNOiqR?|}Idcd1?LmvRA8 z!*p+}Ojo3&gQrjF-T3qsE3EnAA++tvhy`wqe-^c&Bdlc^9n}D&aqUwnM66g1j+f42 zPULW!_P>&&zJRFbjcwhdbSiJ~+cSvb1VAhzECFFuPGsSUr(G@=cW;}bzDXS5;vwcN z+K4e78Fjzb4VfSL*M}})O`jitTXlB2_wYB?0+8E-@oGbjBiw$y#&HS`M1fA=Q2GIn z`cf@w;eb)twgvnM#%r0BRzsv0t(z1{AF8VXZ=%C*vNTUqno5e)L*rDWw*E525}Hwp z*Q9BlrZA-e=T@Zl{UXgCdR&SFAt*|hyw!*>Mw|b?CB1KjG<)iCDIP?#C|%)JBhr{@ zUh#j+lR)1J_*0ZdO9XwUu0{kswIYM{_z+8G20eZgfq9zblt#pVrO;bf46&qUl)l-d zDcvST8gtDnGWd>5vu7UD<3lWq(*IBS|MvaQNecY_ZB}IZrPqmw8>B<|T(I-*XcK;X o29uS&paj4IJqX_B)n8;~|4{-CQtkhltOve_1IFQhDBivM4 Date: Tue, 25 Jun 2024 13:19:17 -0400 Subject: [PATCH 13/17] Indel plotting order correction --- R/plotting.R | 43 +++++++++++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/R/plotting.R b/R/plotting.R index 19f1f3c4..41047f72 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -189,6 +189,9 @@ plot_signatures <- function(result, plotly = FALSE, y_axis_spacing = rep(strrep(" ", max_num_digits), 2) } + # add context for x-axis label + plot_dat$df$context <- rep(annot$context, each = num_sigs) + # Plot signatures plot_dat$df %>% ggplot(aes_string(y = "exposure", x = "motif", fill = "mutation_color")) + @@ -197,7 +200,7 @@ plot_signatures <- 
function(result, plotly = FALSE, ggplot2::xlab("Motifs") + ggplot2::ylab(y_axis_label) + ggplot2::guides(fill = ggplot2::guide_legend(nrow = 1)) + ggplot2::scale_fill_manual(values = color_mapping) + - ggplot2::scale_x_discrete(labels = annot$context) + + ggplot2::scale_x_discrete(limits = plot_dat$df$motif, labels = plot_dat$df$context) + ggplot2::scale_y_continuous(expand = expansion(mult = c(0, 0.2)), limits = c(0, NA), n.breaks = 5) + ggplot2::geom_text(data = sig_name_labels, @@ -234,15 +237,27 @@ plot_signatures <- function(result, plotly = FALSE, limits = c(0, NA), breaks = c(0, 0.01), labels = y_axis_spacing, n.breaks = 4) + ggplot2::ylab("") + - ggplot2::geom_rect(data = motif_label_locations, + ggplot2::geom_rect(data = motif_label_locations %>% arrange(x), aes(xmin = x, xmax = xend, ymin = max(y), ymax = max(yend)), - fill = color_mapping, color = "black", - linewidth = 0.25, inherit.aes = FALSE) + - ggplot2::geom_text(data=motif_label_locations, - aes(x=x+(xend-x)/2, y=y+(yend-y)/2, - label = stringr::str_to_title(mutation_color)), - fontface = "bold", size = 4, - color = label_colors) -> p2 + fill = factor(color_mapping, levels = color_mapping), color = "black", + linewidth = 0.25, inherit.aes = FALSE) -> p2 + + # adjust motif label direction if indel signature + if (table_name %in% c("IND83", "INDEL83", "INDEL", "IND", "indel", + "Indel")){ + p2 <- p2 + ggplot2::geom_text(data=motif_label_locations, + aes(x=x+(xend-x)/2, y=y+(yend-y)/2, + label = stringr::str_to_title(mutation_color)), + fontface = "bold", size = 4, + color = label_colors, angle = 90) + } + else{ + p2 <- p2 + ggplot2::geom_text(data=motif_label_locations, + aes(x=x+(xend-x)/2, y=y+(yend-y)/2, + label = stringr::str_to_title(mutation_color)), + fontface = "bold", size = 4, + color = label_colors) + } # Adjust theme @@ -275,8 +290,16 @@ plot_signatures <- function(result, plotly = FALSE, axis.title.y = element_blank()) } + # adjust height of motif lables if indel signature + if 
(table_name %in% c("IND83", "INDEL83", "INDEL", "IND", "indel", + "Indel")){ + height <- 5 + } + else{ + height <- 1 + } - figure <- ggpubr::ggarrange(p2, p, ncol = 1, nrow = 2, heights = c(1,15)) + figure <- ggpubr::ggarrange(p2, p, ncol = 1, nrow = 2, heights = c(height,15)) if (isTRUE(plotly)) { figure <- plotly::ggplotly(p) From 2286a66d56ac152b6425af2ee27de677b330a7a2 Mon Sep 17 00:00:00 2001 From: Natasha Gurevich Date: Tue, 25 Jun 2024 13:20:38 -0400 Subject: [PATCH 14/17] Benchmarking tutorial and other tutorial updates --- vignettes/articles/benchmarking_tutorial.Rmd | 200 +++++++ vignettes/articles/tutorial_tcga_cnsl.Rmd | 528 ++++++++++++++----- vignettes/musicatk.Rmd | 381 +++++++++---- 3 files changed, 886 insertions(+), 223 deletions(-) create mode 100644 vignettes/articles/benchmarking_tutorial.Rmd diff --git a/vignettes/articles/benchmarking_tutorial.Rmd b/vignettes/articles/benchmarking_tutorial.Rmd new file mode 100644 index 00000000..d8aada44 --- /dev/null +++ b/vignettes/articles/benchmarking_tutorial.Rmd @@ -0,0 +1,200 @@ +--- +title: "Benchmarking signature predictions against a ground truth" +date: "Compiled `r format(Sys.time(), '%B %d, %Y')`" +author: "Natasha Gurevich, Joshua Campbell" +--- + +```{r setup, include = FALSE} +knitr::opts_chunk$set(warning = FALSE, fig.align='center') +``` +# Introduction +This tutorial describes how to benchmark predicted mutational signatures and exposures against a known ground truth using musicatk. + +```{r library} +library(musicatk) +``` + +# Step 1. Initializing the Benchmarking Structure + +Results from all analyses related to the same ground truth signatures and loadings can be stored in one `full_benchmark` object. Benchmarking can be performed on multiple prediction instances and stored in one place, allowing for easy comparison between discovery methods or discovery parameters. + +The `full_benchmark` object is initialized using the ground truth signatures and loadings, both of which must be a matrix. The signatures matrix should contain signatures as columns and mutation types as rows.
Each cell details that mutation type's percent contribution to the given signature. The loadings matrix should contain signatures as columns and samples as rows. Each cell details the number of mutations in the sample that are attributed to the given signature. + +The example below prepares a `full_benchmark` object using a synthetic breast cancer dataset provided in the package. + +```{r prepare ground truth} + +# prepare true signatures +true_sigs <- c("SBS1", "SBS2", "SBS3", "SBS8", "SBS13", "SBS17", "SBS18", "SBS26") +true_signatures <- signatures(cosmic_v2_sigs)[,true_sigs] + +# load true exposures +true_exposures <- synthetic_breast_true_exposures + +# load count table +count_table <- synthetic_breast_counts + +``` + +``` {r initialize full_benchmark object} + +# initialize full benchmark object with ground truth +full_benchmark <- create_benchmark(true_signatures, true_exposures, count_table) + +``` + +# Step 2. Prepare prediction to benchmark + +In order to benchmark the results of signature discovery, the prediction results must be stored in a `musica_result` object. + +## If signature discovery has already been completed externally + +If signature discovery has already been performed and therefore predicted signatures and predicted loadings already exist, this information must be stored in a `musica_result` object using the `create_musica_result` function. Example data is used here. + +```{r create musica result object} + +# read in predicted signatures +predicted_sigs <- example_predicted_sigs + +# read in predicted loadings +predicted_loadings <- example_predicted_exp + +# store in a musica result object +res1 <- create_musica_result(predicted_sigs, predicted_loadings, count_table) + +``` + +## If signature discovery has not yet been performed + +If signature discovery has not yet been performed, it can be done via the NMF or LDA algorithms within the musicatk package.
Mutation data can be read in and stored in a `musica` object either from a file of variants or from a +count table using the standard procedure. + +In this example, mutation data is stored in a count table, which is used to create a `musica` object. Signature +discovery is then performed using the NMF algorithm and 9 signatures. + +```{r perform signature discovery} + +# create musica object from count table +count_table <- synthetic_breast_counts +musica <- create_musica_from_counts(count_table, "SBS96") + +# prediction +res2 <- discover_signatures(musica, "SBS96", num_signatures = 9, algorithm = "nmf") + +``` + +# Step 3. Perform benchmarking + +Once the prediction is stored in a `musica_result` object, and the ground truth is stored in a `full_benchmark` object, benchmarking can be performed for the prediction. To perform benchmarking in the simplest form with all default parameters, the following command is used. The inputted `full_benchmark` object is updated within the function, and nothing is returned. Since no `method_id` is supplied, it is automatically generated based on the variable name of the provided prediction. + +```{r perform benchmarking} + +benchmark(full_benchmark, res2) + +``` + +## Returning a new object from the benchmarking process + +If a new `full_benchmark` object is desired, the `make_copy` argument can be used to return a new object, leaving the inputted one unchanged. Below, the res1 prediction will be benchmarked and the result is saved to a new object. For better readability, all plots are suppressed. + +```{r perform benchmarking copy} + +new_benchmark <- benchmark(full_benchmark, res1, plot = FALSE) + +``` + +# Step 4. Downstream analyses and visualization + +Any element of the `full_benchmark` or `single_benchmark` objects can be easily extracted for further individualized analysis, and all plots generated during the benchmarking process can be recreated.
+ +## Summary Tables + +Summary tables can easily be extracted from a `full_benchmark` object to visualize the results of the full analysis. + +```{r view summary} + +method_view_summary(full_benchmark) + +``` + +```{r view sig view summary} + +sig_view_summary(full_benchmark) + +``` + +## Accessing individual benchmark results or predictions + +Functions to access an individual benchmark from the full object, or to specifically access a prediction at any step within a benchmarking process make individualized downstream analyses easier. + +```{r extract a single benchmark} + +# pull out the single benchmark object for res2 from the full_benchmark object +res2_benchmark <- benchmark_get_entry(full_benchmark, "res2") + +``` + +Once a single benchmark object has been extracted, the three different predictions within it can be extracted. These include (1) the prediction (predicted signatures and exposures) before any benchmarking adjustments have been made, (2) the prediction after duplicate signatures have been adjusted, and (3) the final prediction, after both duplicate and composite signatures have been adjusted. + +```{r extract a prediction} + +# extract the res2 prediction before benchmarking adjustments have been made +res2_initial_prediction <- benchmark_get_prediction(res2_benchmark, "initial") + +# extract the res2 prediction after duplicate signatures have been corrected, but before composites have been corrected +res2_initial_prediction <- benchmark_get_prediction(res2_benchmark, "intermediate") + +# extract the res2 prediction at the end of its benchmarking process +res2_final_prediction <- benchmark_get_prediction(res2_benchmark, "final") + +``` + +## Comparison table between true and predicted signatures + +To extract a comparison between the predicted and true signatures from any step in the benchmarking process, the `benchmark_compare_results` function can be used.
+ +``` {r compare results} + +benchmark_compare_results(full_benchmark, "res2", "Initial") + +``` + +## Recreating plots generated during the benchmarking process + +A comparison between predicted and true signatures from any step in the benchmarking process can be plotted with the `benchmark_plot_comparison` function. + +```{r plot signature comparison} + +benchmark_plot_comparison(full_benchmark, "res2", "Initial") + +``` + +Predicted signatures from any step in the benchmarking process can be plotted with the `benchmark_plot_signatures` function. All of the customization options found in the generic `plot_signatures` function, such as `same_scale`, `show_x_labels`, `show_y_labels`, etc are valid. + +```{r plot signatures} + +benchmark_plot_signatures(full_benchmark, "res2", "Intermediate", same_scale = FALSE) + +``` + +A comparison between predicted and true exposures from any step in the benchmarking process can be plotted with the `benchmark_plot_exposures` function. + +```{r plot exposure comparison} + +benchmark_plot_exposures(full_benchmark, "res2", "Initial") + +``` + +To recreate the before/after plots comparing predicted and true exposures before/after duplicate or composite signatures are corrected, the `benchmark_plot_duplicate_exposures` and `benchmark_plot_composite_exposures` functions can be used, respectively. 
+ +```{r plot duplicate/composite exposures before/after adjustment} + +benchmark_plot_duplicate_exposures(full_benchmark, "res2") +benchmark_plot_composite_exposures(full_benchmark, "res2") + +``` + + + + + diff --git a/vignettes/articles/tutorial_tcga_cnsl.Rmd b/vignettes/articles/tutorial_tcga_cnsl.Rmd index 2856b6ab..6c5fe66b 100644 --- a/vignettes/articles/tutorial_tcga_cnsl.Rmd +++ b/vignettes/articles/tutorial_tcga_cnsl.Rmd @@ -1,7 +1,11 @@ --- title: "Analysis of mutational signatures with musicatk in the R console" -date: "Compiled `r format(Sys.time(), '%B %d, %Y')`" author: "Aaron Chevalier, Joshua Campbell" +date: "Compiled `r format(Sys.time(), '%B %d, %Y')`" +output: word_document +editor_options: + markdown: + wrap: 72 --- ```{r setup, include = FALSE} @@ -9,28 +13,68 @@ knitr::opts_chunk$set(warning = FALSE, fig.align='center') ``` # Introduction -A variety of exogenous exposures or endogenous biological processes can contribute to the overall mutational load observed in human tumors. Many different mutational patterns, or “mutational signatures”, have been identified across different tumor types. These signatures can provide a record of environmental exposure and can give clues about the etiology of carcinogenesis. The Mutational Signature Comprehensive Analysis Toolkit (musicatk) contains a complete end-to-end workflow for characterization of mutational signatures in a cohort of samples. musicatk has utilities for extracting variants from a variety of file formats, multiple methods for discovery of novel signatures or prediction of pre-existing signatures, and many types of downstream visualizations for exploratory analysis. This package has the ability to parse and combine multiple motif classes in the mutational signature discovery or prediction processes. Mutation motifs include single base substitutions (SBS), double base substitutions (DBS), insertions (INS) and deletions (DEL). 
The package can be loaded using the `library` command: + +A variety of exogenous exposures or endogenous biological processes can +contribute to the overall mutational load observed in human tumors. Many +different mutational patterns, or “mutational signatures”, have been +identified across different tumor types. These signatures can provide a +record of environmental exposure and can give clues about the etiology +of carcinogenesis. The Mutational Signature Comprehensive Analysis +Toolkit (musicatk) contains a complete end-to-end workflow for +characterization of mutational signatures in a cohort of samples. +musicatk has utilities for extracting variants from a variety of file +formats, multiple methods for discovery of novel signatures or +prediction of pre-existing signatures, and many types of downstream +visualizations for exploratory analysis. This package has the ability to +parse and combine multiple motif classes in the mutational signature +discovery or prediction processes. Mutation motifs include single base +substitutions (SBS), double base substitutions (DBS), insertions (INS) +and deletions (DEL). The package can be loaded using the `library` +command: ```{r library, eval = TRUE, message = FALSE} library(musicatk) ``` # Importing mutational data -In order to discover or predict mutational signatures, we must first set up -our musica object by 1) extracting variants from files or objects such as -VCFs and MAFs, 2) selecting the appropriate reference genome 3) creating a -musica object, 4) adding sample-level annotations, and 5) building a count tables for our variants of interest. -## Import variants from files +In order to discover or predict mutational signatures, we must first set +up our musica object by 1) extracting variants from files or objects +such as VCFs and MAFs, 2) selecting the appropriate reference genome 3) +creating a musica object, 4) adding sample-level annotations, and 5) +building a count tables for our variants of interest. 
-Variants can be extracted from various formats using the following functions: +## Import variants from files -* The `extract_variants_from_vcf_file()` function will extract variants from a [VCF](https://samtools.github.io/hts-specs/) file. The file will be imported using the readVcf function from the [VariantAnnotation](https://bioconductor.org/packages/release/bioc/html/VariantAnnotation.html) package and then the variant information will be extracted from this object. -* The `extract_variants_from_vcf()` function extracts variants from a `CollapsedVCF` or `ExpandedVCF` object from the [VariantAnnotation](https://bioconductor.org/packages/release/bioc/html/VariantAnnotation.html) package. -* The `extract_variants_from_maf_file()` function will extract variants from a file in [Mutation Annotation Format (MAF)](https://docs.gdc.cancer.gov/Data/File_Formats/MAF_Format/) used by TCGA. -* The `extract_variants_from_maf()` function will extract variants from a MAF object created by the [maftools](https://www.bioconductor.org/packages/release/bioc/html/maftools.html) package. -* The `extract_variants_from_matrix()` function will get the information from a matrix or data.frame like object that has columns for the chromosome, start position, end position, reference allele, mutation allele, and sample name. -* The `extract_variants()` function will extract variants from a list of objects. These objects can be any combination of VCF files, VariantAnnotation objects, MAF files, MAF objects, and data.frame objects. +Variants can be extracted from various formats using the following +functions: + +- The `extract_variants_from_vcf_file()` function will extract + variants from a [VCF](https://samtools.github.io/hts-specs/) file. + The file will be imported using the readVcf function from the + [VariantAnnotation](https://bioconductor.org/packages/release/bioc/html/VariantAnnotation.html) + package and then the variant information will be extracted from this + object. 
+- The `extract_variants_from_vcf()` function extracts variants from a + `CollapsedVCF` or `ExpandedVCF` object from the + [VariantAnnotation](https://bioconductor.org/packages/release/bioc/html/VariantAnnotation.html) + package. +- The `extract_variants_from_maf_file()` function will extract + variants from a file in [Mutation Annotation Format + (MAF)](https://docs.gdc.cancer.gov/Data/File_Formats/MAF_Format/) + used by TCGA. +- The `extract_variants_from_maf()` function will extract variants + from a MAF object created by the + [maftools](https://www.bioconductor.org/packages/release/bioc/html/maftools.html) + package. +- The `extract_variants_from_matrix()` function will get the + information from a matrix or data.frame like object that has columns + for the chromosome, start position, end position, reference allele, + mutation allele, and sample name. +- The `extract_variants()` function will extract variants from a list + of objects. These objects can be any combination of VCF files, + VariantAnnotation objects, MAF files, MAF objects, and data.frame + objects. Below are some examples of extracting variants from MAF and VCF files: @@ -52,7 +96,9 @@ variants <- extract_variants(c(lusc_maf, luad_vcf, melanoma_vcfs)) ## Import TCGA datasets -For this tutorial, we will analyze mutational data from lung and skin tumors from TCGA. This data will be retrieved using the the `GDCquery` function from `r BiocStyle::Biocpkg("TCGAbiolinks")` package. +For this tutorial, we will analyze mutational data from lung and skin +tumors from TCGA. This data will be retrieved using the the `GDCquery` +function from `r BiocStyle::Biocpkg("TCGAbiolinks")` package. 
```{r get_tcga, message = FALSE, results='hide'} library(TCGAbiolinks) @@ -82,70 +128,103 @@ colnames(annot) <- c("Tumor_Type", "ID") rownames(annot) <- annot[,"ID"] ``` -Note that with previous versions of the GDC database, you may need to set `worflow.type` to another string such as `workflow.type = MuTect2 Variant Aggregation and Masking`. +Note that with previous versions of the GDC database, you may need to +set `workflow.type` to another string such as +`workflow.type = MuTect2 Variant Aggregation and Masking`. -# Creating a musica object +# Creating a musica object from variants -A genome build must first be selected before a musica object can be created for mutational signature analysis. musicatk uses `r BiocStyle::Biocpkg("BSgenome")` objects to access genome sequence information that flanks each mutation which is used bases for generating mutation count tables. BSgenome objects store full genome sequences for different organisms. A full list of supported organisms can be obtained by running `available.genomes()` after loading the BSgenome library. Custom genomes can be forged as well (see `r BiocStyle::Biocpkg("BSgenome")` documentation). musicatk provides a utility function called `select_genome()` to allow users to quickly select human genome build versions "hg19" and "hg38" or mouse genome builds "mm9" and "mm10". The reference sequences for these genomes are in UCSC format (e.g. chr1). +A genome build must first be selected before a musica object can be +created for mutational signature analysis. musicatk uses +`r BiocStyle::Biocpkg("BSgenome")` objects to access genome sequence +information that flanks each mutation, which is used as the basis for generating +mutation count tables. BSgenome objects store full genome sequences for +different organisms. A full list of supported organisms can be obtained +by running `available.genomes()` after loading the BSgenome library. 
+Custom genomes can be forged as well (see +`r BiocStyle::Biocpkg("BSgenome")` documentation). musicatk provides a +utility function called `select_genome()` to allow users to quickly +select human genome build versions "hg19" and "hg38" or mouse genome +builds "mm9" and "mm10". The reference sequences for these genomes are +in UCSC format (e.g. chr1). ```{r select_genome} g <- select_genome("hg38") ``` -The last preprocessing step is to create an object with the variants and the genome using the `create_musica` function. This function will perform checks to ensure that the chromosome names and reference alleles in the input variant object match those in supplied BSgenome object. These checks can be turned off by setting `check_ref_chromosomes = FALSE` and `check_ref_bases = FALSE`, respectively. This function also looks for adjacent single base substitutions (SBSs) and will convert them to double base substitutions (DBSs). To disable this automatic conversion, set `convert_dbs = FALSE`. +The last preprocessing step is to create an object with the variants and +the genome using the `create_musica` function. This function will +perform checks to ensure that the chromosome names and reference alleles +in the input variant object match those in supplied BSgenome object. +These checks can be turned off by setting +`check_ref_chromosomes = FALSE` and `check_ref_bases = FALSE`, +respectively. This function also looks for adjacent single base +substitutions (SBSs) and will convert them to double base substitutions +(DBSs). To disable this automatic conversion, set `convert_dbs = FALSE`. -```{r create_musica} -musica <- create_musica(x = variants, genome = g) +```{r create_musica_from_variants} +musica <- create_musica_from_variants(x = variants, genome = g) ``` # Importing sample annotations -Sample-level annotations, such as tumor type, treatment, or outcome can be used in downstream analyses. 
Sample annotations that are stored in a `vector` or `data.frame` can be directly added to the `musica` object using the `samp_annot` function: +Sample-level annotations, such as tumor type, treatment, or outcome can +be used in downstream analyses. Sample annotations that are stored in a +`vector` or `data.frame` can be directly added to the `musica` object +using the `samp_annot` function: ```{r musica_add_annotations} id <- as.character(sample_names(musica)) samp_annot(musica, "Tumor_Type") <- annot[id,"Tumor_Type"] ``` -> **Note: Be sure that the annotation vector or data.frame being supplied is in the same order as the samples in the `musica` object.** The `sample_names` function can be used to get the order of the samples in the musica object. Note that the annotations can also be added later on to a `musica_result` objects created by discovery or prediction using the same function: `samp_annot(result, "Tumor_Type") <- annot[id,"Tumor_Type"]`. - +> **Note: Be sure that the annotation vector or data.frame being +> supplied is in the same order as the samples in the `musica` object.** +> The `sample_names` function can be used to get the order of the +> samples in the musica object. Note that the annotations can also be +> added later on to a `musica_result` objects created by discovery or +> prediction using the same function: +> `samp_annot(result, "Tumor_Type") <- annot[id,"Tumor_Type"]`. # Creating mutation count tables ## Create standard tables -Motifs are the building blocks of mutational signatures. Motifs themselves are -a mutation combined with other genomic information. For instance, **SBS96** -motifs are constructed from an SBS mutation and one upstream and one downstream -base sandwiched together. We build tables by counting these motifs for each -sample. + +Motifs are the building blocks of mutational signatures. Motifs +themselves are a mutation combined with other genomic information. 
For +instance, **SBS96** motifs are constructed from an SBS mutation and one +upstream and one downstream base sandwiched together. We build tables by +counting these motifs for each sample. + ```{r build_tables} build_standard_table(musica, g = g, table_name = "SBS96") ``` -Here is a list of mutation tables that can be created by setting the +Here is a list of mutation tables that can be created by setting the `table_name` parameter in the `build_standard_table` function: -* SBS96 - Motifs are the six possible single base pair mutation types times the -four possibilities each for upstream and downstream context bases (4*6*4 = 96 -motifs) -* SBS192_Trans - Motifs are an extension of SBS96 multiplied by the -transcriptional strand (translated/untranslated), can be specified with -`"Transcript_Strand"`. -* SBS192_Rep - Motifs are an extension of SBS96 multiplied by the -replication strand (leading/lagging), can be specified with -`"Replication_Strand"`. -* DBS - Motifs are the 78 possible double-base-pair substitutions -* INDEL - Motifs are 83 categories intended to capture different categories of -indels based on base-pair change, repeats, or microhomology, insertion or -deletion, and length. +- SBS96 - Motifs are the six possible single base pair mutation types + times the four possibilities each for upstream and downstream + context bases (4*6*4 = 96 motifs) +- SBS192_Trans - Motifs are an extension of SBS96 multiplied by the + transcriptional strand (translated/untranslated), can be specified + with `"Transcript_Strand"`. +- SBS192_Rep - Motifs are an extension of SBS96 multiplied by the + replication strand (leading/lagging), can be specified with + `"Replication_Strand"`. +- DBS - Motifs are the 78 possible double-base-pair substitutions +- INDEL - Motifs are 83 categories intended to capture different + categories of indels based on base-pair change, repeats, or + microhomology, insertion or deletion, and length. 
## Combine tables -Different count tables can be combined into one using the `combine_count_tables` -function. For example, the SBS96 and the DBS tables could be combined and -mutational signature discovery could be performed across both mutations -modalities. Tables with information about the same types of variants (e.g. -two related SBS tables) should generally not be combined and used together. +Different count tables can be combined into one using the +`combine_count_tables` function. For example, the SBS96 and the DBS +tables could be combined and mutational signature discovery could be +performed across both mutation modalities. Tables with information +about the same types of variants (e.g. two related SBS tables) should +generally not be combined and used together. ```{r combine_tables} # Build Double Base Substitution table @@ -158,44 +237,66 @@ combine_count_tables(musica, to_comb = c("SBS96", "DBS78"), name = "SBS_DBS", de names(tables(musica)) ``` +# Create a musica object from an existing count table + +A musica object can be created directly from a count table if it is +available from the start. A variant object or file is not required. The +count table should contain mutational motifs as rows and samples as +columns, and must be able to be coerced to a matrix. The variant class +must also be provided, either SBS, DBS, or IND. Here, the example count +table 'synthetic_breast_counts' is used. + +```{r create_musica_from_counts} + +musica2 <- create_musica_from_counts(x = synthetic_breast_counts, variant_class = "SBS") + +``` + # Filtering samples -Samples with low numbers of mutations should usually be excluded from discover and prediction procedures. The `subset_musica_by_counts` function can be used to exclude samples with low numbers of mutations in a particular table: +Samples with low numbers of mutations should usually be excluded from +discovery and prediction procedures. 
The `subset_musica_by_counts` +function can be used to exclude samples with low numbers of mutations in +a particular table: ```{r sample_filter} musica_filter <- subset_musica_by_counts(musica, table_name = "SBS96", num_counts = 10) ``` -The `subset_musica_by_annotation` function can also be used to subset the musica object to samples that match a particular annotation. For example, if we only wanted to analyze lung cancer, we could filter to samples that have "LUAD" or "LUSC": +The `subset_musica_by_annotation` function can also be used to subset +the musica object to samples that match a particular annotation. For +example, if we only wanted to analyze lung cancer, we could filter to +samples that have "LUAD" or "LUSC": ```{r sample_filter_annot} musica_luad <- subset_musica_by_annotation(musica, annot_col = "Tumor_Type", annot_names = c("LUAD", "LUSC")) ``` - # Discovery of signatures and exposures Mutational signature discovery is the process of deconvoluting a matrix -containing the count of each mutation type in each sample into two matrices: 1) -a **Signature** matrix containing the probability of each mutation motif in -signature and 2) an **Exposure** matrix containing the estimated counts of each -signature in each sample. Discovery and prediction results are save in a -`musica_result` object that includes both the signatures and sample exposures. The `discover_signatures` function can be used to identify signatures in a dataset **de novo**: +containing the count of each mutation type in each sample into two +matrices: 1) a **Signature** matrix containing the probability of each +mutation motif in signature and 2) an **Exposure** matrix containing the +estimated counts of each signature in each sample. Discovery and +prediction results are save in a `musica_result` object that includes +both the signatures and sample exposures. 
The `discover_signatures` +function can be used to identify signatures in a dataset **de novo**: ```{r discover_sigs} result_discov <- discover_signatures(musica_filter, table_name = "SBS96", num_signatures = 4, algorithm = "lda") ``` - Supported signature discovery algorithms include: -* Non-negative matrix factorization (nmf) -* Latent Dirichlet Allocation (lda) +- Non-negative matrix factorization (nmf) +- Latent Dirichlet Allocation (lda) -Both have built-in `seed` capabilities for reproducible results, `nstarts` for -multiple independent chains from which the best final result will be chosen. -NMF also allows for parallel processing via `par_cores`. To get the signatures or exposures from the result object, the following -functions can be used: +Both have built-in `seed` capabilities for reproducible results, +`nstarts` for multiple independent chains from which the best final +result will be chosen. NMF also allows for parallel processing via +`par_cores`. To get the signatures or exposures from the result object, +the following functions can be used: ```{r result_accessors} # Extract the exposure matrix @@ -207,87 +308,151 @@ sigs <- signatures(result_discov) sigs[1:3,1:3] ``` +# Import a musica result object + +If signature discovery or prediction was performed previously or +externally, a 'musica_result' object can be created directly from the +signatures and exposures. The signatures table should contain mutational +motifs as rows and signatures as columns and be able to be coerced to a +matrix. The exposures matrix should contain signature weights as rows +and samples as columns and must also be able to be coerced to a matrix. +A mutation count table must also be provided. The method or algorithm +used to generate the results may be provided with 'algorithm'. Here, +example data from a synthetic breast cancer dataset is used. 
+ +```{r create_musica_result} + +NMF_result <- create_musica_result(signatures = example_predicted_sigs, exposures = example_predicted_exp, count_table = synthetic_breast_counts, algorithm = "NMF") + +``` # Visualization of results ## Plot signatures -The `plot_signatures` function can be used to display barplots that show the probability of each mutation type in each signature: + +The `plot_signatures` function can be used to display barplots that show +the probability of each mutation type in each signature: ```{r plot_sigs} plot_signatures(result_discov) ``` -By default, the scales on the y-axis are forced to be the same across all -signatures. This behavior can be turned off by setting `same_scale = FALSE`: +By default, the scales on the y-axis are forced to be the same across +all signatures. This behavior can be turned off by setting +`same_scale = FALSE`: ```{r plot_sigs_same_scale} plot_signatures(result_discov, same_scale = FALSE) ``` ## Comparing to external signatures -A common analysis is to compare the signatures estimated in a dataset to those generated in other datasets or to those in the [COSMIC database](https://cancer.sanger.ac.uk/cosmic/signatures). We have a set of functions that can be used to easily perform pairwise correlations between signatures. The `compare_results` functions compares the signatures between two `musica_result` objects. The `compare_cosmic_v2` will correlate the signatures between a `musica_result` object and the SBS signatures in COSMIC V2. For example: + +A common analysis is to compare the signatures estimated in a dataset to +those generated in other datasets or to those in the [COSMIC +database](https://cancer.sanger.ac.uk/cosmic/signatures). We have a set +of functions that can be used to easily perform pairwise correlations +between signatures. The `compare_results` functions compares the +signatures between two `musica_result` objects. 
The `compare_cosmic_v2` +will correlate the signatures between a `musica_result` object and the +SBS signatures in COSMIC V2. For example: ```{r compare_cosmic} compare_cosmic_v2(result_discov, threshold = 0.8) ``` -In this example, our Signatures 1 and 3 were most highly correlated to COSMIC Signature 4 and 7, respectively, so this may indicate that samples in our dataset were exposed to UV radiation or cigarette smoke. Only pairs of signatures who have a correlation above the `threshold` parameter will be returned. If no pairs of signatures are found, then you may want to consider lowering the threshold. Signatures can also be correlated to those in the COSMIC V3 database using the `compare_cosmic_v3` function. +In this example, our Signatures 1 and 3 were most highly correlated to +COSMIC Signature 4 and 7, respectively, so this may indicate that +samples in our dataset were exposed to UV radiation or cigarette smoke. +Only pairs of signatures who have a correlation above the `threshold` +parameter will be returned. If no pairs of signatures are found, then +you may want to consider lowering the threshold. Signatures can also be +correlated to those in the COSMIC V3 database using the +`compare_cosmic_v3` function. 
-Based on the COSMIC comparison results and our prior knowledge, these signatures can be re-named and the new name can displayed in the plots: +Based on the COSMIC comparison results and our prior knowledge, these +signatures can be re-named and the new name can displayed in the plots: ```{r, name_sigs} name_signatures(result_discov, c("SBS4 - Smoking", "SBS15 - MMR", "SBS7 - UV", "SBS2/13 - APOBEC")) plot_signatures(result_discov) ``` - ## Plot exposures -Barplots showing the exposures in each sample can be plotted with the +Barplots showing the exposures in each sample can be plotted with the `plot_exposures` function: ```{r exposures_raw} plot_exposures(result_discov, plot_type = "bar") ``` -By default, samples are ordered from those with the highest number of mutations on the left to those with the lowest on the right. Sometimes, too many samples are present and the bars are too small to clearly examine the patterns of exposures. The `num_samples` parameter can be used to display the top samples with the highest number of mutations on the left: +By default, samples are ordered from those with the highest number of +mutations on the left to those with the lowest on the right. Sometimes, +too many samples are present and the bars are too small to clearly +examine the patterns of exposures. The `num_samples` parameter can be +used to display the top samples with the highest number of mutations on +the left: ```{r exposures_raw_top} plot_exposures(result_discov, plot_type = "bar", num_samples = 50) ``` -Samples can be ordered by the level of individual exposures. The can be used in combination with the `num_samples` parameter to examine the mutational patterns in the samples with the highest levels of a particular exposure. For example, samples can be ordered by the number of estimated mutations from the MMR signature: + +Samples can be ordered by the level of individual exposures. 
The can be +used in combination with the `num_samples` parameter to examine the +mutational patterns in the samples with the highest levels of a +particular exposure. For example, samples can be ordered by the number +of estimated mutations from the MMR signature: ```{r exposures_raw_sort} plot_exposures(result_discov, plot_type = "bar", num_samples = 50, sort_samples = "SBS15 - MMR") ``` - -The proportion of each exposure in each tumor can be shown by setting `proportional = TRUE`: +The proportion of each exposure in each tumor can be shown by setting +`proportional = TRUE`: ```{r exposures_prop} plot_exposures(result_discov, plot_type = "bar", num_samples = 50, proportional = TRUE) ``` -The `plot_exposures` function can group exposures by either a sample annotation or by a signature by setting the `group_by` parameter. To group by an annotation, the `groupBy` parameter must be set to `"annotation"` and the name of the annotation must be supplied via the `annotation` parameter. For example, the exposures from the previous result can be grouped by the `Tumor_Type` annotation: +The `plot_exposures` function can group exposures by either a sample +annotation or by a signature by setting the `group_by` parameter. To +group by an annotation, the `groupBy` parameter must be set to +`"annotation"` and the name of the annotation must be supplied via the +`annotation` parameter. For example, the exposures from the previous +result can be grouped by the `Tumor_Type` annotation: ```{r plot_exposures_by_subtype} plot_exposures(result_discov, plot_type = "bar", group_by = "annotation", annotation = "Tumor_Type") ``` -In this plot, it is clear that the smoking signature is more active in the lung cancers while the UV signature is more active in the skin cancers. 
The distribution of exposures with respect to annotation can be viewed using boxplots by setting `plot_type = "box"` and `group_by = "annotation"`: +In this plot, it is clear that the smoking signature is more active in +the lung cancers while the UV signature is more active in the skin +cancers. The distribution of exposures with respect to annotation can be +viewed using boxplots by setting `plot_type = "box"` and +`group_by = "annotation"`: ```{r plot_exposures_box_annot} plot_exposures(result_discov, plot_type = "box", group_by = "annotation", annotation = "Tumor_Type") ``` -Note that boxplots can be converted to violin plots by setting `plot_type = "violin"`. To compare the exposures levels across groups of samples within a signature, we can set `group_by = "signature"` and `color_by = "annotation"`: +Note that boxplots can be converted to violin plots by setting +`plot_type = "violin"`. To compare the exposures levels across groups of +samples within a signature, we can set `group_by = "signature"` and +`color_by = "annotation"`: ```{r plot_exposures_box_sig} plot_exposures(result_discov, plot_type = "box", group_by = "signature", color_by = "annotation", annotation = "Tumor_Type") ``` -To verify that the deconvolution algorithm produced good signatures, one strategy is to examine the patterns of mutations in individual samples with a high predicted percentage of a particular signature. If the shape of the counts match the patterns of the signature, then this is a good indicator that the deconvolution algorithm worked well. Counts for individual samples can be plotted with the `plot_sample_counts` function. For example, we can plot the sample with the highest proportion of the APOBEC signature: +To verify that the deconvolution algorithm produced good signatures, one +strategy is to examine the patterns of mutations in individual samples +with a high predicted percentage of a particular signature. 
If the shape +of the counts match the patterns of the signature, then this is a good +indicator that the deconvolution algorithm worked well. Counts for +individual samples can be plotted with the `plot_sample_counts` +function. For example, we can plot the sample with the highest +proportion of the APOBEC signature: ```{r sample_counts} # Normalize exposures @@ -298,12 +463,24 @@ ix <- c(which.max(expos.prop[2,]), which.max(expos.prop[4,])) plot_sample_counts(musica_filter, sample_names = colnames(expos.prop)[ix], table_name = "SBS96") ``` - # Predict exposures from existing signatures ## Predict COSMIC signatures -Instead of discovering mutational signatures and exposures from a dataset *de novo*, a better result may be obtained by predicting the exposures of signatures that have been previously estimated in other datasets. Predicting exposures for pre-existing signatures may have more sensitivity for detecting active compared to the discovery-based methods as we are incorporating prior information derived from larger datasets. The `musicatk` package incorporates several methods for estimating exposures given a set of pre-existing signatures. For example, the exposures for COSMIC signatures 1, 4, 7, 13, and 15 can be predicted in our current dataset. Note that we are including COSMIC signature 1 in the prediction even though it did not show up in the discovery algorithm as this signature has been previously shown to be active in lung tumors and we are also including both APOBEC signatures (2 and 13) which were previously combined into 1 signature in the discovery method. +Instead of discovering mutational signatures and exposures from a +dataset *de novo*, a better result may be obtained by predicting the +exposures of signatures that have been previously estimated in other +datasets. 
Predicting exposures for pre-existing signatures may have more +sensitivity for detecting active signatures compared to the discovery-based methods +as we are incorporating prior information derived from larger datasets. +The `musicatk` package incorporates several methods for estimating +exposures given a set of pre-existing signatures. For example, the +exposures for COSMIC signatures 1, 4, 7, 13, and 15 can be predicted in +our current dataset. Note that we are including COSMIC signature 1 in +the prediction even though it did not show up in the discovery algorithm +as this signature has been previously shown to be active in lung tumors +and we are also including both APOBEC signatures (2 and 13) which were +previously combined into 1 signature in the discovery method. ```{r predict_cosmic} @@ -317,11 +494,29 @@ result_cosmic_selected_sigs <- predict_exposure(musica = musica_filter, table_na plot_exposures(result_cosmic_selected_sigs, plot_type = "bar", num_samples = 50) ``` -The `cosmic_v2_sigs` object is just a `musica_result` object containing COSMIC V2 signatures without any sample or exposure information. Note that if `signatures_to_use` is not supplied by the user, then exposures for all signatures in the result object will be estimated. Any `musical_result` object can be given to the `signature_res` parameter. Exposures can be predicted for samples in any `musica` object from any `musica_result` object as long as the same mutation schema was utilized. +The `cosmic_v2_sigs` object is just a `musica_result` object containing +COSMIC V2 signatures without any sample or exposure information. Note +that if `signatures_to_use` is not supplied by the user, then exposures +for all signatures in the result object will be estimated. Any +`musica_result` object can be given to the `signature_res` parameter. +Exposures can be predicted for samples in any `musica` object from any +`musica_result` object as long as the same mutation schema was utilized. 
## Prediction with signature selection -In many cases, researchers will not know the signatures that are active in a cohort of samples beforehand. While it would be easy to predict all COSMIC signatures, this can have detrimental effects on the output. Including signatures not actually active in the cohort of samples may introduce additional noise in the estimates for the exposures for the signatures that are truly present in the dataset. Additionally, including extra signatures may induce a false signal for the exposures of the non-active signatures. The `musicatk` package has a "two-step" prediction process. In the first step, exposures for all signatures will be estimated. Then a subset of signatures will be selected as "active" in the dataset and only the exposures for the active signatures will be estimated. This two-step process can be done automatically using the `auto_predict_grid` function: +In many cases, researchers will not know the signatures that are active +in a cohort of samples beforehand. While it would be easy to predict all +COSMIC signatures, this can have detrimental effects on the output. +Including signatures not actually active in the cohort of samples may +introduce additional noise in the estimates for the exposures for the +signatures that are truly present in the dataset. Additionally, +including extra signatures may induce a false signal for the exposures +of the non-active signatures. The `musicatk` package has a "two-step" +prediction process. In the first step, exposures for all signatures will +be estimated. Then a subset of signatures will be selected as "active" +in the dataset and only the exposures for the active signatures will be +estimated. 
This two-step process can be done automatically using the +`auto_predict_grid` function: ```{r auto_pred_grid} # Predict exposures with auto selection of signatures @@ -331,83 +526,151 @@ result_cosmic_auto <- auto_predict_grid(musica_filter, table_name = "SBS96", sig rownames(exposures(result_cosmic_auto)) ``` -In this result, `r length(rownames(exposures(result_cosmic_auto)))` of the 30 original COSMIC V2 signatures were selected including several signatures that were not previously included in our first prediction with manually selected signatures. If multiple groups of samples are present in the dataset that are expected to have somewhat different sets of active signatures (e.g. multiple tumor types), then this 2-step process can be improved by performing signature selection within each group. This can be achieved by supplying the `sample_annoation` parameter. In our example, exposures were predicted in the three different tumor types by supplying the `Tumor_Type` annotation to `sample_annotation`. This parameter can be left `NULL` if no grouping annotation is available. - -The three major parameters that determine whether a signature is present in a dataset on the first pass are: - -* `min_exists` - A signature will be considered active in a sample if its exposure level is above this threshold (Default `0.05`). -* `proportion_samples` - A signature will be considered active in a cohort and included in the second pass if it is active in at least this proportion of samples (Default `0.25`). -* `rare_exposure` - A signature will be considered active in a cohort and included in the second pass if the proportion of its exposure is above this threshold in at least one sample (Default `0.4`). This parameter is meant to capture signatures that produce high number of mutations but are found in a small number of samples (e.g. Mismatch repair). 
+In this result, `r length(rownames(exposures(result_cosmic_auto)))` of +the 30 original COSMIC V2 signatures were selected including several +signatures that were not previously included in our first prediction +with manually selected signatures. If multiple groups of samples are +present in the dataset that are expected to have somewhat different sets +of active signatures (e.g. multiple tumor types), then this 2-step +process can be improved by performing signature selection within each +group. This can be achieved by supplying the `sample_annotation` +parameter. In our example, exposures were predicted in the three +different tumor types by supplying the `Tumor_Type` annotation to +`sample_annotation`. This parameter can be left `NULL` if no grouping +annotation is available. + +The three major parameters that determine whether a signature is present +in a dataset on the first pass are: + +- `min_exists` - A signature will be considered active in a sample if + its exposure level is above this threshold (Default `0.05`). +- `proportion_samples` - A signature will be considered active in a + cohort and included in the second pass if it is active in at least + this proportion of samples (Default `0.25`). +- `rare_exposure` - A signature will be considered active in a cohort + and included in the second pass if the proportion of its exposure is + above this threshold in at least one sample (Default `0.4`). This + parameter is meant to capture signatures that produce a high number of + mutations but are found in a small number of samples (e.g. Mismatch + repair). ## Assess predicted signatures -It is almost always worthwhile to manually assess and confirm the signatures predicted to be present within a dataset, especially for signatures that have similar profiles to one another. 
For example, both COSMIC [Signature 4](https://cancer.sanger.ac.uk/signatures/media/images/v2_signature_profile_4.original.png) (smoking) and [Signature 24](https://cancer.sanger.ac.uk/signatures/media/images/v2_signature_profile_24.original.png) (aflatoxin) were predicted to be present within our dataset. The smoking-related signature is expected as our cohort contains lung cancers, but the aflatoxin signature is unexpected given that it is usually found in liver cancers. These signatures both have a strong concentration of C>A tranversions. In fact, we can see that the predicted exposures for these signatures are highly correlated to each other across samples: +It is almost always worthwhile to manually assess and confirm the +signatures predicted to be present within a dataset, especially for +signatures that have similar profiles to one another. For example, both +COSMIC [Signature +4](https://cancer.sanger.ac.uk/signatures/media/images/v2_signature_profile_4.original.png) +(smoking) and [Signature +24](https://cancer.sanger.ac.uk/signatures/media/images/v2_signature_profile_24.original.png) +(aflatoxin) were predicted to be present within our dataset. The +smoking-related signature is expected as our cohort contains lung +cancers, but the aflatoxin signature is unexpected given that it is +usually found in liver cancers. These signatures both have a strong +concentration of C\>A transversions. In fact, we can see that the +predicted exposures for these signatures are highly correlated to each +other across samples: ```{r plot_SBS4_vs_SBS24} e <- exposures(result_cosmic_auto) -plot(e["Signature4",], e["Signature24",], xlab="SBS4", ylab="SBS24") +plot(e["SBS4",], e["SBS24",], xlab="SBS4", ylab="SBS24") ``` -Therefore, we will want to remove Signature 24 from our final prediction model. 
[Signature 18](https://cancer.sanger.ac.uk/signatures/media/images/v2_signature_profile_18.original.png) is another one with a high prevalence of C>A transversion at specific trinucleotide contexts. However, at least a few samples have high levels of Signature 18 without correspondingly high levels of Signature 4: +Therefore, we will want to remove Signature 24 from our final prediction +model. [Signature +18](https://cancer.sanger.ac.uk/signatures/media/images/v2_signature_profile_18.original.png) +is another one with a high prevalence of C\>A transversion at specific +trinucleotide contexts. However, at least a few samples have high levels +of Signature 18 without correspondingly high levels of Signature 4: ```{r plot_SBS4_vs_SBS18} -plot(e["Signature4",], e["Signature18",], xlab="SBS4", ylab="SBS18") -plot_exposures(result_cosmic_auto, num_samples = 25, sort_samples = "Signature18") +plot(e["SBS4",], e["SBS18",], xlab="SBS4", ylab="SBS18") +plot_exposures(result_cosmic_auto, num_samples = 25, sort_samples = "SBS18") ``` -Additionally, 2 of the 3 samples are skin cancers where the smoking signature is not usually expected: +Additionally, 2 of the 3 samples are skin cancers where the smoking +signature is not usually expected: ```{r high_sig18} -high.sbs18 <- tail(sort(e["Signature18",]), n = 3) +high.sbs18 <- tail(sort(e["SBS18",]), n = 3) annot[names(high.sbs18),] ``` -As a final check, we can look at the counts of the individual samples with high levels of Signature 18: +As a final check, we can look at the counts of the individual samples +with high levels of Signature 18: ```{r plot_sample_sig18} plot_sample_counts(musica_filter, sample_names = "TCGA-ER-A19P-06A-11D-A196-08") ``` -This sample clearly has high levels of both the UV signature confirming that it is likely a skin cancer. Signature 18 is also likely to be active as a high number of C>A mutations at CCA, TCA, and TCT trinucleotide contexts can be observed. 
Given these results, Signature 18 will be kept in the final analysis. +This sample clearly has high levels of the UV signature, confirming +that it is likely a skin cancer. Signature 18 is also likely to be +active as a high number of C\>A mutations at CCA, TCA, and TCT +trinucleotide contexts can be observed. Given these results, Signature +18 will be kept in the final analysis. -After additional analysis of other signatures, we also want to remove Signature 3 as that is predominantly found in tumors with BRCA deficiencies (e.g. breast cancer) and in samples with high rates of indels (which are not observed here). The `predict_exposure` function will be run one last time with the curated list of signatures and this final result will be used in the rest of the down-stream analyses: +After additional analysis of other signatures, we also want to remove +Signature 3 as that is predominantly found in tumors with BRCA +deficiencies (e.g. breast cancer) and in samples with high rates of +indels (which are not observed here). The `predict_exposure` function +will be run one last time with the curated list of signatures and this +final result will be used in the rest of the down-stream analyses: ```{r predict_cosmic_final} # Predict pre-existing exposures with the revised set of selected signatures result_cosmic_final <- predict_exposure(musica = musica_filter, table_name = "SBS96", signature_res = cosmic_v2_sigs, signatures_to_use = c(1, 2, 4, 6, 7, 13, 15, 18, 26), algorithm = "lda") ``` - # Downstream analyses ## Visualize relationships between samples with 2-D embedding -The `create_umap` function embeds samples in 2 dimensions using the `umap` function from the `r BiocStyle::CRANpkg("uwot")` package. The major parameters for fine tuning the UMAP are `n_neighbors`, `min_dist`, and `spread`. 
Generally, a higher `min_dist` will create more separation between the larger groups of samples while a lower See `?uwot::umap` for more information on these parameters as well as this [tutorial](https://pair-code.github.io/understanding-umap/) for fine-tuning. Here, a UMAP will be created with standard parameters: +The `create_umap` function embeds samples in 2 dimensions using the +`umap` function from the `r BiocStyle::CRANpkg("uwot")` package. The +major parameters for fine tuning the UMAP are `n_neighbors`, `min_dist`, +and `spread`. Generally, a higher `min_dist` will create more separation +between the larger groups of samples while a lower `min_dist` will pack samples more tightly together. See `?uwot::umap` for +more information on these parameters as well as this +[tutorial](https://pair-code.github.io/understanding-umap/) for +fine-tuning. Here, a UMAP will be created with standard parameters: ```{r umap_create} set.seed(1) create_umap(result_cosmic_final) ``` -Note that while we are using the `result_cosmic_final` object which came from the prediction algorithm, we could have also used the `result_discov` object generated by the discovery algorithm. The `plot_umap` function will generate a scatter plot of the UMAP coordinates. The points of plot will be colored by the level of a signature by default: +Note that while we are using the `result_cosmic_final` object which came +from the prediction algorithm, we could have also used the +`result_discov` object generated by the discovery algorithm. The +`plot_umap` function will generate a scatter plot of the UMAP +coordinates. The points of the plot will be colored by the level of a +signature by default: ```{r umap_plot} plot_umap(result_cosmic_final) ``` -By default, the exposures for each sample will share the same color scale. However, exposures for some signatures may have really high levels compared to others. 
To make a plot where exposures for each signature will have their own color scale, you can set `same_scale = FALSE`: +By default, the exposures for each sample will share the same color +scale. However, exposures for some signatures may have really high +levels compared to others. To make a plot where exposures for each +signature will have their own color scale, you can set +`same_scale = FALSE`: ```{r umap_plot_same_scale} plot_umap(result_cosmic_final, same_scale = FALSE) ``` -Lastly, points can be colored by a Sample Annotation by setting `color_by = "annotation"` and the `annotation` parameter to the name of the annotation: +Lastly, points can be colored by a Sample Annotation by setting +`color_by = "annotation"` and the `annotation` parameter to the name of +the annotation: ```{r umap_plot_annot} plot_umap(result_cosmic_final, color_by = "annotation", annotation = "Tumor_Type") ``` -If we set `add_annotation_labels = TRUE`, the centroid of each group is identified using medians and the labels are plotted at the position of the centroid: +If we set `add_annotation_labels = TRUE`, the centroid of each group is +identified using medians and the labels are plotted at the position of +the centroid: ```{r umap_plot_annot_label} plot_umap(result_cosmic_final, color_by = "annotation", annotation = "Tumor_Type", add_annotation_labels = TRUE) @@ -415,35 +678,53 @@ plot_umap(result_cosmic_final, color_by = "annotation", annotation = "Tumor_Type ## Plotting exposures in a heatmap -Exposures can be displayed in a heatmap where each row corresponds to a siganture and each column correponds to a sample: +Exposures can be displayed in a heatmap where each row corresponds to a +signature and each column corresponds to a sample: ```{r exposure_heatmap} plot_heatmap(result_cosmic_final) ``` -By default, signatures are scaled to have a mean of zero and a standard deviation of 1 across samples (i.e. z-scored). This can be turned off by setting `scale = FALSE`. 
Sample annotations can be displayed in the column color bar by setting the `annotation` parameter: +By default, signatures are scaled to have a mean of zero and a standard +deviation of 1 across samples (i.e. z-scored). This can be turned off by +setting `scale = FALSE`. Sample annotations can be displayed in the +column color bar by setting the `annotation` parameter: ```{r exposure_heatmap_annot} plot_heatmap(result_cosmic_final, annotation = "Tumor_Type") ``` -The heatmap shows that Signature 4 and Signature 7 are largely mutually exclusive from one another and can be used to separate lung and skin cancers. Additionally, subsets of signatures or samples can be displayed. For example, if we only want to examine signatures involved in mismatch repair, we can select signatures 6, 15, and 26: +The heatmap shows that Signature 4 and Signature 7 are largely mutually +exclusive from one another and can be used to separate lung and skin +cancers. Additionally, subsets of signatures or samples can be +displayed. For example, if we only want to examine signatures involved +in mismatch repair, we can select signatures 6, 15, and 26: ```{r exposure_heatmap_sigs} -plot_heatmap(result_cosmic_final, annotation = "Tumor_Type", subset_signatures = c("Signature6", "Signature15", "Signature26")) +plot_heatmap(result_cosmic_final, annotation = "Tumor_Type", subset_signatures = c("SBS6", "SBS15", "SBS26")) ``` -In this heatmap, we can see that only a small subset of distinct samples have relatively higher levels of these signatures. +In this heatmap, we can see that only a small subset of distinct samples +have relatively higher levels of these signatures. ## Clustering samples based on exposures -Samples can be grouped into **de novo** clusters using a several algorithms from the factoextra and cluster packages such as `pam` or `kmeans`. One major challenge is choosing the number of clusters (k). 
The function `k_select` has several metrics for examining cluster stability such as total within cluster sum of squares (`wss`), Silhouette Width (`silhouette`), and the Gap Statistic (`gap_stat`). +Samples can be grouped into **de novo** clusters using several +algorithms from the factoextra and cluster packages such as `pam` or +`kmeans`. One major challenge is choosing the number of clusters (k). +The function `k_select` has several metrics for examining cluster +stability such as total within cluster sum of squares (`wss`), +Silhouette Width (`silhouette`), and the Gap Statistic (`gap_stat`). ```{r find_cluster_number} k_select(result_cosmic_final, method = "silhouette", clust.method = "pam", n = 20) ``` -While 2 clusters may be the most optimal choice, this would just correspond to the two large clusters of lung and skin tumors. Therefore, choosing a higher value may be more informative. The next major drop in the silhouette width is after `k = 6`, so we will select this moving forward and perform the clustering: +While 2 clusters may be the optimal choice, this would just +correspond to the two large clusters of lung and skin tumors. Therefore, +choosing a higher value may be more informative. The next major drop in +the silhouette width is after `k = 6`, so we will select this moving +forward and perform the clustering: ```{r cluster} clusters <- cluster_exposure(result_cosmic_final, method = "pam", nclust = 6) @@ -460,7 +741,13 @@ plot_cluster(result_cosmic_final, cluster = clusters, group = "none") ## Plotly for interactive plots -The functions `plot_signatures`, `plot_exposures`, and `plot_umap` have the ability to create `r BiocStyle::CRANpkg("ggplotly")` plots by simply specifying `plotly = TRUE`. Plotly plots are interactive and allow users to zoom and re-sizing plots, turn on and off annotation types and legend values, and hover over elements of the plots (e.g. bars or points) to more information about that element (e.g. sample name). 
Here are examples of `plot_signatures` and `plot_exposures` +The functions `plot_signatures`, `plot_exposures`, and `plot_umap` have +the ability to create `r BiocStyle::CRANpkg("ggplotly")` plots by simply +specifying `plotly = TRUE`. Plotly plots are interactive and allow users +to zoom and resize plots, turn on and off annotation types and legend +values, and hover over elements of the plots (e.g. bars or points) to +see more information about that element (e.g. sample name). Here are +examples of `plot_signatures` and `plot_exposures`: ```{r plotly} plot_signatures(result_cosmic_final, plotly = TRUE) @@ -469,17 +756,20 @@ plot_exposures(result_cosmic_final, num_samples = 25, plotly = TRUE) ## COSMIC signatures annotated to be active in a tumor type -The signatures predicted to be present in each tumor type according to the [COSMIC V2 database](https://cancer.sanger.ac.uk/cosmic/signatures_v2.tt) can be quickly retrieved. For example, we can find which signatures are predicted to be present in lung cancers: +The signatures predicted to be present in each tumor type according to +the [COSMIC V2 +database](https://cancer.sanger.ac.uk/cosmic/signatures_v2.tt) can be +quickly retrieved. For example, we can find which signatures are +predicted to be present in lung cancers: ```{r subtype_map} cosmic_v2_subtype_map("lung") ``` - ## Creating custom tables -Custom count tables can be created from user-defined mutation-level annotations -using the `build_custom_table` function. +Custom count tables can be created from user-defined mutation-level +annotations using the `build_custom_table` function. 
```{r custom_table} # Adds strand information to the 'variant' table @@ -492,8 +782,6 @@ build_custom_table(musica = musica, variant_annotation = "Transcript_Strand", data_factor = c("T", "U"), overwrite = TRUE) ``` - - # Session Information ```{r session} diff --git a/vignettes/musicatk.Rmd b/vignettes/musicatk.Rmd index 89abd068..9f21dc41 100644 --- a/vignettes/musicatk.Rmd +++ b/vignettes/musicatk.Rmd @@ -10,8 +10,11 @@ output: BiocStyle::html_document vignette: > %\VignetteIndexEntry{Mutational Signature Comprehensive Analysis Toolkit} - %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} + %\VignetteEngine{knitr::rmarkdown} +editor_options: + markdown: + wrap: 72 --- ```{r setup, include=FALSE, results = "asis"} @@ -20,10 +23,26 @@ knitr::opts_chunk$set(echo = TRUE, dev = "png") ``` # Introduction -A variety of exogenous exposures or endogenous biological processes can contribute to the overall mutational load observed in human tumors. Many different mutational patterns, or “mutational signatures”, have been identified across different tumor types. These signatures can provide a record of environmental exposure and can give clues about the etiology of carcinogenesis. The Mutational Signature Comprehensive Analysis Toolkit (musicatk) has utilities for extracting variants from a variety of file formats, contains multiple methods for discovery of novel signatures or prediction of known signatures, as well as many types of downstream visualizations for exploratory analysis. This package has the ability to parse and combine multiple motif classes in the mutational signature discovery or prediction processes. Mutation motifs include single base substitutions (SBS), double base substitutions (DBS), insertions (INS) and deletions (DEL). + +A variety of exogenous exposures or endogenous biological processes can +contribute to the overall mutational load observed in human tumors. 
Many +different mutational patterns, or “mutational signatures”, have been +identified across different tumor types. These signatures can provide a +record of environmental exposure and can give clues about the etiology +of carcinogenesis. The Mutational Signature Comprehensive Analysis +Toolkit (musicatk) has utilities for extracting variants from a variety +of file formats, contains multiple methods for discovery of novel +signatures or prediction of known signatures, as well as many types of +downstream visualizations for exploratory analysis. This package has the +ability to parse and combine multiple motif classes in the mutational +signature discovery or prediction processes. Mutation motifs include +single base substitutions (SBS), double base substitutions (DBS), +insertions (INS) and deletions (DEL). # Installation -Currently musicatk can be installed from on Bioconductor using the following code: + +Currently musicatk can be installed from Bioconductor using the +following code: ```{r, eval= FALSE} if (!requireNamespace("BiocManager", quietly=TRUE)){ @@ -47,21 +66,45 @@ The package can be loaded using the `library` command. library(musicatk) ``` -# Setting up a musica object -In order to discover or predict mutational signatures, we must first set up -our musica object by 1) extracting variants from files or objects such as -VCFs and MAFs, 2) selecting the appropriate reference genome 3) creating a -musica object, and 4) building a count tables for our variants of interest. +# Setting up a musica object from variants + +In order to discover or predict mutational signatures, we must first set +up our musica object by 1) extracting variants from files or objects +such as VCFs and MAFs, 2) selecting the appropriate reference genome, 3) +creating a musica object, and 4) building count tables for our +variants of interest. 
## Extracting variants -Variants can be extracted from various formats using the following functions: -* The `extract_variants_from_vcf_file()` function will extract variants from a [VCF](https://samtools.github.io/hts-specs/) file. The file will be imported using the readVcf function from the [VariantAnnotation](https://bioconductor.org/packages/release/bioc/html/VariantAnnotation.html) package and then the variant information will be extracted from this object. -* The `extract_variants_from_vcf()` function extracts variants from a `CollapsedVCF` or `ExpandedVCF` object from the [VariantAnnotation](https://bioconductor.org/packages/release/bioc/html/VariantAnnotation.html) package. -* The `extract_variants_from_maf_file()` function will extract variants from a file in [Mutation Annotation Format (MAF)](https://docs.gdc.cancer.gov/Data/File_Formats/MAF_Format/) used by TCGA. -* The `extract_variants_from_maf()` function will extract variants from a MAF object created by the [maftools](https://www.bioconductor.org/packages/release/bioc/html/maftools.html) package. -* The `extract_variants_from_matrix()` function will get the information from a matrix or data.frame like object that has columns for the chromosome, start position, end position, reference allele, mutation allele, and sample name. -* The `extract_variants()` function will extract variants from a list of objects. These objects can be any combination of VCF files, VariantAnnotation objects, MAF files, MAF objects, and data.frame objects. +Variants can be extracted from various formats using the following +functions: + +- The `extract_variants_from_vcf_file()` function will extract + variants from a [VCF](https://samtools.github.io/hts-specs/) file. + The file will be imported using the readVcf function from the + [VariantAnnotation](https://bioconductor.org/packages/release/bioc/html/VariantAnnotation.html) + package and then the variant information will be extracted from this + object. 
+- The `extract_variants_from_vcf()` function extracts variants from a + `CollapsedVCF` or `ExpandedVCF` object from the + [VariantAnnotation](https://bioconductor.org/packages/release/bioc/html/VariantAnnotation.html) + package. +- The `extract_variants_from_maf_file()` function will extract + variants from a file in [Mutation Annotation Format + (MAF)](https://docs.gdc.cancer.gov/Data/File_Formats/MAF_Format/) + used by TCGA. +- The `extract_variants_from_maf()` function will extract variants + from a MAF object created by the + [maftools](https://www.bioconductor.org/packages/release/bioc/html/maftools.html) + package. +- The `extract_variants_from_matrix()` function will get the + information from a matrix or data.frame like object that has columns + for the chromosome, start position, end position, reference allele, + mutation allele, and sample name. +- The `extract_variants()` function will extract variants from a list + of objects. These objects can be any combination of VCF files, + VariantAnnotation objects, MAF files, MAF objects, and data.frame + objects. Below are some examples of extracting variants from MAF and VCF files: @@ -82,46 +125,69 @@ variants <- extract_variants(c(lusc_maf, luad_vcf, melanoma_vcfs)) ``` ## Choosing a genome -musicatk uses [BSgenome](https://bioconductor.org/packages/release/bioc/html/BSgenome.html) objects to access genome sequence information that flanks each mutation which is used bases for generating mutation count tables. BSgenome objects store full genome sequences for different organisms. A full list of supported organisms can be obtained by running `available.genomes()` after loading the BSgenome library. Custom genomes can be forged as well (see [BSgenome](https://bioconductor.org/packages/release/bioc/html/BSgenome.html) documentation). musicatk provides a utility function called `select_genome()` to allow users to quickly select human genome build versions "hg19" and "hg38" or mouse genome builds "mm9" and "mm10". 
The reference sequences for these genomes are in UCSC format (e.g. chr1). + +musicatk uses +[BSgenome](https://bioconductor.org/packages/release/bioc/html/BSgenome.html) +objects to access genome sequence information that flanks each mutation +which is used as the basis for generating mutation count tables. BSgenome +objects store full genome sequences for different organisms. A full list +of supported organisms can be obtained by running `available.genomes()` +after loading the BSgenome library. Custom genomes can be forged as well +(see +[BSgenome](https://bioconductor.org/packages/release/bioc/html/BSgenome.html) +documentation). musicatk provides a utility function called +`select_genome()` to allow users to quickly select human genome build +versions "hg19" and "hg38" or mouse genome builds "mm9" and "mm10". The +reference sequences for these genomes are in UCSC format (e.g. chr1). ```{r select_genome} g <- select_genome("hg38") ``` ## Creating a musica object -The last preprocessing step is to create an object with the variants and the genome using the `create_musica` function. This function will perform checks to ensure that the chromosome names and reference alleles in the input variant object match those in supplied BSgenome object. These checks can be turned off by setting `check_ref_chromosomes = FALSE` and `check_ref_bases = FALSE`, respectively. This function also looks for adjacent single base substitutions (SBSs) and will convert them to double base substitutions (DBSs). To disable this automatic conversion, set `convert_dbs = FALSE`. -```{r create_musica} -musica <- create_musica(x = variants, genome = g) -``` +The last preprocessing step is to create an object with the variants and +the genome using the `create_musica_from_variants` function. This function will +perform checks to ensure that the chromosome names and reference alleles +in the input variant object match those in the supplied BSgenome object. 
+These checks can be turned off by setting +`check_ref_chromosomes = FALSE` and `check_ref_bases = FALSE`, +respectively. This function also looks for adjacent single base +substitutions (SBSs) and will convert them to double base substitutions +(DBSs). To disable this automatic conversion, set `convert_dbs = FALSE`. +```{r create_musica_from_variants} +musica <- create_musica_from_variants(x = variants, genome = g) +``` # Creating mutation count tables -Motifs are the building blocks of mutational signatures. Motifs themselves are -a mutation combined with other genomic information. For instance, **SBS96** -motifs are constructed from an SBS mutation and one upstream and one downstream -base sandwiched together. We build tables by counting these motifs for each -sample. + +Motifs are the building blocks of mutational signatures. Motifs +themselves are a mutation combined with other genomic information. For +instance, **SBS96** motifs are constructed from an SBS mutation and one +upstream and one downstream base sandwiched together. We build tables by +counting these motifs for each sample. + ```{r build_tables} build_standard_table(musica, g = g, table_name = "SBS96") ``` -Here is a list of mutation tables that can be created by setting the +Here is a list of mutation tables that can be created by setting the `table_name` parameter in the `build_standard_table` function: -* SBS96 - Motifs are the six possible single base pair mutation types times the -four possibilities each for upstream and downstream context bases (4*6*4 = 96 -motifs) -* SBS192_Trans - Motifs are an extension of SBS96 multiplied by the -transcriptional strand (translated/untranslated), can be specified with -`"Transcript_Strand"`. -* SBS192_Rep - Motifs are an extension of SBS96 multiplied by the -replication strand (leading/lagging), can be specified with -`"Replication_Strand"`. 
-* DBS - Motifs are the 78 possible double-base-pair substitutions -* INDEL - Motifs are 83 categories intended to capture different categories of -indels based on base-pair change, repeats, or microhomology, insertion or -deletion, and length. +- SBS96 - Motifs are the six possible single base pair mutation types + times the four possibilities each for upstream and downstream + context bases (4*6*4 = 96 motifs) +- SBS192_Trans - Motifs are an extension of SBS96 multiplied by the + transcriptional strand (translated/untranslated), can be specified + with `"Transcript_Strand"`. +- SBS192_Rep - Motifs are an extension of SBS96 multiplied by the + replication strand (leading/lagging), can be specified with + `"Replication_Strand"`. +- DBS - Motifs are the 78 possible double-base-pair substitutions +- INDEL - Motifs are 83 categories intended to capture different + categories of indels based on base-pair change, repeats, or + microhomology, insertion or deletion, and length. ```{r combine_tables} data(dbs_musica) @@ -140,7 +206,6 @@ combine_count_tables(musica = dbs_musica, to_comb = c("SBS96", "DBS78"), table, combining SBS96 and DBS", overwrite = TRUE) ``` - ```{r} annotate_transcript_strand(musica, "19", build_table = FALSE) build_custom_table(musica = musica, variant_annotation = "Transcript_Strand", @@ -149,23 +214,40 @@ build_custom_table(musica = musica, variant_annotation = "Transcript_Strand", data_factor = c("T", "U"), overwrite = TRUE) ``` +Different count tables can be combined into one using the +`combine_count_tables` function. For example, the SBS96 and the DBS +tables could be combined and mutational signature discovery could be +performed across both mutations modalities. Tables with information +about the same types of variants (e.g. two related SBS tables) should +generally not be combined and used together. + +Custom count tables can be created from user-defined mutation-level +annotations using the `build_custom_table` function. 
+ +# Create a musica object from an existing count table + +A musica object can be created directly from a count table if it is +available from the start. A variant object or file is not required. The +count table should contain mutational motifs as rows and samples as +columns, and must be able to be coerced to a matrix. The variant class +must also be provided, either SBS, DBS, or IND. Here, the example count +table 'synthetic_breast_counts' is used. + +```{r reate_musica_from_counts} -Different count tables can be combined into one using the `combine_count_tables` -function. For example, the SBS96 and the DBS tables could be combined and -mutational signature discovery could be performed across both mutations -modalities. Tables with information about the same types of variants (e.g. -two related SBS tables) should generally not be combined and used together. +musica2 <- create_musica_from_counts(x = synthetic_breast_counts, variant_class = "SBS") -Custom count tables can be created from user-defined mutation-level annotations -using the `build_custom_table` function. +``` # Discovering Signatures and Exposures + Mutational signature discovery is the process of deconvoluting a matrix -containing counts for each mutation type in each sample into two matrices: 1) -a **signature** matrix containing the probability of each mutation motif in -signature and 2) an **exposure** matrix containing the estimated counts of each -signature in each sample. Discovery and prediction results are save in a -`musica_result` object that includes both the signatures and sample exposures. +containing counts for each mutation type in each sample into two +matrices: 1) a **signature** matrix containing the probability of each +mutation motif in each signature and 2) an **exposure** matrix containing the +estimated counts of each signature in each sample. Discovery and +prediction results are saved in a `musica_result` object that includes +both the signatures and sample exposures. 
```{r discover_sigs} result <- discover_signatures(musica = musica, table_name = "SBS96", @@ -173,17 +255,17 @@ result <- discover_signatures(musica = musica, table_name = "SBS96", nstart = 10) ``` - Supported signature discovery algorithms include: -* Non-negative matrix factorization (nmf) -* Latent Dirichlet Allocation (lda) +- Non-negative matrix factorization (nmf) +- Latent Dirichlet Allocation (lda) -Both have built-in `seed` capabilities for reproducible results, `nstarts` for -multiple independent chains from which the best final result will be chosen. -NMF also allows for parallel processing via `par_cores`. +Both have built-in `seed` capabilities for reproducible results, +`nstarts` for multiple independent chains from which the best final +result will be chosen. NMF also allows for parallel processing via +`par_cores`. -To get the signatures or exposures from the result object, the following +To get the signatures or exposures from the result object, the following functions can be used: ```{r result_accessors} @@ -196,42 +278,63 @@ sigs <- signatures(result) sigs[1:3,1:3] ``` +# Import a musica result object + +If signature discovery or prediction was performed previously or +externally, a 'musica_result' object can be created directly from the +signatures and exposures. The signatures table should contain mutational +motifs as rows and signatures as columns and be able to be coerced to a +matrix. The exposures matrix should contain signature weights as rows +and samples as columns and must also be able to be coerced to a matrix. +A mutation count table must also be provided. The method or algorithm +used to generate the results may be provided with 'algorithm'. Here, +example data from a synthetic breast cancer dataset is used. 
+ +```{r create_musica_result} + +NMF_result <- create_musica_result(signatures = example_predicted_sigs, exposures = example_predicted_exp, count_table = synthetic_breast_counts, algorithm = "NMF") + +``` # Plotting ## Signatures -Barplots showing the probability of each mutation type in each signature can -be plotted with the `plot_signatures` function: + +Barplots showing the probability of each mutation type in each signature +can be plotted with the `plot_signatures` function: ```{r, plot_sigs} plot_signatures(result) ``` -By default, the scales on the y-axis are forced to be the same across all -signatures. This behavior can be turned off by setting `same_scale = FALSE`. -Signatures can be re-named based on prior knowledge and displayed in the plots: +By default, the scales on the y-axis are forced to be the same across +all signatures. This behavior can be turned off by setting +`same_scale = FALSE`. Signatures can be re-named based on prior +knowledge and displayed in the plots: ```{r, name_sigs} name_signatures(result, c("Smoking", "APOBEC", "UV")) plot_signatures(result) ``` - ## Exposures -Barplots showing the exposures in each sample can be plotted with the +Barplots showing the exposures in each sample can be plotted with the `plot_exposures` function: ```{r exposures_raw} plot_exposures(result, plot_type = "bar") ``` -The proportion of each exposure in each tumor can be shown by setting `proportional = TRUE`: +The proportion of each exposure in each tumor can be shown by setting +`proportional = TRUE`: + ```{r exposures_prop} plot_exposures(result, plot_type = "bar", proportional = TRUE) ``` -Counts for individual samples can also be plotted with the `plot_sample_counts` function: +Counts for individual samples can also be plotted with the +`plot_sample_counts` function: ```{r sample_counts} samples <- sample_names(musica) @@ -239,18 +342,39 @@ plot_sample_counts(musica, sample_names = samples[c(3,4,5)], table_name = "SBS96 ``` ## Comparison to 
external signatures (e.g. COSMIC) -A common analysis is to compare the signatures estimated in a dataset to those generated in other datasets or to those in the [COSMIC database](https://cancer.sanger.ac.uk/cosmic/signatures). We have a set of functions that can be used to easily perform pairwise correlations between signatures. The `compare_results` functions compares the signatures between two `musica_result` objects. The `compare_cosmic_v2` will correlate the signatures between a `musica_result` object and the SBS signatures in COSMIC V2. For example: + +A common analysis is to compare the signatures estimated in a dataset to +those generated in other datasets or to those in the [COSMIC +database](https://cancer.sanger.ac.uk/cosmic/signatures). We have a set +of functions that can be used to easily perform pairwise correlations +between signatures. The `compare_results` functions compares the +signatures between two `musica_result` objects. The `compare_cosmic_v2` +will correlate the signatures between a `musica_result` object and the +SBS signatures in COSMIC V2. For example: ```{r compare_cosmic} compare_cosmic_v2(result, threshold = 0.75) ``` -In this example, our Signatures 1 and 2 were most highly correlated to COSMIC Signature 7 and 4, respectively, so this may indicate that samples in our dataset were exposed to UV radiation or cigarette smoke. Only pairs of signatures who have a correlation above the `threshold` parameter will be returned. If no pairs of signatures are found, then you may want to consider lowering the threshold. The default correlation metric is the cosine similarity, but this can be changed to using 1 - Jensen-Shannon Divergence by setting -`metric = "jsd"` Signatures can also be correlated to those in the COSMIC V3 database using the `compare_cosmic_v3` function. 
+In this example, our Signatures 1 and 2 were most highly correlated to +COSMIC Signatures 7 and 4, respectively, so this may indicate that +samples in our dataset were exposed to UV radiation or cigarette smoke. +Only pairs of signatures that have a correlation above the `threshold` +parameter will be returned. If no pairs of signatures are found, then +you may want to consider lowering the threshold. The default correlation +metric is the cosine similarity, but this can be changed to using 1 - +Jensen-Shannon Divergence by setting `metric = "jsd"`. Signatures can +also be correlated to those in the COSMIC V3 database using the +`compare_cosmic_v3` function. # Predicting exposures using pre-existing signatures -Instead of discovering mutational signatures and exposures from a dataset *de novo*, one might get better results by predicting the exposures of signatures that have been previously estimated in other datasets. We incorporate several methods for estimating exposures given a set of pre-existing signatures. For example, we can predict exposures for COSMIC signatures 1, 4, 7, and 13 in our current dataset: +Instead of discovering mutational signatures and exposures from a +dataset *de novo*, one might get better results by predicting the +exposures of signatures that have been previously estimated in other +datasets. We incorporate several methods for estimating exposures given +a set of pre-existing signatures. For example, we can predict exposures +for COSMIC signatures 1, 4, 7, and 13 in our current dataset: ```{r predict_cosmic} @@ -267,22 +391,39 @@ pred_cosmic <- predict_exposure(musica = musica, table_name = "SBS96", plot_exposures(pred_cosmic, plot_type = "bar") ``` -The `cosmic_v2_sigs` object is just a `musica_result` object containing COSMIC V2 signatures without any sample or exposure information. Note that if `signatures_to_use` is not supplied by the user, then exposures for all signatures in the result object will be estimated. 
We can predict exposures for samples in any `musica` object from any `musica_result` object as long as the same mutation schema was utilized. +The `cosmic_v2_sigs` object is just a `musica_result` object containing +COSMIC V2 signatures without any sample or exposure information. Note +that if `signatures_to_use` is not supplied by the user, then exposures +for all signatures in the result object will be estimated. We can +predict exposures for samples in any `musica` object from any +`musica_result` object as long as the same mutation schema was utilized. -We can list which signatures are present in each tumor type according to the [COSMIC V2 database](https://cancer.sanger.ac.uk/cosmic/signatures_v2.tt). For example, we can find which signatures are present in lung cancers: +We can list which signatures are present in each tumor type according to +the [COSMIC V2 +database](https://cancer.sanger.ac.uk/cosmic/signatures_v2.tt). For +example, we can find which signatures are present in lung cancers: ```{r subtype_map} cosmic_v2_subtype_map("lung") ``` -We can predict exposures for samples in a `musica` object using the signatures from any `musica_result` object. Just for illustration, we will predict exposures using the estimated signatures from `musica_result` object we created earlier: +We can predict exposures for samples in a `musica` object using the +signatures from any `musica_result` object. Just for illustration, we +will predict exposures using the estimated signatures from +`musica_result` object we created earlier: ```{r predict_previous} pred_our_sigs <- predict_exposure(musica = musica, table_name = "SBS96", signature_res = result, algorithm = "lda") ``` -Of course, this example is not very useful because we are predicting exposures using signatures that were learned on the same set of samples. 
Most of the time, you want to give the `signature_res` parameter a `musica_result` object that wss generated using independent samples from those in the `musica` object. As mentioned above, different signatures in different result objects can be compared to each other using the `compare_results` function: +Of course, this example is not very useful because we are predicting +exposures using signatures that were learned on the same set of samples. +Most of the time, you want to give the `signature_res` parameter a +`musica_result` object that was generated using independent samples from +those in the `musica` object. As mentioned above, different signatures +in different result objects can be compared to each other using the +`compare_results` function: ```{r predict_compare} compare_results(result = pred_cosmic, other_result = pred_our_sigs, @@ -293,16 +434,24 @@ compare_results(result = pred_cosmic, other_result = pred_our_sigs, ## Adding sample annotations -We first must add an annotation to the `musica` or `musica_result` object +We first must add an annotation to the `musica` or `musica_result` +object: + ```{r annotations} annot <- read.table(system.file("extdata", "sample_annotations.txt", package = "musicatk"), sep = "\t", header=TRUE) samp_annot(result, "Tumor_Subtypes") <- annot$Tumor_Subtypes ``` -Note that the annotations can also be added directly the `musica` object in the beginning using the same function: `samp_annot(musica, "Tumor_Subtypes") <- annot$Tumor_Subtypes`. These annotations will then automatically be included in any down-stream result object. +Note that the annotations can also be added directly to the `musica` object +in the beginning using the same function: +`samp_annot(musica, "Tumor_Subtypes") <- annot$Tumor_Subtypes`. These +annotations will then automatically be included in any down-stream +result object. 
-* **Be sure that the annotation vector being supplied is in the same order as the samples in the `musica` or `musica_result` object.** You can view the sample order with the `sample_names` function: +- **Be sure that the annotation vector being supplied is in the same + order as the samples in the `musica` or `musica_result` object.** + You can view the sample order with the `sample_names` function: ```{r sample_names} sample_names(result) @@ -310,20 +459,30 @@ sample_names(result) ## Plotting exposures by a Sample Annotation -As mentioned previously, the `plot_exposures` function can plot sample exposures in a `musica_result` object. It can group exposures by either a sample annotation or by a signature by setting the `group_by` parameter. Here will will group the exposures by the `Tumor_Subtype` annotation: +As mentioned previously, the `plot_exposures` function can plot sample +exposures in a `musica_result` object. It can group exposures by either +a sample annotation or by a signature by setting the `group_by` +parameter. Here we will group the exposures by the `Tumor_Subtypes` +annotation: ```{r plot_exposures_by_subtype} plot_exposures(result, plot_type = "bar", group_by = "annotation", annotation = "Tumor_Subtypes") ``` -The distribution of exposures with respect to annotation can be viewed using boxplots by setting `plot_type = "box"` and `group_by = "annotation"`: +The distribution of exposures with respect to annotation can be viewed +using boxplots by setting `plot_type = "box"` and +`group_by = "annotation"`: ```{r plot_exposures_box_annot} plot_exposures(result, plot_type = "box", group_by = "annotation", annotation = "Tumor_Subtypes") ``` -Note that the name of the annotation must be supplied via the `annotation` parameter. Boxplots can be converted to violin plots by setting `plot_type = "violin"`. 
To compare the level of each exposure across sample groups within a signature, we can set `group_by = "signature"` and `color_by = "annotation"`: +Note that the name of the annotation must be supplied via the +`annotation` parameter. Boxplots can be converted to violin plots by +setting `plot_type = "violin"`. To compare the level of each exposure +across sample groups within a signature, we can set +`group_by = "signature"` and `color_by = "annotation"`: ```{r plot_exposures_box_sig} plot_exposures(result, plot_type = "box", group_by = "signature", @@ -332,40 +491,55 @@ plot_exposures(result, plot_type = "box", group_by = "signature", ## Visualizing samples in 2D using UMAP -The `create_umap` function embeds samples in 2 dimensions using the `umap` function from the [uwot](https://cran.r-project.org/web/packages/uwot/index.html) package. The major parameters for fine tuning the UMAP are `n_neighbors`, `min_dist`, and `spread`. See `?uwot::umap` for more information on these parameters. +The `create_umap` function embeds samples in 2 dimensions using the +`umap` function from the +[uwot](https://cran.r-project.org/web/packages/uwot/index.html) package. +The major parameters for fine tuning the UMAP are `n_neighbors`, +`min_dist`, and `spread`. See `?uwot::umap` for more information on +these parameters. ```{r umap_create} create_umap(result = result) ``` -The `plot_umap` function will generate a scatter plot of the UMAP coordinates. The points of plot will be colored by the level of a signature by default: +The `plot_umap` function will generate a scatter plot of the UMAP +coordinates. The points of plot will be colored by the level of a +signature by default: ```{r umap_plot} plot_umap(result = result) ``` -By default, the exposures for each sample will share the same color scale. However, exposures for some signatures may have really high levels compared to others. 
To make a plot where exposures for each signature will have their own color scale, you can set `same_scale = FALSE`: +By default, the exposures for each sample will share the same color +scale. However, exposures for some signatures may have really high +levels compared to others. To make a plot where exposures for each +signature will have their own color scale, you can set +`same_scale = FALSE`: ```{r umap_plot_same_scale} plot_umap(result = result, same_scale = FALSE) ``` -Lastly, points can be colored by a Sample Annotation by setting `color_by = "annotation"` and `annotation` to the name of the annotation: +Lastly, points can be colored by a Sample Annotation by setting +`color_by = "annotation"` and `annotation` to the name of the +annotation: ```{r umap_plot_annot} plot_umap(result = result, color_by = "annotation", annotation = "Tumor_Subtypes", add_annotation_labels = TRUE) ``` -When `add_annotation_labels = TRUE`, the centroid of each group is identified -using medians and the labels are plotted on top of the centroid. +When `add_annotation_labels = TRUE`, the centroid of each group is +identified using medians and the labels are plotted on top of the +centroid. # Use of Plotly in plotting -plot_signatures, plot_exposures, and plot_umap, all have builty in ggplotly -capabilities. Simply specifying `plotly = TRUE` enables interactive plots -that allows examination of individuals sections, zooming and resizing, and -turning on and off annotation types and legend values. +plot_signatures, plot_exposures, and plot_umap, all have built-in +ggplotly capabilities. Simply specifying `plotly = TRUE` enables +interactive plots that allow examination of individual sections, +zooming and resizing, and turning on and off annotation types and legend +values. 
```{r plotly} plot_signatures(result, plotly = TRUE) @@ -375,16 +549,17 @@ plot_umap(result, plotly = TRUE) # Note on reproducibility -Several functions make use of stochastic algorithms or procedures which -require the use of random number generator (RNG) for simulation or sampling. -To maintain reproducibility, all these functions should be called using -```set_seed(1)``` or ```withr::with_seed(seed, function())``` to make sure -same results are generatedeach time one of these functions is called. Using -with_seed for reproducibility has the advantage of not interfering with any -other user seeds, but using one or the other is important for several functions -including *discover_signatures*, *predict_exposure*, and *create_umap*, as -these functions use stochastic processes that may produce different results -when run multiple times with the same settings. +Several functions make use of stochastic algorithms or procedures which +require the use of a random number generator (RNG) for simulation or +sampling. To maintain reproducibility, all these functions should be +called using `set.seed(1)` or `withr::with_seed(seed, function())` to +make sure the same results are generated each time one of these functions is +called. Using with_seed for reproducibility has the advantage of not +interfering with any other user seeds, but using one or the other is +important for several functions including *discover_signatures*, +*predict_exposure*, and *create_umap*, as these functions use stochastic +processes that may produce different results when run multiple times +with the same settings. 
```{r reproducible_prediction} seed <- 1 @@ -394,8 +569,8 @@ reproducible_prediction <- withr::with_seed(seed, signature_res = result, algorithm = "lda")) ``` - # Session Information + ```{r session} sessionInfo() ``` From 42747fef6b02e97cca14e59914d96e596c9a09e0 Mon Sep 17 00:00:00 2001 From: Natasha Gurevich Date: Tue, 25 Jun 2024 13:21:17 -0400 Subject: [PATCH 15/17] Benchmarking bug fixes and new predict_and_benchmark function --- NAMESPACE | 1 + R/benchmarking.R | 128 ++++++++++++++++++++++++++++++++--- man/create_benchmark.Rd | 4 +- man/predict_and_benchmark.Rd | 67 ++++++++++++++++++ 4 files changed, 188 insertions(+), 12 deletions(-) create mode 100644 man/predict_and_benchmark.Rd diff --git a/NAMESPACE b/NAMESPACE index d6071d0a..d7574069 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -88,6 +88,7 @@ export(plot_sample_counts) export(plot_sample_reconstruction_error) export(plot_signatures) export(plot_umap) +export(predict_and_benchmark) export(predict_exposure) export(rc) export(samp_annot) diff --git a/R/benchmarking.R b/R/benchmarking.R index 57bb903f..68223067 100644 --- a/R/benchmarking.R +++ b/R/benchmarking.R @@ -4,13 +4,14 @@ #' #' @param true_signatures A matrix of true signatures by mutational motifs #' @param true_exposures A matrix of samples by true signature weights +#' @param count_table Summary table with per-sample unnormalized motif counts #' #' @return A \code{\linkS4class{full_benchmark}} object #' @export -create_benchmark <- function(true_signatures, true_exposures){ +create_benchmark <- function(true_signatures, true_exposures, count_table){ # create musica result object to hold true exposures and signatures - truth <- create_musica_result(true_signatures, true_exposures) + truth <- create_musica_result(true_signatures, true_exposures, count_table) # create benchmark object full_benchmark <- new("full_benchmark", ground_truth = truth) @@ -189,8 +190,8 @@ benchmark <- function(full_benchmark, prediction, method_id = NULL, 
#benchmark_plot_exposures(full_benchmark, method_id, prediction = "Initial") #message("\nDuplicate signature exposures before/afters...\n") - benchmark_plot_duplicate_exposures(full_benchmark, method_id) - #print(duplicate_plot) + duplicate_plot <- benchmark_plot_duplicate_exposures(full_benchmark, method_id) + print(duplicate_plot) #message("\nIntermediate Signatures...\n") intermediate_sig_plot <- benchmark_plot_signatures(full_benchmark, method_id, prediction = "Intermediate", same_scale = FALSE) @@ -201,8 +202,8 @@ benchmark <- function(full_benchmark, prediction, method_id = NULL, #benchmark_plot_exposures(full_benchmark, method_id, prediction = "Intermediate") #message("\nComposite signature exposures before/afters...\n") - benchmark_plot_composite_exposures(full_benchmark, method_id) - #print(composite_plot) + composite_plot <- benchmark_plot_composite_exposures(full_benchmark, method_id) + print(composite_plot) #message("\nFinal Signatures...\n") final_sig_plot <- benchmark_plot_signatures(full_benchmark, method_id, prediction = "Final", same_scale = FALSE) @@ -225,6 +226,73 @@ benchmark <- function(full_benchmark, prediction, method_id = NULL, +} + +#' Predict and benchmark +#' +#' This function will discover signatures from a musica object using a given +#' algorithm and a range of k values. A new discovery is done for each k value. +#' As each discovery is completed, the prediction is benchmarked. +#' +#' @param musica A \code{\linkS4class{musica}} object. +#' @param table_name Name of the table to use for signature discovery. Needs +#' to be the same name supplied to the table building functions such as +#' \link{build_standard_table}. +#' @param algorithm Method to use for mutational signature discovery. One of +#' \code{"lda"} or \code{"nmf"}. Default \code{"lda"}. 
+#' @param k_min Minimum number of signatures to predict +#' @param k_max Maximum number of signatures to predict +#' @param full_benchmark An object of class \code{\linkS4class{full_benchmark}} +#' created with the \link{create_benchmark} function or returned from a previous +#' \code{benchmark} run. +#' @param threshold Cosine similarity cutoff for comparing predicted and true +#' signatures. Default \code{0.8}. +#' @param adjustment_threshold Cosine similarity value of high confidence. +#' Comparisons that meet this cutoff are assumed to be likely, +#' while those that fall below the cutoff will be disregarded if the predicted +#' signature is already captured above the threshold. Default \code{0.9}. +#' @param plot If \code{FALSE}, plots will be suppressed. Default \code{FALSE}. +#' @param seed Seed to be used for the random number generators in the +#' signature discovery algorithms. Default \code{1}. +#' @param nstart Number of independent random starts used in the mutational +#' signature algorithms. Default \code{10}. +#' @param par_cores Number of parallel cores to use. Only used if +#' \code{algorithm = "nmf"}. Default \code{1}. +#' +#' @return A \code{full_benchmark} object updated with the benchmarked +#' prediction for each value of k from \code{k_min} to \code{k_max}. 
+#' @export +predict_and_benchmark <- function(musica, table_name, algorithm = "lda", k_min, + k_max, full_benchmark, threshold = 0.8, + adjustment_threshold = 0.9, plot = FALSE, + seed = 1, nstart = 10, par_cores = 1){ + + + for (k in c(k_min:k_max)){ + + message("Discovering signatures for k = ", k, "...") + + prediction <- discover_signatures(musica = musica, table_name = table_name, + num_signatures = k, algorithm = algorithm, + seed = seed, nstart = nstart, + par_cores = par_cores) + + message("Benchmarking prediction for k = ", k, "...\n") + + full_benchmark <- benchmark(full_benchmark = full_benchmark, + prediction = prediction, + method_id = paste(algorithm, k, sep = ""), + threshold = threshold, + adjustment_threshold = adjustment_threshold, + description = paste("Prediction from predict_and_benchmark function. Algorithm: ", algorithm, ". K = ", k, ".", sep = ""), + plot = plot, make_copy = TRUE) + } + + + print(full_benchmark@method_view_summary) + + return(full_benchmark) + } @@ -560,12 +628,14 @@ benchmark_plot_exposures <- function(full_benchmark, method_id, prediction){ predicted <- c() true <- c() sig <- c() + index <- 1 for (true_sig in comparison$y_sig_name){ predicted_sig <- - comparison[comparison$y_sig_name == true_sig,4] + comparison[index,4] predicted <- c(predicted, exposures(prediction)[predicted_sig,]) true <- c(true, exposures(truth)[,true_sig]) sig <- c(sig, rep(true_sig, dim(exposures(truth))[1])) + index <- index + 1 } plot_df <- data.frame(Predicted = predicted, True = true, Sig = sig) @@ -600,7 +670,7 @@ benchmark_plot_exposures <- function(full_benchmark, method_id, prediction){ #' @param method_id The identifier for the \code{\linkS4class{single_benchmark}} #' object of interest #' -#' @return Two ggplot images are displayed +#' @return A ggplot object #' @export benchmark_plot_duplicate_exposures <- function(full_benchmark, method_id){ @@ -631,6 +701,8 @@ benchmark_plot_duplicate_exposures <- function(full_benchmark, method_id){ 
freq <- table(comparison$y_sig_name) duplicated_signatures <- names(freq[freq > 1]) + count <- 1 + for (duplicated_sig in duplicated_signatures){ before_exposures <- NULL @@ -666,7 +738,7 @@ benchmark_plot_duplicate_exposures <- function(full_benchmark, method_id){ geom_smooth(method = "lm") + theme(legend.title=element_blank()) - print(before_plot) + #print(before_plot) new_sig_name <- "Merged Signature (" for (sig in sigs_to_merge){ @@ -700,8 +772,26 @@ benchmark_plot_duplicate_exposures <- function(full_benchmark, method_id){ geom_smooth(method = "lm") + theme(legend.title=element_blank()) - print(after_plot) + #print(after_plot) + figure <- ggpubr::ggarrange(before_plot, after_plot, ncol = 2, nrow = 1) + + if (count == 1){ + full_figure <- figure + } + else{ + full_figure <- ggpubr::ggarrange(full_figure, figure, ncol = 1, heights = c(count - 1,1)) + } + + count <- count + 1 + #print(figure) + + + } + + #print(full_figure) + if (count != 1){ + return(full_figure) } } @@ -719,7 +809,7 @@ benchmark_plot_duplicate_exposures <- function(full_benchmark, method_id){ #' @param method_id The identifier for the \code{\linkS4class{single_benchmark}} #' object of interest #' -#' @return Two ggplot images are displayed +#' @return A ggplot object #' @export benchmark_plot_composite_exposures <- function(full_benchmark, method_id){ @@ -750,6 +840,8 @@ benchmark_plot_composite_exposures <- function(full_benchmark, method_id){ freq <- table(comparison$x_sig_name) composite_signatures <- names(freq[freq > 1]) + count <- 1 + for (composite_sig in composite_signatures){ before_exposures <- NULL @@ -830,10 +922,24 @@ benchmark_plot_composite_exposures <- function(full_benchmark, method_id){ print(after_plot) + figure <- ggpubr::ggarrange(before_plot, after_plot, ncol = 2, nrow = 1) + + if (count == 1){ + full_figure <- figure + } + else{ + full_figure <- ggpubr::ggarrange(full_figure, figure, ncol = 1, heights = c(count - 1,1)) + } + + count <- count + 1 } #combined_plot<- 
gridExtra::grid.arrange(before_plot, after_plot, ncol = 2) #return(combined_plot) + + if (count != 1){ + return(full_figure) + } } diff --git a/man/create_benchmark.Rd b/man/create_benchmark.Rd index 2db5a52b..98fb11bf 100644 --- a/man/create_benchmark.Rd +++ b/man/create_benchmark.Rd @@ -4,12 +4,14 @@ \alias{create_benchmark} \title{Create a full_benchmark object} \usage{ -create_benchmark(true_signatures, true_exposures) +create_benchmark(true_signatures, true_exposures, count_table) } \arguments{ \item{true_signatures}{A matrix of true signatures by mutational motifs} \item{true_exposures}{A matrix of samples by true signature weights} + +\item{count_table}{Summary table with per-sample unnormalized motif counts} } \value{ A \code{\linkS4class{full_benchmark}} object diff --git a/man/predict_and_benchmark.Rd b/man/predict_and_benchmark.Rd new file mode 100644 index 00000000..2b4d89f8 --- /dev/null +++ b/man/predict_and_benchmark.Rd @@ -0,0 +1,67 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/benchmarking.R +\name{predict_and_benchmark} +\alias{predict_and_benchmark} +\title{Predict and benchmark} +\usage{ +predict_and_benchmark( + musica, + table_name, + algorithm = "lda", + k_min, + k_max, + full_benchmark, + threshold = 0.8, + adjustment_threshold = 0.9, + plot = FALSE, + seed = 1, + nstart = 10, + par_cores = 1 +) +} +\arguments{ +\item{musica}{A \code{\linkS4class{musica}} object.} + +\item{table_name}{Name of the table to use for signature discovery. Needs +to be the same name supplied to the table building functions such as +\link{build_standard_table}.} + +\item{algorithm}{Method to use for mutational signature discovery. One of +\code{"lda"} or \code{"nmf"}. 
Default \code{"lda"}.} + +\item{k_min}{Minimum number of singatures to predict} + +\item{k_max}{Maximum number of signatures to predict} + +\item{full_benchmark}{An object of class \code{\linkS4class{full_benchmark}} +created with the \link{create_benchmark} function or returned from a previous +\code{benchmark} run.} + +\item{threshold}{Cosine similarity cutoff for comparing preidcted and true +signatures. Default \code{0.8}.} + +\item{adjustment_threshold}{Cosine similarity value of high confidence. +Comparisons that meet this cutoff are assumed to be likely, +while those that fall below the cutoff will be disregarded if the predicted +signature is already captured above the threshold. Default \code{0.9}.} + +\item{plot}{If \code{FALSE}, plots will be suppressed. Default \code{TRUE}.} + +\item{seed}{Seed to be used for the random number generators in the +signature discovery algorithms. Default \code{1}.} + +\item{nstart}{Number of independent random starts used in the mutational +signature algorithms. Default \code{10}.} + +\item{par_cores}{Number of parallel cores to use. Only used if +\code{method = "nmf"}. Default \code{1}.} +} +\value{ +If \code{make_copy == TRUE}, a new \code{full_benchmark} object is +returned. If \code{make_copy == FALSE}, nothing is returned. +} +\description{ +This function will discover signatures from a musica object using a given +algorithm and a range of k values. A new discovery is done for each k value. +As each discovery is completed, the prediction is benchmarked. 
+} From f4c6277e96fdc063e0ac00b4fbcc7efdf94b6e2f Mon Sep 17 00:00:00 2001 From: Natasha Gurevich Date: Fri, 28 Jun 2024 14:48:49 -0400 Subject: [PATCH 16/17] Updates to return structure of benchmark_plot_composite_exposures and benchmark_plot_duplicate_exposures --- R/benchmarking.R | 74 ++++++++------------ man/benchmark_plot_composite_exposures.Rd | 2 +- man/benchmark_plot_duplicate_exposures.Rd | 2 +- vignettes/articles/benchmarking_tutorial.Rmd | 2 +- 4 files changed, 33 insertions(+), 47 deletions(-) diff --git a/R/benchmarking.R b/R/benchmarking.R index 68223067..cd1b91ac 100644 --- a/R/benchmarking.R +++ b/R/benchmarking.R @@ -670,7 +670,7 @@ benchmark_plot_exposures <- function(full_benchmark, method_id, prediction){ #' @param method_id The identifier for the \code{\linkS4class{single_benchmark}} #' object of interest #' -#' @return A ggplot object +#' @return A list of ggplot objects #' @export benchmark_plot_duplicate_exposures <- function(full_benchmark, method_id){ @@ -701,7 +701,7 @@ benchmark_plot_duplicate_exposures <- function(full_benchmark, method_id){ freq <- table(comparison$y_sig_name) duplicated_signatures <- names(freq[freq > 1]) - count <- 1 + final_figures <- list() for (duplicated_sig in duplicated_signatures){ @@ -773,26 +773,14 @@ benchmark_plot_duplicate_exposures <- function(full_benchmark, method_id){ theme(legend.title=element_blank()) #print(after_plot) - figure <- ggpubr::ggarrange(before_plot, after_plot, ncol = 2, nrow = 1) - if (count == 1){ - full_figure <- figure - } - else{ - full_figure <- ggpubr::ggarrange(full_figure, figure, ncol = 1, heights = c(count - 1,1)) - } - - count <- count + 1 - - #print(figure) + figure <- ggpubr::ggarrange(before_plot, after_plot, ncol = 2, nrow = 1) + final_figures <- append(final_figures, list(figure)) } - #print(full_figure) - if (count != 1){ - return(full_figure) - } + return(final_figures) } @@ -809,7 +797,7 @@ benchmark_plot_duplicate_exposures <- function(full_benchmark, method_id){ #' 
@param method_id The identifier for the \code{\linkS4class{single_benchmark}} #' object of interest #' -#' @return A ggplot object +#' @return A list of ggplot objects #' @export benchmark_plot_composite_exposures <- function(full_benchmark, method_id){ @@ -842,6 +830,8 @@ benchmark_plot_composite_exposures <- function(full_benchmark, method_id){ count <- 1 + final_figures <- list() + for (composite_sig in composite_signatures){ before_exposures <- NULL @@ -879,7 +869,7 @@ benchmark_plot_composite_exposures <- function(full_benchmark, method_id){ geom_smooth(method = "lm") + theme(legend.title=element_blank()) - print(before_plot) + #print(before_plot) colnames <- NULL for (component in sig_components){ @@ -920,27 +910,15 @@ benchmark_plot_composite_exposures <- function(full_benchmark, method_id){ geom_smooth(method = "lm") + theme(legend.title=element_blank()) - print(after_plot) + #print(after_plot) figure <- ggpubr::ggarrange(before_plot, after_plot, ncol = 2, nrow = 1) - if (count == 1){ - full_figure <- figure - } - else{ - full_figure <- ggpubr::ggarrange(full_figure, figure, ncol = 1, heights = c(count - 1,1)) - } - - count <- count + 1 + final_figures <- append(final_figures, list(figure)) + } - #combined_plot<- gridExtra::grid.arrange(before_plot, after_plot, ncol = 2) - #return(combined_plot) - - if (count != 1){ - return(full_figure) - } - + return(final_figures) } @@ -1311,23 +1289,31 @@ benchmark_plot_composite_exposures <- function(full_benchmark, method_id){ high_threshold_comp <- comparison[comparison$cosine > adjustment_threshold,] indexes_to_keep <- c() - for (index in 1:dim(low_threshold_comp)[1]){ - if (low_threshold_comp[index,4] %in% high_threshold_comp$x_sig_name == FALSE){ - indexes_to_keep <- c(indexes_to_keep, index) - } - else{ - existing_cs <- high_threshold_comp[high_threshold_comp$x_sig_name == low_threshold_comp[index,4], 1][1] - diff <- abs(existing_cs - low_threshold_comp[index,1]) - if (diff < 0.05){ + if 
(dim(low_threshold_comp)[1] > 0){ + for (index in 1:dim(low_threshold_comp)[1]){ + if (low_threshold_comp[index,4] %in% high_threshold_comp$x_sig_name == FALSE){ indexes_to_keep <- c(indexes_to_keep, index) } + else{ + existing_cs <- high_threshold_comp[high_threshold_comp$x_sig_name == low_threshold_comp[index,4], 1][1] + diff <- abs(existing_cs - low_threshold_comp[index,1]) + if (diff < 0.05){ + indexes_to_keep <- c(indexes_to_keep, index) + } + } } - } + comparison_adj <- rbind(high_threshold_comp, low_threshold_comp[indexes_to_keep,]) return(comparison_adj) + } + + else{ + return(comparison) + } + } # Function for claculating RE diff --git a/man/benchmark_plot_composite_exposures.Rd b/man/benchmark_plot_composite_exposures.Rd index 343bff07..33bce1d6 100644 --- a/man/benchmark_plot_composite_exposures.Rd +++ b/man/benchmark_plot_composite_exposures.Rd @@ -14,7 +14,7 @@ benchmarking analysis} object of interest} } \value{ -Two ggplot images are displayed +A list of ggplot objects } \description{ After a prediction has been benchmarked with the \link{benchmark} function, diff --git a/man/benchmark_plot_duplicate_exposures.Rd b/man/benchmark_plot_duplicate_exposures.Rd index 36ee78b2..f7cbc7a0 100644 --- a/man/benchmark_plot_duplicate_exposures.Rd +++ b/man/benchmark_plot_duplicate_exposures.Rd @@ -14,7 +14,7 @@ benchmarking analysis} object of interest} } \value{ -Two ggplot images are displayed +A list of ggplot objects } \description{ After a prediction has been benchmarked with the \link{benchmark} function, diff --git a/vignettes/articles/benchmarking_tutorial.Rmd b/vignettes/articles/benchmarking_tutorial.Rmd index d8aada44..0e3c9d74 100644 --- a/vignettes/articles/benchmarking_tutorial.Rmd +++ b/vignettes/articles/benchmarking_tutorial.Rmd @@ -79,7 +79,7 @@ count_table <- synthetic_breast_counts musica <- create_musica_from_counts(count_table, "SBS96") # prediction -res2 <- discover_signatures(musica, "SBS96", num_signatures = 9, algorithm = "nmf") +res2 
<- discover_signatures(musica, "SBS96", num_signatures = 8, algorithm = "nmf") ``` From a887c9682c794bf289afc9281474eb6017472d7d Mon Sep 17 00:00:00 2001 From: Natasha Gurevich Date: Mon, 16 Sep 2024 17:46:20 -0400 Subject: [PATCH 17/17] Updates to benchmarking tutorial --- vignettes/articles/benchmarking_tutorial.Rmd | 48 ++++++++++++-------- 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/vignettes/articles/benchmarking_tutorial.Rmd b/vignettes/articles/benchmarking_tutorial.Rmd index 0e3c9d74..37587adb 100644 --- a/vignettes/articles/benchmarking_tutorial.Rmd +++ b/vignettes/articles/benchmarking_tutorial.Rmd @@ -1,26 +1,34 @@ --- title: "Benchmarking signaure predictions against a ground truth" -date: "Compiled `r format(Sys.time(), '%B %d, %Y')`" author: "Natasha Gurevich, Joshua Campbell" +date: "Compiled `r format(Sys.time(), '%B %d, %Y')`" +output: word_document --- ```{r setup, include = FALSE} knitr::opts_chunk$set(warning = FALSE, fig.align='center') ``` # Introduction -write something here..... -```{r setup} +There are many different tools available to perform mutational signatures analysis. In order to compare their performances, or to benchmark a new tool against existing methods, there needs to be a standard way to evaluate how accurate the results are. To evaluate result accuracy, we must have the ground truth signatures and exposures. Then, the predicted signatures and exposures can be compared to the truth, and the differences can be quantified. + +Comparing prediction to ground truth necessitates mapping predicted signatures to true signatures. In order to achieve a one-to-one matching of predicted and true signatures, several artifacts need to be accounted for. This includes duplicate signatures, where multiple predicted signatures have similarity to the same true signature, and composite signatures, where one predicted signature has similarity to multiple true signatures. 
Missing signatures (true signatures that were not recovered in the prediction) and spurious signatures (predicted signatures that do not correlate to any true signature) must also be noted. + +In the benchmarking procedure below, these artifacts are addressed and plots and summary tables are generated to qualitatively and quantitatively assess how accurate the predicted results are compared to the ground truth. The process can be run multiple times for different predictions, and the summary tables will aggregate the results from the multiple runs, allowing for easy comparison between different tools or methods. + +```{r load package} + library(musicatk) + ``` # Step 1. Initializing the Benchmarking Structure -Results from all analyses related to the same ground truth signatures and loadings can be stored in one `full_benchmark` object. Benchmarking can be performed on multiple prediction instances and stored in one place, allowing for easy comparison between discovery methods or discovery parameters. +Results from all analyses related to the same ground truth signatures and exposures can be stored in one `full_benchmark` object. Benchmarking can be performed on multiple prediction instances and stored in one place, allowing for easy comparison between discovery methods or discovery parameters. -The `full_benchmark` object is initialized using the ground truth signatures and loadings, both of which must be a matrix. The signatures matrix should contain signatures as columns mutation types as rows. Each cell details that mutation type's percent contribution to the given signature. The loadings matrix should contain signatures as columns and samples as rows. Each cell details the number of mutations in the sample that are attributed to the given signature. +The `full_benchmark` object is initialized using the ground truth signatures and exposures, both of which must be a matrix. The signatures matrix should contain signatures as columns and mutation types as rows. 
Each cell details that mutation type's percent contribution to the given signature. The exposures matrix should contain signatures as columns and samples as rows. Each cell details the number of mutations in the sample that are attributed to the given signature. -The example below prepares a `full_benchmark` object using a snythetic breast cancer dataset provided in the package. +The example below prepares a `full_benchmark` object using a synthetic breast cancer dataset provided in the package. ```{r prepare ground truth} @@ -29,9 +37,11 @@ true_sigs <- c("SBS1", "SBS2", "SBS3", "SBS8", "SBS13", "SBS17", "SBS18", "SBS26 true_signatures <- signatures(cosmic_v2_sigs)[,true_sigs] # load true exposures +data("synthetic_breast_true_exposures") true_exposures <- synthetic_breast_true_exposures # load count table +data("synthetic_breast_counts") count_table <- synthetic_breast_counts ``` @@ -45,28 +55,28 @@ full_benchmark <- create_benchmark(true_signatures, true_exposures, count_table) # Step 2. Prepare prediction to benchmark -In order to benchmark the results of signature discovery, the prediction results must be stored in a `musica_result` object. +In order to benchmark the results of signature discovery, the prediction results must be stored in a `musica_result` object. ## If signature discovery has already been completed externally -If signature discovery has already been performed and therefore predicted signatures and predicted loadings already exist, this information must be stored in a `musica_result` object using the `create_musica_result` function. Example data is used here. +If signature discovery has already been performed and therefore predicted signatures and predicted exposures already exist, this information must be stored in a `musica_result` object using the `create_musica_result` function. Example data is used here. 
```{r create musica result object} # read in predicted signatures predicted_sigs <- example_predicted_sigs -# read in predicted loadings -predicted_loadings <- example_predicted_exp +# read in predicted exposures +predicted_exposures <- example_predicted_exp # store in a musica result object -res1 <- create_musica_result(predicted_sigs, predicted_loadings, count_table) +res1 <- create_musica_result(predicted_sigs, predicted_exposures, count_table) ``` ## If signature discovery has not yet been performed -If signature discovery has not yet been performed, it cane be done via the NMF or LDA algorithms within the musicatk package. Mutation data can be read in and stored in a `musica` object either from a file of variants or from a +Alternatively, if signature discovery has not yet been performed, it can be done via the NMF or LDA algorithms within the musicatk package. Mutation data can be read in and stored in a `musica` object either from a file of variants or from a count table using the standard procedure. In this example, mutation data is stored in a count table, which is used to create a `musica` object. Signature @@ -97,9 +107,9 @@ benchmark(full_benchmark, res2) If a new `full_benchmark` object is desired, the `make_copy` argument can be used to return a new object, leaving the inputted one unchanged. Below, the res1 prediction will be benchmarked and the result is saved to a new object. For better readability, all plots are suppressed. -```{r perform benchmarking} +```{r perform benchmarking with copy} -new_benchmark <- benchmark(full_benchmark, res1, plot = FALSE) +new_benchmark <- benchmark(full_benchmark, res1, make_copy = TRUE, plot = FALSE) ``` @@ -111,7 +121,7 @@ Any element of the `full_benchmark` or `single_benchmark` objects can be easily Summary tables can easily be extracted from a `full_benchmark` object to visualize the results of the full analysis. 
-```{r view summary} +```{r view method view summary} method_view_summary(full_benchmark) @@ -134,15 +144,15 @@ res2_benchmark <- benchmark_get_entry(full_benchmark, "res2") ``` -Once a single benchmark object has been extracted, the three different predictions within it can be extracted. These include (1) the prediction (predicted signatures and exposures) before any benchmarking adjustments have been made, (2) the prediction after duplicate signatures have been adjusted, and (3) the final prediction, after both duplicate and composite signatures have been adjusted. +Once a single benchmark object has been extracted, the three different predictions within it can be extracted. These include (1) the initial prediction, before any benchmarking adjustments have been made, (2) the intermediate prediction, after duplicate signatures have been adjusted, and (3) the final prediction, after both duplicate and composite signatures have been adjusted. ```{r extract a prediction} -# extract the res2 prediction before becnhamrking adjustments have been made +# extract the res2 prediction before benchmarking adjustments have been made res2_initial_prediction <- benchmark_get_prediction(res2_benchmark, "initial") # extract the res2 prediction after duplicate signatures have been corrected, but before composites have been corrected -res2_initial_prediction <- benchmark_get_prediction(res2_benchmark, "intermediate") +res2_intermediate_prediction <- benchmark_get_prediction(res2_benchmark, "intermediate") # extract the res2 prediction at the end of its benchmarking process res2_final_prediction <- benchmark_get_prediction(res2_benchmark, "final") @@ -185,7 +195,7 @@ benchmark_plot_exposures(full_benchmark, "res2", "Initial") ``` -To recreate the before/after plots comparing predicted and true exposures before/after duplicate or composite signatures are corrected, the `benchmark_plot_duplicate_exposures` and `benchmark_plot_composite_exposures` functions can be used, respectively. 
+To recreate the plots comparing predicted and true exposures before/after duplicate or composite signatures are corrected, the `benchmark_plot_duplicate_exposures` and `benchmark_plot_composite_exposures` functions can be used, respectively. There are no duplicate or composite signatures for this example, so no plots are produced here. ```{r plot duplicate/composite exposures before/after adjustment}