From 4bfed08690ae76f62130d604502b0be73c3220f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Hammarstr=C3=B6m?= Date: Mon, 9 Mar 2020 11:42:25 +0100 Subject: [PATCH] Added value_type argument to choose what value to display --- .Rbuildignore | 2 + .gitignore | 4 ++ R/geom_venn.R | 114 +++++++++++++++++++++++------------------------ R/ggvenn.R | 58 ++++++++++++++---------- ggvenn.Rproj | 17 +++++++ man/geom_venn.Rd | 8 ++-- man/ggvenn.Rd | 11 +++-- 7 files changed, 126 insertions(+), 88 deletions(-) create mode 100644 .Rbuildignore create mode 100644 .gitignore create mode 100644 ggvenn.Rproj diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 0000000..91114bf --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1,2 @@ +^.*\.Rproj$ +^\.Rproj\.user$ diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5b6a065 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.Rproj.user +.Rhistory +.RData +.Ruserdata diff --git a/R/geom_venn.R b/R/geom_venn.R index ee29fa7..7735519 100644 --- a/R/geom_venn.R +++ b/R/geom_venn.R @@ -21,10 +21,10 @@ #' #' # use data.frame as input #' d <- tibble(value = c(1, 2, 3, 5, 6, 7, 8, 9, 10, 12, 13), -#' `Set 1` = c(T, F, T, T, F, T, F, T, F, F, F), -#' `Set 2` = c(T, F, F, T, F, F, F, T, F, F, T), -#' `Set 3` = c(T, T, F, F, F, F, T, T, F, F, F), -#' `Set 4` = c(F, F, F, F, T, T, F, F, T, T, F)) +#' `Set 1` = c(TRUE, FALSE, TRUE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, FALSE, FALSE), +#' `Set 2` = c(TRUE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE), +#' `Set 3` = c(TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE), +#' `Set 4` = c(FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, FALSE)) #' #' # ggplot gramma #' ggplot(d) + @@ -68,8 +68,8 @@ geom_venn <- function(mapping = NULL, data = NULL, text_color = "black", text_size = 4) { l <- layer(mapping = mapping, data = data, - geom = GeomVenn, stat = stat, position = position, - params = list(na.rm = TRUE, 
...)) + geom = GeomVenn, stat = stat, position = position, + params = list(na.rm = TRUE, ...)) old_compute_aesthetics <- l$compute_aesthetics l$compute_aesthetics <- function(self, data, plot) { if (is.null(set_names)) { @@ -99,55 +99,55 @@ geom_venn <- function(mapping = NULL, data = NULL, } GeomVenn <- ggproto("GeomVenn", Geom, - required_aes = c("A", "B"), - optional_aes = c("C", "D", "label"), - extra_params = c("na.rm"), - setup_data = function(self, data, params) { - data %>% mutate(xmin = -2, xmax = 2, ymin = -2, ymax = 2) - }, - draw_panel = function(self, data, panel_params, coord, ...) { - attr <- self$customize_attributes - sets <- c("A", "B", "C", "D") - sets <- sets[sets %in% names(data)] - show_elements <- NA - if ("label" %in% names(data)) { - show_elements <- "label" - } - venn <- prepare_venn_data(data, sets, show_elements) - d0 <- coord_munch(coord, venn$shapes, panel_params) - d <- d0 %>% - filter(!duplicated(group)) %>% - mutate(fill_color = attr$fill_color[group], - fill_alpha = attr$fill_alpha, - stroke_color = attr$stroke_color, - stroke_alpha = attr$stroke_alpha, - stroke_size = attr$stroke_size, - stroke_linetype = attr$stroke_linetype) - d1 <- coord_munch(coord, venn$labels, panel_params) - d2 <- coord_munch(coord, venn$texts, panel_params) - ggplot2:::ggname("geom_venn", - grobTree( - polygonGrob( - d0$x, d0$y, default.units = "native", id = d0$group, - gp = gpar(col = NA, - fill = alpha(d$fill_color, d$fill_alpha))), - polygonGrob( - d0$x, d0$y, default.units = "native", id = d0$group, - gp = gpar(col = alpha(d$stroke_color, d$stroke_alpha), - fill = NA, - lwd = d$stroke_size * .pt, - lty = d$stroke_linetype)), - textGrob( - self$set_names, d1$x, d1$y, default.units = "native", - hjust = d1$hjust, vjust = d1$vjust, - gp = gpar(col = attr$set_name_color, - fontsize = attr$set_name_size * .pt)), - textGrob( - d2$text, d2$x, d2$y, default.units = "native", - hjust = d2$hjust, vjust = d2$vjust, - gp = gpar(col = attr$text_color, - fontsize = 
attr$text_size * .pt)) - ) - ) - } + required_aes = c("A", "B"), + optional_aes = c("C", "D", "label"), + extra_params = c("na.rm"), + setup_data = function(self, data, params) { + data %>% mutate(xmin = -2, xmax = 2, ymin = -2, ymax = 2) + }, + draw_panel = function(self, data, panel_params, coord, ...) { + attr <- self$customize_attributes + sets <- c("A", "B", "C", "D") + sets <- sets[sets %in% names(data)] + show_elements <- NA + if ("label" %in% names(data)) { + show_elements <- "label" + } + venn <- prepare_venn_data(data, sets, show_elements) + d0 <- coord_munch(coord, venn$shapes, panel_params) + d <- d0 %>% + filter(!duplicated(group)) %>% + mutate(fill_color = attr$fill_color[group], + fill_alpha = attr$fill_alpha, + stroke_color = attr$stroke_color, + stroke_alpha = attr$stroke_alpha, + stroke_size = attr$stroke_size, + stroke_linetype = attr$stroke_linetype) + d1 <- coord_munch(coord, venn$labels, panel_params) + d2 <- coord_munch(coord, venn$texts, panel_params) + ggplot2:::ggname("geom_venn", + grobTree( + polygonGrob( + d0$x, d0$y, default.units = "native", id = d0$group, + gp = gpar(col = NA, + fill = alpha(d$fill_color, d$fill_alpha))), + polygonGrob( + d0$x, d0$y, default.units = "native", id = d0$group, + gp = gpar(col = alpha(d$stroke_color, d$stroke_alpha), + fill = NA, + lwd = d$stroke_size * .pt, + lty = d$stroke_linetype)), + textGrob( + self$set_names, d1$x, d1$y, default.units = "native", + hjust = d1$hjust, vjust = d1$vjust, + gp = gpar(col = attr$set_name_color, + fontsize = attr$set_name_size * .pt)), + textGrob( + d2$text, d2$x, d2$y, default.units = "native", + hjust = d2$hjust, vjust = d2$vjust, + gp = gpar(col = attr$text_color, + fontsize = attr$text_size * .pt)) + ) + ) + } ) diff --git a/R/ggvenn.R b/R/ggvenn.R index f8e16cc..a05d909 100644 --- a/R/ggvenn.R +++ b/R/ggvenn.R @@ -4,6 +4,7 @@ #' @param data A data.frame or a list as input data. #' @param columns A character vector use as index to select columns/elements. 
#' @param show_elements Show set elements instead of count/percentage. +#' @param value_type Display "count" data only or "both" counts and percentages #' @param fill_color Filling colors in circles. #' @param fill_alpha Transparency for filling circles. #' @param stroke_color Stroke color for drawing circles. @@ -29,10 +30,10 @@ #' #' # use data.frame as input #' d <- tibble(value = c(1, 2, 3, 5, 6, 7, 8, 9, 10, 12, 13), -#' `Set 1` = c(T, F, T, T, F, T, F, T, F, F, F), -#' `Set 2` = c(T, F, F, T, F, F, F, T, F, F, T), -#' `Set 3` = c(T, T, F, F, F, F, T, T, F, F, F), -#' `Set 4` = c(F, F, F, F, T, T, F, F, T, T, F)) +#' `Set 1` = c(TRUE, FALSE, TRUE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, FALSE, FALSE), +#' `Set 2` = c(TRUE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE), +#' `Set 3` = c(TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE), +#' `Set 4` = c(FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, FALSE)) #' ggvenn(d, c("Set 1", "Set 2")) #' ggvenn(d, c("Set 1", "Set 2", "Set 3")) #' ggvenn(d) @@ -47,6 +48,7 @@ #' @export ggvenn <- function(data, columns = NULL, show_elements = FALSE, + value_type = "both", fill_color = c("blue", "yellow", "green", "red"), fill_alpha = .5, stroke_color = "black", @@ -57,7 +59,7 @@ ggvenn <- function(data, columns = NULL, set_name_size = 6, text_color = "black", text_size = 4) { - venn <- prepare_venn_data(data, columns, show_elements) + venn <- prepare_venn_data(data, columns, show_elements, value_type) venn$shapes %>% mutate(group = LETTERS[group]) %>% ggplot() + @@ -88,10 +90,10 @@ gen_circle <- function(group, x_offset = 0, y_offset = 0, radius = 1, radius_b = radius, theta_offset = 0, length.out = 100) { tibble(group = group, theta = seq(0, 2 * pi, length.out = length.out)) %>% - mutate(x_raw = radius * cos(theta), - y_raw = radius_b * sin(theta), - x = x_offset + x_raw * cos(theta_offset) - y_raw * sin(theta_offset), - y = y_offset + x_raw * sin(theta_offset) + 
y_raw * cos(theta_offset)) + mutate(x_raw = radius * cos(theta), + y_raw = radius_b * sin(theta), + x = x_offset + x_raw * cos(theta_offset) - y_raw * sin(theta_offset), + y = y_offset + x_raw * sin(theta_offset) + y_raw * cos(theta_offset)) } gen_circle_2 <- function() { @@ -164,7 +166,7 @@ gen_label_pos_4 <- function() { "D", 1.5, -1.3, 0, 1) } -prepare_venn_data <- function(data, columns = NULL, show_elements = FALSE) { +prepare_venn_data <- function(data, columns = NULL, show_elements = FALSE, value_type = "both") { if (is.data.frame(data)) { if (is.null(columns)) { columns = data %>% select_if(is.logical) %>% names @@ -184,7 +186,7 @@ prepare_venn_data <- function(data, columns = NULL, show_elements = FALSE) { stopifnot((d1 %>% count(A, B) %>% with(n)) == 1) for (i in 1:nrow(d1)) { idx <- ((!xor(d1$A[[i]], as_tibble(data)[,columns[[1]]])) & - (!xor(d1$B[[i]], as_tibble(data)[,columns[[2]]]))) + (!xor(d1$B[[i]], as_tibble(data)[,columns[[2]]]))) d1$n[[i]] <- sum(idx) if (!identical(show_elements, FALSE)) { d1$text[[i]] <- paste(unlist(as_tibble(data)[idx,show_elements]), collapse = ",") @@ -200,8 +202,8 @@ prepare_venn_data <- function(data, columns = NULL, show_elements = FALSE) { stopifnot((d1 %>% count(A, B, C) %>% with(n)) == 1) for (i in 1:nrow(d1)) { idx <- ((!xor(d1$A[[i]], as_tibble(data)[,columns[[1]]])) & - (!xor(d1$B[[i]], as_tibble(data)[,columns[[2]]])) & - (!xor(d1$C[[i]], as_tibble(data)[,columns[[3]]]))) + (!xor(d1$B[[i]], as_tibble(data)[,columns[[2]]])) & + (!xor(d1$C[[i]], as_tibble(data)[,columns[[3]]]))) d1$n[[i]] <- sum(idx) if (!identical(show_elements, FALSE)) { d1$text[[i]] <- paste(unlist(as_tibble(data)[idx,show_elements]), collapse = ",") @@ -218,9 +220,9 @@ prepare_venn_data <- function(data, columns = NULL, show_elements = FALSE) { stopifnot((d1 %>% count(A, B, C, D) %>% with(n)) == 1) for (i in 1:nrow(d1)) { idx <- ((d1$A[[i]] == as_tibble(data)[,columns[[1]], drop = TRUE]) & - (d1$B[[i]] == as_tibble(data)[,columns[[2]], drop = 
TRUE]) & - (d1$C[[i]] == as_tibble(data)[,columns[[3]], drop = TRUE]) & - (d1$D[[i]] == as_tibble(data)[,columns[[4]], drop = TRUE])) + (d1$B[[i]] == as_tibble(data)[,columns[[2]], drop = TRUE]) & + (d1$C[[i]] == as_tibble(data)[,columns[[3]], drop = TRUE]) & + (d1$D[[i]] == as_tibble(data)[,columns[[4]], drop = TRUE])) d1$n[[i]] <- sum(idx) if (!identical(show_elements, FALSE)) { d1$text[[i]] <- paste(unlist(as_tibble(data)[idx,show_elements]), collapse = ",") @@ -243,7 +245,7 @@ prepare_venn_data <- function(data, columns = NULL, show_elements = FALSE) { stopifnot((d1 %>% count(A, B) %>% with(n)) == 1) for (i in 1:nrow(d1)) { idx <- ((!xor(d1$A[[i]], a2 %in% data[[columns[[1]]]])) & - (!xor(d1$B[[i]], a2 %in% data[[columns[[2]]]]))) + (!xor(d1$B[[i]], a2 %in% data[[columns[[2]]]]))) d1$n[[i]] <- sum(idx) d1$text[[i]] <- paste(a2[idx], collapse = ",") } @@ -254,8 +256,8 @@ prepare_venn_data <- function(data, columns = NULL, show_elements = FALSE) { stopifnot((d1 %>% count(A, B, C) %>% with(n)) == 1) for (i in 1:nrow(d1)) { idx <- ((!xor(d1$A[[i]], a2 %in% data[[columns[[1]]]])) & - (!xor(d1$B[[i]], a2 %in% data[[columns[[2]]]])) & - (!xor(d1$C[[i]], a2 %in% data[[columns[[3]]]]))) + (!xor(d1$B[[i]], a2 %in% data[[columns[[2]]]])) & + (!xor(d1$C[[i]], a2 %in% data[[columns[[3]]]]))) d1$n[[i]] <- sum(idx) d1$text[[i]] <- paste(a2[idx], collapse = ",") } @@ -266,9 +268,9 @@ prepare_venn_data <- function(data, columns = NULL, show_elements = FALSE) { stopifnot((d1 %>% count(A, B, C, D) %>% with(n)) == 1) for (i in 1:nrow(d1)) { idx <- ((!xor(d1$A[[i]], a2 %in% data[[columns[[1]]]])) & - (!xor(d1$B[[i]], a2 %in% data[[columns[[2]]]])) & - (!xor(d1$C[[i]], a2 %in% data[[columns[[3]]]])) & - (!xor(d1$D[[i]], a2 %in% data[[columns[[4]]]]))) + (!xor(d1$B[[i]], a2 %in% data[[columns[[2]]]])) & + (!xor(d1$C[[i]], a2 %in% data[[columns[[3]]]])) & + (!xor(d1$D[[i]], a2 %in% data[[columns[[4]]]]))) d1$n[[i]] <- sum(idx) d1$text[[i]] <- paste(a2[idx], collapse = ",") } @@ -281,7 
+283,17 @@ prepare_venn_data <- function(data, columns = NULL, show_elements = FALSE) { stop("`data` should be a list") } if (!show_elements) { - d1 <- d1 %>% mutate(text = sprintf("%d\n(%.1f%%)", n, 100 * n / sum(n))) + + if(value_type == "both"){ + d1 <- d1 %>% mutate(text = sprintf("%d\n(%.1f%%)", n, 100 * n / sum(n))) + } + if(value_type == "count"){ + d1 <- d1 %>% mutate(text = sprintf("%d", n)) + } + + } + + list(shapes = d, texts = d1, labels = d2) } diff --git a/ggvenn.Rproj b/ggvenn.Rproj new file mode 100644 index 0000000..21a4da0 --- /dev/null +++ b/ggvenn.Rproj @@ -0,0 +1,17 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source diff --git a/man/geom_venn.Rd b/man/geom_venn.Rd index 8d54939..7f49dea 100644 --- a/man/geom_venn.Rd +++ b/man/geom_venn.Rd @@ -74,10 +74,10 @@ library(ggvenn) # use data.frame as input d <- tibble(value = c(1, 2, 3, 5, 6, 7, 8, 9, 10, 12, 13), - `Set 1` = c(T, F, T, T, F, T, F, T, F, F, F), - `Set 2` = c(T, F, F, T, F, F, F, T, F, F, T), - `Set 3` = c(T, T, F, F, F, F, T, T, F, F, F), - `Set 4` = c(F, F, F, F, T, T, F, F, T, T, F)) + `Set 1` = c(TRUE, FALSE, TRUE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, FALSE, FALSE), + `Set 2` = c(TRUE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE), + `Set 3` = c(TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE), + `Set 4` = c(FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, FALSE)) # ggplot gramma ggplot(d) + diff --git a/man/ggvenn.Rd b/man/ggvenn.Rd index 62d563b..d63977c 100644 --- a/man/ggvenn.Rd +++ b/man/ggvenn.Rd @@ -8,6 +8,7 @@ ggvenn( data, columns = NULL, show_elements = FALSE, + value_type = "both", fill_color = c("blue", "yellow", "green", 
"red"), fill_alpha = 0.5, stroke_color = "black", @@ -27,6 +28,8 @@ ggvenn( \item{show_elements}{Show set elements instead of count/percentage.} +\item{value_type}{Display "count" data only or "both" counts and percentages} + \item{fill_color}{Filling colors in circles.} \item{fill_alpha}{Transparency for filling circles.} @@ -67,10 +70,10 @@ ggvenn(a) # use data.frame as input d <- tibble(value = c(1, 2, 3, 5, 6, 7, 8, 9, 10, 12, 13), - `Set 1` = c(T, F, T, T, F, T, F, T, F, F, F), - `Set 2` = c(T, F, F, T, F, F, F, T, F, F, T), - `Set 3` = c(T, T, F, F, F, F, T, T, F, F, F), - `Set 4` = c(F, F, F, F, T, T, F, F, T, T, F)) + `Set 1` = c(TRUE, FALSE, TRUE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, FALSE, FALSE), + `Set 2` = c(TRUE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE), + `Set 3` = c(TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE), + `Set 4` = c(FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, FALSE)) ggvenn(d, c("Set 1", "Set 2")) ggvenn(d, c("Set 1", "Set 2", "Set 3")) ggvenn(d)