Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement automatic time-ordering of coordinates #115

Merged
merged 4 commits into from
Aug 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# ggdag (development version)
* Implemented automatic time-ordered coordinates for `time_ordered_coords()` and `layout` (#115)

# ggdag 0.2.10
* Fixed bug where ggdag was using wrong `left_join()` interface with dplyr 1.1.0 (#110)
Expand Down
16 changes: 15 additions & 1 deletion R/dagify.R
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,24 @@ dagify <- function(..., exposure = NULL, outcome = NULL, latent = NULL, labels =
dagitty::coordinates(dgty) <- coords2list(coords)
} else if (is.list(coords)) {
dagitty::coordinates(dgty) <- coords
} else if (is.function(coords)) {
dagitty::coordinates(dgty) <- coords2list(coords(edges2df(dgty)))
} else {
stop("`coords` must be of class `list` or `data.frame`")
stop("`coords` must be of class `list`, `data.frame`, or `function`")
}
}
if (!is.null(labels)) label(dgty) <- labels
dgty
}

# Convert the edges of a dagitty object into a data frame, appending one
# extra row for every node that appears in the `w` column but never in the
# `v` column. Those extra rows carry the node in `v` and `NA` in `w`, so that
# every such node still shows up in the first column of the result.
edges2df <- function(.dag) {
  edge_df <- dagitty::edges(.dag)

  # Nodes listed in `w` that are never listed in `v`
  only_in_w <- !(edge_df$w %in% edge_df$v)
  terminal_nodes <- unique(edge_df$w[only_in_w])

  terminal_rows <- data.frame(
    v = terminal_nodes,
    w = rep(NA, length(terminal_nodes))
  )

  dplyr::bind_rows(edge_df, terminal_rows)
}
84 changes: 79 additions & 5 deletions R/layouts.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
#' Create a time-ordered coordinate data frame
#'
#' `time_ordered_coords()` is a helper function to create time-ordered DAGs.
#' Pass the results to the `coords` argument of `dagify()`. The default is to
#' assume you want variables to go from left to right in order by time.
#' Variables are spread along the y-axis using a simple algorithm to stack them.
#' You can also work along the y-axis by setting `direction = "y"`.
#' Pass the results to the `coords` argument of `dagify()`. If `.vars` is not
#' specified, these coordinates will be determined automatically. If you want to
#' be specific, you can also use a list or data frame. The default is to assume
#' you want variables to go from left to right in order by time. Variables are
#' spread along the y-axis using a simple algorithm to stack them. You can also
#' work along the y-axis by setting `direction = "y"`.
#'
#' @param .vars A list of character vectors, where each vector represents a
#' single time period. Alternatively, a data frame where the first column is
Expand All @@ -13,10 +15,23 @@
#' a sequence from 1 to the number of variables.
#' @param direction A character string indicating the axis along which the
#' variables should be time-ordered. Either "x" or "y". Default is "x".
#' @param auto_sort_direction If `.vars` is `NULL`: nodes will be placed as far
#' `"left"` or `"right"` in the graph as is reasonable. Default is `"right"`,
#' meaning the nodes will be as close as possible in time to their
#' descendants.
#'
#' @return A tibble with three columns: `name`, `x`, and `y`.
#'
#' @examples
#'
#' dagify(
#' d ~ c1 + c2 + c3,
#' c1 ~ b1 + b2,
#' c3 ~ a,
#' b1 ~ a,
#' coords = time_ordered_coords()
#' ) %>% ggdag()
#'
#' coords <- time_ordered_coords(list(
#' # time point 1
#' "a",
Expand Down Expand Up @@ -51,9 +66,18 @@
#'
#' @export
#' @seealso [dagify()], [coords2df()], [coords2list()]
time_ordered_coords <- function(.vars, time_points = NULL, direction = c("x", "y")) {
time_ordered_coords <- function(.vars = NULL, time_points = NULL, direction = c("x", "y"), auto_sort_direction = c("right", "left")) {
direction <- match.arg(direction)

if (is.null(.vars)) {
auto_time_ordered_coords <- function(.df) {
.df <- auto_time_order(.df, sort_direction = auto_sort_direction)
time_ordered_coords(.df, direction = direction)
}

return(auto_time_ordered_coords)
}

if (is.data.frame(.vars)) {
stopifnot(ncol(.vars) >= 2)
time_points <- sort(unique(.vars[[2]]))
Expand Down Expand Up @@ -94,4 +118,54 @@ calculate_spread <- function(n) {
spread
}

# Assign an order index to every node of an edge list.
#
# `graph` is a data frame whose first two columns are renamed to `name` and
# `to` (one row per edge; see `edges2df()` for the shape `dagify()` passes in).
# Nodes are peeled off in passes: each pass collects the nodes in `name` that
# do not appear in `to` among the remaining rows, gives them the current
# order value, and drops their rows.
#
# `sort_direction`:
#   * "left"  - return the pass index for each node as computed above.
#   * "right" - additionally, for every node that has entries in `to`, replace
#               its order with one less than the smallest order among those
#               `to` nodes (see `right_sort_coords()`).
#
# Returns a tibble with columns `name` and `order`.
# Stops with an error if a pass finds no node to remove, which happens when
# the remaining edges contain a cycle (previously this looped forever).
auto_time_order <- function(graph, sort_direction = c("right", "left")) {
  sort_direction <- match.arg(sort_direction)
  names(graph)[1:2] <- c("name", "to")
  # Keep the complete edge list; `graph` itself is consumed by the loop below
  graph2 <- graph
  orders <- dplyr::tibble(name = character(), order = integer())

  order_value <- 1

  while (nrow(graph) > 0) {
    no_incoming <- graph %>%
      dplyr::filter(!(name %in% to)) %>%
      dplyr::pull(name) %>%
      unique()

    # Without this guard no rows would be removed and the loop never ends
    if (length(no_incoming) == 0) {
      stop(
        "Cannot time-order the graph: the remaining edges contain a cycle",
        call. = FALSE
      )
    }

    # Add the names and order values to the orders data frame
    orders <- dplyr::add_row(orders, name = no_incoming, order = order_value)

    # Remove the rows with no incoming edges
    graph <- graph %>%
      dplyr::filter(!name %in% no_incoming)

    order_value <- order_value + 1
  }

  # `graph` is empty at this point, so only the collected orders are needed
  final_result <- dplyr::distinct(orders)

  if (sort_direction == "left") {
    return(final_result)
  }

  final_result %>%
    ggdag_left_join(graph2, by = "name") %>%
    dplyr::group_by(name) %>%
    dplyr::group_modify(~ right_sort_coords(.x, final_result)) %>%
    dplyr::ungroup()
}

# Compute the order for one node.
#
# `.x` holds the rows for a single node and is read for its `to` and `order`
# columns; `.orders` is a table with `name` and `order` columns.
# If any of the node's `to` entries are found in `.orders`, the result is one
# less than the smallest order among them; otherwise the node keeps the
# order(s) it already has in `.x`.
#
# Returns a tibble with a single column, `order`.
right_sort_coords <- function(.x, .orders) {
  is_target <- .orders[["name"]] %in% .x$to
  target_orders <- .orders[["order"]][is_target]

  if (length(target_orders) > 0) {
    return(dplyr::tibble(order = min(target_orders) - 1))
  }

  dplyr::tibble(order = .x$order)
}

19 changes: 16 additions & 3 deletions R/tidy_dag.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
#'
#' @param .dagitty a `dagitty`
#' @param seed a numeric seed for reproducible layout generation
#' @param layout a layout available in `ggraph`. See [ggraph::create_layout()] for details.
#' @param layout a layout available in `ggraph`. See [ggraph::create_layout()]
#' for details. Alternatively, `"time_ordered"` will use
#' `time_ordered_coords()` to algorithmically sort the graph by time.
#' @param ... optional arguments passed to `ggraph::create_layout()`
#'
#' @return a `tidy_dagitty` object
Expand Down Expand Up @@ -30,13 +32,24 @@
#' geom_dag_edges() +
#' theme_dag()
tidy_dagitty <- function(.dagitty, seed = NULL, layout = "nicely", ...) {
check_verboten_layout(layout)

if (!is.null(seed)) set.seed(seed)

if (dagitty::graphType(.dagitty) != "dag") stop("`.dagitty` must be of graph type `dag`")
.dag <- .dagitty

if (layout == "time_ordered") {
coords <- .dagitty %>%
edges2df() %>%
auto_time_order() %>%
time_ordered_coords() %>%
coords2list()

dagitty::coordinates(.dagitty) <- coords
layout <- "nicely"
} else {
check_verboten_layout(layout)
}

no_existing_coords <- dagitty::coordinates(.dagitty) %>%
purrr::map_lgl(~ all(is.na(.x))) %>%
all()
Expand Down
4 changes: 3 additions & 1 deletion man/equivalent.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/tidy_dagitty.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

31 changes: 26 additions & 5 deletions man/time_ordered_coords.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

33 changes: 31 additions & 2 deletions tests/testthat/test-layouts.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,42 @@ test_that("time ordered layout works", {
"d"
))

p1 <- dagify(
d1 <- dagify(
d ~ c1 + c2 + c3,
c1 ~ b1 + b2,
c3 ~ a,
b1 ~ a,
coords = coords
) %>% ggdag()
)

p1 <- ggdag(d1)

auto_coords_coords <- dagify(
d ~ c1 + c2 + c3,
c1 ~ b1 + b2,
c3 ~ a,
b1 ~ a,
coords = time_ordered_coords()
)

# auto time ordering is the same
expect_equal(
coords2list(coords),
dagitty::coordinates(auto_coords_coords)
)

auto_coords_layout <- dagify(
d ~ c1 + c2 + c3,
c1 ~ b1 + b2,
c3 ~ a,
b1 ~ a
)

# specifying in dagify or tidy_dagitty is the same
expect_equal(
tidy_dagitty(auto_coords_layout, layout = "time_ordered")$data,
tidy_dagitty(auto_coords_coords)$data
)

# or use a data frame
x <- data.frame(
Expand Down