-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #58 from ropensci-review-tools/user-connections
start 'R/analyse-users.R'
- Loading branch information
Showing
10 changed files
with
237 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
#' Construct pairwise measures of strengths of relation between users.
#'
#' @param user_data Result of `lapply(logins, repometrics_data_user)`, with
#' names of the list used as user logins. Each element contains the following
#' fields:
#' \enumerate{
#' \item general (not considered here)
#' \item commit_cmt Comments on commits
#' \item commits Commits to different repositories
#' \item followers GitHub followers
#' \item following Logins of people/orgs followed by user on GitHub
#' \item issue_cmts Comments on issues
#' \item issues Issues opened by user.
#' }
#' @return A `data.frame` of pairwise user logins (`login1`, `login2`), and
#' proportions of overlap between repositories in the six variables described
#' above, with one column per variable.
#' @noRd
user_relation_matrices <- function (user_data) {

    user_names <- names (user_data)
    user_data <- add_user_login_cols (user_data) |>
        combine_user_data ()

    # Pre-processing to name grouping column "repo" and count column "n":
    user_data$commit_cmt$repo <-
        paste0 (user_data$commit_cmt$org, user_data$commit_cmt$repo)

    # Followers and following have no counts; each entry counts once, and the
    # login followed/following takes the place of "repo":
    user_data$followers <-
        dplyr::rename (user_data$followers, repo = followers) |>
        dplyr::mutate (n = 1L)
    user_data$following <-
        dplyr::rename (user_data$following, repo = following) |>
        dplyr::mutate (n = 1L)

    user_data$issue_cmts <-
        dplyr::rename (user_data$issue_cmts, repo = org_repo) |>
        dplyr::group_by (repo, login) |>
        dplyr::summarise (n = sum (num_comments), .groups = "keep")
    user_data$issues <- dplyr::rename (user_data$issues, repo = org_repo) |>
        dplyr::group_by (repo, login) |>
        dplyr::summarise (n = dplyr::n (), .groups = "keep")

    # One table of pairwise overlaps for each field:
    overlap <- lapply (names (user_data), function (n) {
        user_relate_fields (user_data, user_names, what = n)
    })

    # Join all tables on the pair of logins, regardless of how many fields
    # there are:
    Reduce (
        function (x, y) dplyr::left_join (x, y, by = c ("login1", "login2")),
        overlap
    )
}
|
||
#' Add 'login' columns to all user data, so each element can be combined.
#'
#' Data frames gain a `login` column. Character vectors are converted to
#' data frames with one column named after the field plus a `login` column.
#' All other elements are returned unchanged. Empty data frames and empty
#' character vectors are also handled, yielding zero-row data frames which
#' still include the `login` column.
#'
#' @param user_data List with one element per user, named by user login. Each
#' element is itself a named list of fields.
#' @return A list with the same names and structure as `user_data`, with
#' `login` values taken from the names of `user_data`.
#' @noRd
add_user_login_cols <- function (user_data) {

    logins <- names (user_data)

    res <- lapply (seq_along (user_data), function (u) {
        fields <- user_data [[u]]
        res_u <- lapply (seq_along (fields), function (i) {
            ud <- fields [[i]]
            if (is.data.frame (ud)) {
                # Explicit `rep()` so that zero-row data frames also gain a
                # (zero-length) 'login' column instead of erroring:
                ud$login <- rep (logins [u], nrow (ud))
            } else if (is.character (ud)) {
                # Explicit `rep()` so that `character(0)` gives a zero-row
                # data frame rather than a "differing number of rows" error:
                ud <- data.frame (ud, login = rep (logins [u], length (ud)))
                names (ud) [1] <- names (fields) [i]
            }
            ud
        })
        names (res_u) <- names (fields)

        res_u
    })
    names (res) <- logins

    res
}
|
||
#' Combine all individual elements of 'user_data' for all users.
#'
#' Each field is row-bound across all users, which is possible because
#' `add_user_login_cols` has already added a `login` column to each element.
#' Field names are taken from the first user, and the "general" field is
#' dropped from the result.
#'
#' @param user_data List with one element per user, each a named list of
#' fields.
#' @return A named list with one combined element per field, excluding
#' "general".
#' @noRd
combine_user_data <- function (user_data) {

    field_names <- names (user_data [[1]])

    # Row-bind one field across all users, discarding row names.
    bind_field <- function (field) {
        per_user <- lapply (user_data, `[[`, field)
        bound <- do.call (rbind, per_user)
        rownames (bound) <- NULL
        bound
    }

    combined <- lapply (field_names, bind_field)
    names (combined) <- field_names
    combined$general <- NULL

    combined
}
|
||
#' Pairwise overlap between users for one field of the combined user data.
#'
#' For each pair of users, values of `n` are summed per `repo` for each user.
#' The overlap is the sum of both users' counts in repos common to both,
#' divided by the sum of both users' counts over all of their repos. Pairs
#' with no repos in common have an overlap of 0.
#'
#' @param user_data Combined user data, in which `user_data[[what]]` has
#' columns `login` and `repo`, plus a count column `n` (`num_commits` for
#' "commits"; created here for "commit_cmt").
#' @param user_names Character vector of user logins from which all pairs are
#' formed.
#' @param what Name of the field of `user_data` to analyse.
#' @return A `data.frame` with columns `login1`, `login2`, and a third column
#' named by the value of `what` holding the overlap values.
#' @noRd
user_relate_fields <- function (user_data, user_names, what = "commits") {

    # One row per pair of users:
    pairs <- t (combn (user_names, m = 2L))

    # Ensure the count column is named "n":
    if (what == "commits") {
        user_data [[what]] <-
            dplyr::rename (user_data [[what]], n = num_commits)
    } else if (what == "commit_cmt") {
        # Each commit comment counts once.
        user_data$commit_cmt$n <- 1L
    }
    field_data <- user_data [[what]]

    overlap_for_pair <- function (pair_logins) {

        counts1 <- dplyr::summarise (
            dplyr::group_by (
                dplyr::filter (field_data, login == pair_logins [1]),
                repo
            ),
            n1 = sum (n)
        )
        counts2 <- dplyr::summarise (
            dplyr::group_by (
                dplyr::filter (field_data, login == pair_logins [2]),
                repo
            ),
            n2 = sum (n)
        )
        shared <- dplyr::inner_join (counts1, counts2, by = "repo")

        if (nrow (shared) == 0L) {
            return (0)
        }

        (sum (shared$n1) + sum (shared$n2)) /
            (sum (counts1$n1) + sum (counts2$n2))
    }
    overlap_vals <- apply (pairs, 1, overlap_for_pair)

    out <- data.frame (
        login1 = pairs [, 1],
        login2 = pairs [, 2],
        overlap_vals
    )
    names (out) [3] <- what

    out
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
# Mock version of data constructed in data-gh-user.R
#
# Returns a list with fields "general", "commit_cmt", "commits", "followers",
# "following", "issue_cmts", and "issues". The "followers" and "following"
# fields are vectors of random 6-letter strings; all other values are fixed.
mock_user_rel_data <- function () {

    user <- data.frame (
        login = "me",
        name = "me too",
        email = "me@here.com",
        location = "somewhere",
        company = "noway",
        bio = NA_character_,
        avatarUrl = NA_character_,
        num_repositories = 1L,
        repos_contributed_to = 2L,
        num_starred_repos = 3L
    )
    orgs <- data.frame (
        name = "org",
        gh_org = "org",
        url = "https://github.com/org",
        web_url = NA_character_,
        location = NA_character_,
        num_members = 0L
    )
    general <- list (user = user, orgs = orgs)

    # Random strings of upper- and lower-case letters used as mock logins:
    random_login <- function (len = 6L) {
        chars <- sample (c (letters, LETTERS), size = len, replace = TRUE)
        paste (chars, collapse = "")
    }
    followers <-
        vapply (seq_len (10L), function (i) random_login (), character (1L))
    following <-
        vapply (seq_len (5L), function (i) random_login (), character (1L))

    timestamp <- as.POSIXct ("2024-01-01T00:00:01")
    timestamp_minus_year <- as.POSIXct ("2023-01-01T00:00:01")
    two_stamps <- rep (timestamp, 2L)

    repo_names <- paste0 ("org", c ("one", "two"))

    commits <- data.frame (
        repo = repo_names,
        num_commits = 1:2,
        date = two_stamps
    )

    commit_cmt <- data.frame (
        repo = repo_names,
        num_commits = 1:2,
        date = two_stamps
    )
    attr (commit_cmt, "started_at") <- timestamp_minus_year
    attr (commit_cmt, "ended_at") <- timestamp

    issues <- data.frame (
        opened_at = two_stamps,
        closed_at = two_stamps,
        org_repo = repo_names,
        issue_num = 1:2,
        num_issue_comments = 3:4,
        num_issue_participants = 5:6,
        num_repo_languages = 7:8,
        repo_languages = I (c ("R", "C"))
    )
    attr (issues, "started_at") <- timestamp_minus_year
    attr (issues, "ended_at") <- timestamp

    issue_cmts <- data.frame (
        org_repo = repo_names,
        issue_num = 1:2,
        created_at = two_stamps,
        num_comments = 1:2,
        num_participants = 3:4
    )

    # Then assemble all:
    list (
        general = general,
        commit_cmt = commit_cmt,
        commits = commits,
        followers = followers,
        following = following,
        issue_cmts = issue_cmts,
        issues = issues
    )
}
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
test_that ("user data matrices", {

    # Two users with identical mock data, apart from randomly generated
    # followers and following:
    user_data <- lapply (1:2, function (i) mock_user_rel_data ())
    names (user_data) <- c ("a", "b")

    mats <- user_relation_matrices (user_data)

    expect_s3_class (mats, "data.frame")
    expect_equal (ncol (mats), 8L)
    nms <- c (
        "login1", "login2", "commit_cmt", "commits", "followers", "following",
        "issue_cmts", "issues"
    )
    expect_named (mats, nms)

    # Two users form exactly one pair:
    expect_equal (nrow (mats), 1L)
    expect_equal (mats$login1, "a")
    expect_equal (mats$login2, "b")

    # All overlap measures are proportions:
    vals <- unlist (mats [, -(1:2)])
    expect_true (all (vals >= 0 & vals <= 1))
    # Both users have identical commits, so these overlap completely:
    expect_equal (mats$commits, 1)
})