Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: ✨ add include_podiatrist_services() with tests #182

Open
wants to merge 17 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Imports:
cli,
dplyr,
lifecycle,
lubridate,
rlang,
utils
Suggests:
Expand Down
47 changes: 47 additions & 0 deletions R/include-podiatrist-services.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#' Include only those who have had podiatrist services
#'
#' See [algorithm] for the logic used to filter these patients.
#' In addition to the algorithm logic, duplicates are removed and only
#' the two earliest dates per individual are kept.
#'
#' The output is passed to the `join_inclusions()` function to be joined with
#' the rest of the inclusion data.
#'
#' @param sysi The SYSI register.
#' @param sssy The SSSY register.
#'
#' @return The same type as the input data, default as a [tibble::tibble()]
#'   with two columns:
#'   - `pnr`: Personal identification variable.
#'   - `date`: The dates of podiatrist services, converted from `honuge`
#'     with `yyww_to_date()`.
#'
#' @keywords internal
#'
#' @examples
#' \dontrun{
#' include_podiatrist_services(register_data$sysi, register_data$sssy)
#' }
include_podiatrist_services <- function(sysi, sssy) {
  verify_required_variables(sysi, "sysi")
  verify_required_variables(sssy, "sssy")
  criteria <- get_algorithm_logic("podiatrist_services") |>
    # To convert the string into an R expression.
    rlang::parse_expr()

  # No `by` is given, so the registers are joined on all columns they share.
  dplyr::full_join(column_names_to_lower(sysi), column_names_to_lower(sssy)) |>
    # Filter based on the algorithm logic.
    dplyr::filter(!!criteria) |>
    # Keep only the two columns we need and transform `honuge` to a date.
    dplyr::mutate(
      pnr = .data$pnr,
      date = yyww_to_date(.data$honuge),
      .keep = "none"
    ) |>
    # Remove duplicates *after* reducing to `pnr` and `date`. Rows that only
    # differ in the dropped columns (e.g. two services in the same week)
    # would otherwise count as two separate dates for the same person.
    dplyr::distinct() |>
    # FIXME: This might be computationally intensive.
    dplyr::group_by(.data$pnr) |>
    # Keep the earliest two dates per individual.
    dplyr::filter(dplyr::row_number(.data$date) %in% 1:2) |>
    dplyr::ungroup()
}
Binary file modified R/sysdata.rda
Binary file not shown.
38 changes: 38 additions & 0 deletions R/yyww-to-date.R
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This whole thing feels a bit hacky. Let me know if this isn't clear or you have another idea of how to do this.

Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#' Convert date format YYWW to YYYY-MM-DD
#'
#' Since the exact date isn't given in the input, this function will set the
#' date to Monday of the week.
#'
#' As a precaution, leading zeros are added if they have been removed.
#' This can e.g., happen if the input was "0107" and has been converted to a
#' numeric 107 (or "0007" to a numeric 7).
#'
#' @param yyww Character(s) of the format yyww. Numeric input is converted to
#'   character and zero-padded to four characters.
#'
#' @returns Date(s) in the format YYYY-MM-DD. Zero-length input gives a
#'   zero-length Date vector.
#'
#' @keywords internal
#'
#' @examples
#' \dontrun{
#' yyww_to_date("0102")
#' yyww_to_date(c("0102", "0304"))
#' }
yyww_to_date <- function(yyww) {
  # Nothing to convert; avoid pasting "-01-04" onto an empty vector, which
  # would produce a single unparseable string.
  if (length(yyww) == 0L) {
    return(as.Date(character(0)))
  }

  # Restore leading zeros that were dropped (e.g., by a numeric conversion).
  yyww <- as.character(yyww)
  too_short <- !is.na(yyww) & nchar(yyww) < 4L
  yyww[too_short] <- paste0(
    strrep("0", 4L - nchar(yyww[too_short])),
    yyww[too_short]
  )

  year <- substr(yyww, 1L, 2L)
  week <- as.numeric(substr(yyww, 3L, 4L))

  # January 4th is always in ISO week 1, so the Monday of that week is the
  # start of the ISO year.
  january_fourth <- lubridate::ymd(paste0(year, "-01-04"))
  weekday_january_fourth <- lubridate::wday(january_fourth, week_start = 1)
  monday_of_week_one <- january_fourth - (weekday_january_fourth - 1)

  # Step forward whole weeks to reach the Monday of the requested week.
  monday_of_week_one + (week - 1) * 7
}
23 changes: 12 additions & 11 deletions _targets/meta/meta
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ name|type|data|command|depend|seed|path|time|size|bytes|format|repository|iterat
.__global__|object|87b8d4f266c27bf4|||||||||||||||
.Random.seed|object|f18bda37fe0d0048|||||||||||||||
add_fake_drug_name|function|0ab4e3d184e218f5|||||||||||||||
algorithm|stem|40723e52fd2c0786|e6ba0f398c6f1faa|396a620bf28c517b|-415220810||t20076.5829071292s|cfeff01d1edd45fa|355|rds|local|vector|||0.001||
algorithm_csv|stem|499e7e860784d28e|72b6696561a29259|2c530c1562a7fbd1|8336483|data-raw/algorithm.csv|t20076.5660942077s|06ecd420619ded44|130|file|local|vector|||0||
algorithm_rda|stem|c054eb5d080dd3d9|07904a5fa14d0614|b947939e3d638088|1907151832|/Users/au546191/Documents/misc/other_git_repos/osdc/data/algorithm.rda|t20076.5829141999s|449037faa65cfd59|401|file|local|vector|||0.016||
algorithm|stem|05d0d2a8ff6a8c9f|e6ba0f398c6f1faa|bc6d2f1060da6e07|-415220810||t20077.4829948109s|10b6c9f869744062|398|rds|local|vector|||0.147||
algorithm_csv|stem|6095e95958921ff6|72b6696561a29259|2c530c1562a7fbd1|8336483|data-raw/algorithm.csv|t20076.6337660168s|38030177dc1ffde5|184|file|local|vector|||0.001||
algorithm_rda|stem|60b4a600e2340b63|07904a5fa14d0614|914c57c9a673abfe|1907151832|/Users/au546191/Documents/misc/other_git_repos/osdc/data/algorithm.rda|t20077.4829961292s|c091eeb299dbae89|439|file|local|vector|||0.075||
column_names_to_lower|function|1f9ed89ec76f0ce4|||||||||||||||
create_algorithm_data|function|721802c40fcbce4e|||||||||||||||
create_fake_atc|function|2391be262acc5e8f|||||||||||||||
Expand All @@ -18,18 +18,18 @@ create_fake_npu|function|e8db372bbb8627ab|||||||||||||||
create_padded_integer|function|7429ec353f6bff77|||||||||||||||
create_simulated_data|function|025e1e98c7f4ab87|||||||||||||||
create_variable_description_data|function|ede086e002225ffb|||||||||||||||
get_algorithm_logic|function|e231edfcf599a22d|||||||||||||||
get_algorithm_logic|function|cc52ac0ae157e662|||||||||||||||
get_register_abbrev|function|a82c561691b651df|||||||||||||||
get_required_variables|function|986ccd76798db1ba|||||||||||||||
include_gld_purchases|function|eb16919c74056d15|||||||||||||||
include_hba1c|function|6f87e15bcf0dc244|||||||||||||||
include_podiatrist_services|function|bbae5e16a8310e90|||||||||||||||
include_gld_purchases|function|4194aeadd45b706b|||||||||||||||
include_hba1c|function|526b1aabb7e89bc4|||||||||||||||
include_podiatrist_services|function|f17639336fbe4afd|||||||||||||||
insert_analysiscode|function|a62a5138fce1d76f|||||||||||||||
insert_false_drug_names|function|03471ee24566433b|||||||||||||||
insert_false_metformin|function|5124fcc2167cfa45|||||||||||||||
insert_specific_atc|function|c9c607208b71b0b5|||||||||||||||
insertion_rate|function|cd1ada3c0c716ec8|||||||||||||||
internal_rda|stem|654051d3dd808bac|d9d14443b097054a|54c081cfe5f225a1|-682677189|/Users/au546191/Documents/misc/other_git_repos/osdc/R/sysdata.rda|t20076.5829151688s|eec7e47575e4fdfc|157855|file|local|vector|||0.051||
internal_rda|stem|bd075d278ed6e0ee|d9d14443b097054a|973f9d7cb6b86ff7|-682677189|/Users/au546191/Documents/misc/other_git_repos/osdc/R/sysdata.rda|t20077.4829968828s|08f8ace60ca47fba|158131|file|local|vector|||0.05||
join_lpr2|function|73dfa05a6f146f76|||||||||||||||
join_lpr3|function|1690200e83e2d60d|||||||||||||||
pad_integers|function|6f8d902b8ca93b07|||||||||||||||
Expand All @@ -39,17 +39,18 @@ read_variable_description_data|function|bac5d03645e8328d|||||||||||||||
register_as_md_header|function|1b9b0bb62cc1e264|||||||||||||||
register_data|stem|fc4f033ef3e69d6a|4ee7a1a9cf009abe|41c68bb4ccf81abf|-143937024||t20076.5829124231s|e09975c5f10ac745|186631|rds|local|vector|||0.408||
register_data_as_md_table|function|bbdda3785a7bdb63|||||||||||||||
register_data_rda|stem|d38558aaf18ba4a7|677ba3caa7b36ca4|b64f1c34fd720564|1912226099|/Users/au546191/Documents/misc/other_git_repos/osdc/data/register_data.rda|t20076.5829161683s|ac36667d8f414f46|156297|file|local|vector|||0.077||
register_data_rda|stem|d38558aaf18ba4a7|677ba3caa7b36ca4|b64f1c34fd720564|1912226099|/Users/au546191/Documents/misc/other_git_repos/osdc/data/register_data.rda|t20076.6495361389s|ac36667d8f414f46|156297|file|local|vector|||0.077||
registers_as_md_table|function|ab56ce7262ba313b|||||||||||||||
simulate_data|function|ecab2e8eda0ad248|||||||||||||||
simulation_definitions|stem|61079209f80ca2de|a7254d482fd90608|f22d1020f62b8099|1404983078||t20076.5089902499s|2010c91582ffefcf|763|rds|local|vector|||0.221||
simulation_definitions_csv|stem|91efb1a8ec9391b8|d177045a63e64543|2c530c1562a7fbd1|-1333003177|data-raw/simulation-definitions.csv|t20076.5660942234s|2688973cdee8dacd|3823|file|local|vector|||0||
to_yyww|function|4d8ab3df391faebd|||||||||||||||
to_yyyymmdd|function|47a732ab76c8b4fd|||||||||||||||
variable_description|stem|2ac9deeee55b04f5|c2153d67e5651737|a8e94a865056a639|-1244655093||t20076.5829070039s|c5e2c6b0e4727be8|1313|rds|local|vector|||0.152||
variable_description|stem|2ac9deeee55b04f5|c2153d67e5651737|a8e94a865056a639|-1244655093||t20076.6090410234s|c5e2c6b0e4727be8|1313|rds|local|vector|||0.152||
variable_description_csv|stem|c6ca89bf750f10a9|2347307fa16d111b|2c530c1562a7fbd1|1721723811|data-raw/variable-description.csv|t20076.5660942277s|75bbd1de9a7a9806|4683|file|local|vector|||0||
variable_description_rda|stem|f056e784254cc664|afe48e93eb17b9c8|9ce21d3732a18b27|1356510397|/Users/au546191/Documents/misc/other_git_repos/osdc/data/variable_description.rda|t20076.5829138669s|86b7064cf66fe42e|1565|file|local|vector|||0.088||
variable_description_rda|stem|f056e784254cc664|afe48e93eb17b9c8|9ce21d3732a18b27|1356510397|/Users/au546191/Documents/misc/other_git_repos/osdc/data/variable_description.rda|t20076.6495361841s|86b7064cf66fe42e|1565|file|local|vector|||0.088||
variables_as_md_table|function|bf10d1f0df6a170a|||||||||||||||
verify_required_variables|function|070c8fb3014efb9e|||||||||||||||
write_external_rda|function|c0c90c7048a0d89b|||||||||||||||
write_internal_rda|function|95b61d841de16e96|||||||||||||||
yyww_to_date|function|1036c7f5deb56fd4|||||||||||||||
2 changes: 1 addition & 1 deletion data-raw/algorithm.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name,logic
hba1c,(analysiscode == 'NPU27300' AND value >= 48) OR (analysiscode == 'NPU03835' AND value >= 6.5)
gld,atc =~ '^A10'

podiatrist_services,speciale =~ '^54' AND barnmak != 0
Binary file modified data/algorithm.rda
Binary file not shown.
2 changes: 1 addition & 1 deletion man/get_algorithm_logic.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

36 changes: 36 additions & 0 deletions man/include_podiatrist_services.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 30 additions & 0 deletions man/yyww_to_date.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

112 changes: 112 additions & 0 deletions tests/testthat/test-include-podiatrist-services.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
# Shared fixtures: two small registers and the result expected from
# `include_podiatrist_services()` when given both.
sysi <- tibble::tribble(
  ~pnr, ~barnmak, ~speciale, ~honuge,
  1000000000, 0, 54711, "1879", # removed since barnmak = 0
  2000000000, 1, 54800, "9207", # kept but deduplicated
  2000000000, 1, 54800, "9207", # kept but deduplicated
  3000000000, 1, 54005, "0752", # kept bc it's the first date for this person
  3000000000, 1, 54005, "2430" # removed bc it's the third date for this person
)

sssy <- tibble::tribble(
  ~pnr, ~barnmak, ~speciale, ~honuge,
  2000000000, 1, 54800, "9207", # kept but deduplicated
  3000000000, 0, 10000, "1801", # removed since barnmak = 0
  3000000000, 1, 54005, "0830", # kept bc it's the second date for this person
  4000000000, 1, 76255, "1123", # removed since speciale doesn't start with 54
)

expected <- tibble::tribble(
  ~pnr, ~date,
  2000000000, lubridate::ymd("1992-02-10"),
  3000000000, lubridate::ymd("2007-12-24"),
  3000000000, lubridate::ymd("2008-07-21")
)


test_that("sysi needs expected variables", {
  # Drop the second column (`barnmak`) so verification fails.
  sysi <- sysi[-2]
  expect_error(include_podiatrist_services(sysi, sssy))
})

test_that("sssy needs expected variables", {
  # Drop the second column (`barnmak`) so verification fails.
  sssy <- sssy[-2]
  expect_error(include_podiatrist_services(sysi, sssy))
})


test_that("those with inclusion are kept", {
  actual <- include_podiatrist_services(sysi, sssy)
  expect_equal(actual, expected)
})

test_that("casing of input variables doesn't matter", {
  sysi <- sysi |>
    dplyr::rename_with(\(columns) toupper(columns))
  sssy <- sssy |>
    dplyr::rename_with(\(columns) toupper(columns))
  actual <- include_podiatrist_services(sysi, sssy)
  expect_equal(actual, expected)
})

test_that("verification works for DuckDB Database", {
  skip_on_cran()
  skip_if_not_installed("duckplyr")

  sysi <- duckplyr::as_duckplyr_tibble(sysi)
  sssy <- duckplyr::as_duckplyr_tibble(sssy)
  actual <- include_podiatrist_services(sysi, sssy)

  actual_rows <- actual |>
    dplyr::count() |>
    dplyr::pull(n)

  expect_equal(actual_rows, nrow(expected))
  expect_equal(colnames(actual), colnames(expected))
})

test_that("verification works for Arrow Tables (from Parquet)", {
  # FIXME: This test fails because of some issue with the criteria and colnames
  skip()
  skip_on_cran()
  skip_if_not_installed("arrow")

  sysi <- arrow::as_arrow_table(sysi)
  sssy <- arrow::as_arrow_table(sssy)
  actual <- include_podiatrist_services(sysi, sssy)

  actual_rows <- actual |>
    dplyr::count() |>
    dplyr::pull(n)

  expect_equal(actual_rows, nrow(expected))
  # TODO: Arrow doesn't work with colnames(), fix?
  expect_equal(names(actual), colnames(expected))
})

test_that("verification works for data.frame", {
  sysi <- as.data.frame(sysi)
  # Both registers must be converted; assigning to a misspelled name would
  # silently pass the original tibble to the function instead.
  sssy <- as.data.frame(sssy)
  actual <- include_podiatrist_services(sysi, sssy)

  actual_rows <- actual |>
    dplyr::count() |>
    dplyr::pull(n)

  expect_equal(actual_rows, nrow(expected))
  expect_equal(colnames(actual), colnames(expected))
})

test_that("verification works for data.table", {
  skip_on_cran()
  skip_if_not_installed("data.table")
  sysi <- data.table::as.data.table(sysi)
  sssy <- data.table::as.data.table(sssy)
  actual <- include_podiatrist_services(sysi, sssy)

  actual_rows <- actual |>
    dplyr::count() |>
    dplyr::pull(n)

  expect_equal(actual_rows, nrow(expected))
  expect_equal(colnames(actual), colnames(expected))
})
24 changes: 24 additions & 0 deletions tests/testthat/test-yyww-to-date.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Each test converts a YYWW value and compares it with the Monday of that
# week.

test_that("conversion works when 01-01 is Monday in week 1", {
  actual <- yyww_to_date("2439")
  expect_equal(actual, lubridate::ymd("2024-09-23"))
})

# NOTE(review): 1993-01-01 appears to belong to ISO week 53 of 1992 (1992 had
# 53 ISO weeks), not week 52 -- confirm and adjust the description if so.
test_that("conversion works when 01-01 is Friday in week 52 of the prior year", {
  actual <- yyww_to_date("9307")
  expect_equal(actual, lubridate::ymd("1993-02-15"))
})

test_that("conversion works for week 53", {
  actual <- yyww_to_date("1853")
  expect_equal(actual, lubridate::ymd("2018-12-31"))
})

test_that("conversion works for zero-padded year and week (i.e., numbers < 10)", {
  actual <- yyww_to_date("0107")
  expect_equal(actual, lubridate::ymd("2001-02-12"))
})

test_that("conversion works for numeric one digit year", {
  # A numeric 107 is what "0107" becomes once the leading zero is dropped.
  actual <- yyww_to_date(107)
  expect_equal(actual, lubridate::ymd("2001-02-12"))
})

test_that("conversion works for multiple inputs", {
  actual <- yyww_to_date(c("0107", "2439"))
  expect_equal(actual, lubridate::ymd("2001-02-12", "2024-09-23"))
})
Loading