Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: ✨ add include_podiatrist_services() with tests #182

Open
wants to merge 17 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Imports:
cli,
dplyr,
lifecycle,
lubridate,
rlang,
utils
Suggests:
Expand All @@ -50,5 +51,5 @@ Encoding: UTF-8
Language: en-US
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.1
RoxygenNote: 7.3.2
Config/Needs/website: rmarkdown
57 changes: 57 additions & 0 deletions R/include-podiatrist-services.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#' Include diabetes-specific podiatrist services.
#'
#' Uses the `sysi` and `sssy` registers as input to extract the dates of all
#' diabetes-specific podiatrist services. Removes duplicate services on the
#' same date. Only the two earliest dates per individual are kept.
#'
#' The output is passed to the `join_inclusions()` function for the final
#' step of the inclusion process.
#'
#' @param sysi The SYSI register.
#' @param sssy The SSSY register.
#'
#' @return The same type as the input data, default as a [tibble::tibble()],
#'   with three columns and up to two rows for each individual:
#'
#'   - `pnr`: Identifier variable
#'   - `date`: The dates of the first and second diabetes-specific
#'     podiatrist record
#'   - `has_podiatrist_services`: A logical variable that acts as a helper
#'     indicator for use in later functions.
#'
#' @keywords internal
#' @inherit algorithm seealso
#'
#' @examples
#' \dontrun{
#' include_podiatrist_services(register_data$sysi, register_data$sssy)
#' }
include_podiatrist_services <- function(sysi, sssy) {
  verify_required_variables(sysi, "sysi")
  verify_required_variables(sssy, "sssy")
  # The algorithm logic is stored as a string, so convert it into an R
  # expression that can be unquoted inside `filter()`.
  criteria <- get_algorithm_logic("podiatrist_services") |>
    rlang::parse_expr()

  column_names_to_lower(sysi) |>
    # The join keys are given as strings on purpose: bare column names inside
    # `join_by()` made `devtools::check()` report a note (presumably "no
    # visible binding for global variable"), and `join_by()` doesn't accept
    # the `.data` pronoun.
    dplyr::full_join(
      column_names_to_lower(sssy),
      by = c("pnr", "barnmak", "speciale", "honuge")
    ) |>
    # Keep only the services that match the algorithm logic.
    dplyr::filter(!!criteria) |>
    # Keep only the columns we need and convert `honuge` to a date.
    dplyr::mutate(
      pnr = .data$pnr,
      date = yyww_to_yyyymmdd(.data$honuge),
      .keep = "none"
    ) |>
    # Remove duplicates *after* reducing to `pnr` and `date`, so that services
    # on the same date are collapsed even if their other columns differ.
    dplyr::distinct() |>
    # FIXME: This might be computationally intensive.
    dplyr::group_by(.data$pnr) |>
    # Keep the earliest two dates per individual.
    dplyr::filter(dplyr::row_number(.data$date) %in% 1:2) |>
    dplyr::ungroup() |>
    # Create the logical helper variable.
    dplyr::mutate(has_podiatrist_services = TRUE)
}
Binary file modified R/sysdata.rda
Binary file not shown.
38 changes: 38 additions & 0 deletions R/yyww-to-yyyymmdd.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#' Convert date format YYWW to YYYY-MM-DD
#'
#' Since the exact date isn't given in the input, this function will set the
#' date to Monday of the week.
#'
#' As a precaution, a leading zero is added if it has been removed.
#' This can, e.g., happen if the input was "0107" and has been converted to a
#' numeric 107.
#'
#' Two-digit years are resolved with the usual `%y` rule: 00 to 68 become
#' 2000 to 2068 and 69 to 99 become 1969 to 1999.
#'
#' @param yyww Character or numeric vector of year-week values in the format
#'   YYWW (two-digit year followed by two-digit ISO week).
#'
#' @returns Date(s) in the format YYYY-MM-DD.
#'
#' @keywords internal
#'
#' @examples
#' \dontrun{
#' yyww_to_yyyymmdd("0102")
#' yyww_to_yyyymmdd(c("0102", "0304"))
#' }
yyww_to_yyyymmdd <- function(yyww) {
  # Ensure the input is zero-padded to length 4 (e.g. 107 -> "0107").
  yyww <- sprintf("%04d", as.numeric(yyww))

  year <- substr(yyww, 1, 2)
  week <- as.numeric(substr(yyww, 3, 4))

  # 4 January always lies in ISO week 1, so the Monday of its week is the
  # first day of the ISO year.
  jan_fourth <- as.Date(sprintf("%s-01-04", year), format = "%y-%m-%d")
  # `%u` is the ISO weekday number: Monday = 1, ..., Sunday = 7.
  weekday_of_jan_fourth <- as.numeric(format(jan_fourth, "%u"))
  first_monday_of_iso_year <- jan_fourth - (weekday_of_jan_fourth - 1)

  # Step forward whole weeks to reach the Monday of the requested week.
  first_monday_of_iso_year + 7 * (week - 1)
}
23 changes: 12 additions & 11 deletions _targets/meta/meta
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ name|type|data|command|depend|seed|path|time|size|bytes|format|repository|iterat
.__global__|object|87b8d4f266c27bf4|||||||||||||||
.Random.seed|object|f18bda37fe0d0048|||||||||||||||
add_fake_drug_name|function|0ab4e3d184e218f5|||||||||||||||
algorithm|stem|40723e52fd2c0786|e6ba0f398c6f1faa|396a620bf28c517b|-415220810||t20076.5829071292s|cfeff01d1edd45fa|355|rds|local|vector|||0.001||
algorithm_csv|stem|499e7e860784d28e|72b6696561a29259|2c530c1562a7fbd1|8336483|data-raw/algorithm.csv|t20076.5660942077s|06ecd420619ded44|130|file|local|vector|||0||
algorithm_rda|stem|c054eb5d080dd3d9|07904a5fa14d0614|b947939e3d638088|1907151832|/Users/au546191/Documents/misc/other_git_repos/osdc/data/algorithm.rda|t20076.5829141999s|449037faa65cfd59|401|file|local|vector|||0.016||
algorithm|stem|05d0d2a8ff6a8c9f|e6ba0f398c6f1faa|bc6d2f1060da6e07|-415220810||t20077.4829948109s|10b6c9f869744062|398|rds|local|vector|||0.147||
algorithm_csv|stem|6095e95958921ff6|72b6696561a29259|2c530c1562a7fbd1|8336483|data-raw/algorithm.csv|t20076.6337660168s|38030177dc1ffde5|184|file|local|vector|||0.001||
algorithm_rda|stem|60b4a600e2340b63|07904a5fa14d0614|914c57c9a673abfe|1907151832|/Users/au546191/Documents/misc/other_git_repos/osdc/data/algorithm.rda|t20077.4829961292s|c091eeb299dbae89|439|file|local|vector|||0.075||
column_names_to_lower|function|1f9ed89ec76f0ce4|||||||||||||||
create_algorithm_data|function|721802c40fcbce4e|||||||||||||||
create_fake_atc|function|2391be262acc5e8f|||||||||||||||
Expand All @@ -18,18 +18,18 @@ create_fake_npu|function|e8db372bbb8627ab|||||||||||||||
create_padded_integer|function|7429ec353f6bff77|||||||||||||||
create_simulated_data|function|025e1e98c7f4ab87|||||||||||||||
create_variable_description_data|function|ede086e002225ffb|||||||||||||||
get_algorithm_logic|function|e231edfcf599a22d|||||||||||||||
get_algorithm_logic|function|cc52ac0ae157e662|||||||||||||||
get_register_abbrev|function|a82c561691b651df|||||||||||||||
get_required_variables|function|986ccd76798db1ba|||||||||||||||
include_gld_purchases|function|eb16919c74056d15|||||||||||||||
include_hba1c|function|6f87e15bcf0dc244|||||||||||||||
include_podiatrist_services|function|bbae5e16a8310e90|||||||||||||||
include_gld_purchases|function|4194aeadd45b706b|||||||||||||||
include_hba1c|function|526b1aabb7e89bc4|||||||||||||||
include_podiatrist_services|function|f17639336fbe4afd|||||||||||||||
insert_analysiscode|function|a62a5138fce1d76f|||||||||||||||
insert_false_drug_names|function|03471ee24566433b|||||||||||||||
insert_false_metformin|function|5124fcc2167cfa45|||||||||||||||
insert_specific_atc|function|c9c607208b71b0b5|||||||||||||||
insertion_rate|function|cd1ada3c0c716ec8|||||||||||||||
internal_rda|stem|654051d3dd808bac|d9d14443b097054a|54c081cfe5f225a1|-682677189|/Users/au546191/Documents/misc/other_git_repos/osdc/R/sysdata.rda|t20076.5829151688s|eec7e47575e4fdfc|157855|file|local|vector|||0.051||
internal_rda|stem|bd075d278ed6e0ee|d9d14443b097054a|973f9d7cb6b86ff7|-682677189|/Users/au546191/Documents/misc/other_git_repos/osdc/R/sysdata.rda|t20077.4829968828s|08f8ace60ca47fba|158131|file|local|vector|||0.05||
join_lpr2|function|73dfa05a6f146f76|||||||||||||||
join_lpr3|function|1690200e83e2d60d|||||||||||||||
pad_integers|function|6f8d902b8ca93b07|||||||||||||||
Expand All @@ -39,17 +39,18 @@ read_variable_description_data|function|bac5d03645e8328d|||||||||||||||
register_as_md_header|function|1b9b0bb62cc1e264|||||||||||||||
register_data|stem|fc4f033ef3e69d6a|4ee7a1a9cf009abe|41c68bb4ccf81abf|-143937024||t20076.5829124231s|e09975c5f10ac745|186631|rds|local|vector|||0.408||
register_data_as_md_table|function|bbdda3785a7bdb63|||||||||||||||
register_data_rda|stem|d38558aaf18ba4a7|677ba3caa7b36ca4|b64f1c34fd720564|1912226099|/Users/au546191/Documents/misc/other_git_repos/osdc/data/register_data.rda|t20076.5829161683s|ac36667d8f414f46|156297|file|local|vector|||0.077||
register_data_rda|stem|d38558aaf18ba4a7|677ba3caa7b36ca4|b64f1c34fd720564|1912226099|/Users/au546191/Documents/misc/other_git_repos/osdc/data/register_data.rda|t20076.6495361389s|ac36667d8f414f46|156297|file|local|vector|||0.077||
registers_as_md_table|function|ab56ce7262ba313b|||||||||||||||
simulate_data|function|ecab2e8eda0ad248|||||||||||||||
simulation_definitions|stem|61079209f80ca2de|a7254d482fd90608|f22d1020f62b8099|1404983078||t20076.5089902499s|2010c91582ffefcf|763|rds|local|vector|||0.221||
simulation_definitions_csv|stem|91efb1a8ec9391b8|d177045a63e64543|2c530c1562a7fbd1|-1333003177|data-raw/simulation-definitions.csv|t20076.5660942234s|2688973cdee8dacd|3823|file|local|vector|||0||
to_yyww|function|4d8ab3df391faebd|||||||||||||||
to_yyyymmdd|function|47a732ab76c8b4fd|||||||||||||||
variable_description|stem|2ac9deeee55b04f5|c2153d67e5651737|a8e94a865056a639|-1244655093||t20076.5829070039s|c5e2c6b0e4727be8|1313|rds|local|vector|||0.152||
variable_description|stem|2ac9deeee55b04f5|c2153d67e5651737|a8e94a865056a639|-1244655093||t20076.6090410234s|c5e2c6b0e4727be8|1313|rds|local|vector|||0.152||
variable_description_csv|stem|c6ca89bf750f10a9|2347307fa16d111b|2c530c1562a7fbd1|1721723811|data-raw/variable-description.csv|t20076.5660942277s|75bbd1de9a7a9806|4683|file|local|vector|||0||
variable_description_rda|stem|f056e784254cc664|afe48e93eb17b9c8|9ce21d3732a18b27|1356510397|/Users/au546191/Documents/misc/other_git_repos/osdc/data/variable_description.rda|t20076.5829138669s|86b7064cf66fe42e|1565|file|local|vector|||0.088||
variable_description_rda|stem|f056e784254cc664|afe48e93eb17b9c8|9ce21d3732a18b27|1356510397|/Users/au546191/Documents/misc/other_git_repos/osdc/data/variable_description.rda|t20076.6495361841s|86b7064cf66fe42e|1565|file|local|vector|||0.088||
variables_as_md_table|function|bf10d1f0df6a170a|||||||||||||||
verify_required_variables|function|070c8fb3014efb9e|||||||||||||||
write_external_rda|function|c0c90c7048a0d89b|||||||||||||||
write_internal_rda|function|95b61d841de16e96|||||||||||||||
yyww_to_date|function|1036c7f5deb56fd4|||||||||||||||
2 changes: 1 addition & 1 deletion data-raw/algorithm.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name,logic
hba1c,(analysiscode == 'NPU27300' AND value >= 48) OR (analysiscode == 'NPU03835' AND value >= 6.5)
gld,atc =~ '^A10'

podiatrist_services,speciale =~ '^54' AND barnmak != 0
Binary file modified data/algorithm.rda
Binary file not shown.
2 changes: 1 addition & 1 deletion man/get_algorithm_logic.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

39 changes: 39 additions & 0 deletions man/include_podiatrist_services.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 30 additions & 0 deletions man/yyww_to_yyyymmdd.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

113 changes: 113 additions & 0 deletions tests/testthat/test-include-podiatrist-services.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
# Fixture for the SYSI register. The row comments state what should happen to
# each row under the `podiatrist_services` logic (`speciale` starts with 54
# and `barnmak` is not 0).
sysi <- tibble::tribble(
~pnr, ~barnmak, ~speciale, ~honuge,
1000000000, 0, 54711, "1879", # removed since barnmak = 0
2000000000, 1, 54800, "9207", # kept but deduplicated
2000000000, 1, 54800, "9207", # kept but deduplicated
3000000000, 1, 54005, "0752", # kept bc it's the first date for this person
3000000000, 1, 54005, "2430", # removed bc it's the third date for this person
4000000000, 1, 55000, "0044" # removed since speciale doesn't start with 54
)

# Fixture for the SSSY register, with the same columns as `sysi`.
sssy <- tibble::tribble(
~pnr, ~barnmak, ~speciale, ~honuge,
2000000000, 1, 54800, "9207", # kept but deduplicated
3000000000, 0, 10000, "1801", # removed since barnmak = 0
3000000000, 1, 54005, "0830", # kept bc it's the second date for this person
4000000000, 1, 76255, "1123", # removed since speciale doesn't start with 54
)

# Expected output for the fixtures above: `honuge` converted to the Monday of
# the given week, at most two rows per `pnr`.
expected <- tibble::tribble(
~pnr, ~date, ~has_podiatrist_services,
2000000000, lubridate::ymd("1992-02-10"),TRUE,
3000000000, lubridate::ymd("2007-12-24"),TRUE,
3000000000, lubridate::ymd("2008-07-21"),TRUE
)


test_that("sysi needs expected variables", {
  # Without its second column (`barnmak`), `sysi` must be rejected.
  sysi_missing_column <- sysi[-2]
  expect_error(include_podiatrist_services(sysi_missing_column, sssy))
})

test_that("sssy needs expected variables", {
  # Without its second column (`barnmak`), `sssy` must be rejected.
  sssy <- sssy[-2]
  expect_error(include_podiatrist_services(sysi, sssy))
})


test_that("those with inclusion are kept", {
  # The fixtures should reduce to exactly the rows in `expected`.
  expect_equal(include_podiatrist_services(sysi, sssy), expected)
})

test_that("casing of input variables doesn't matter", {
  # Upper-case every column name in both registers before running.
  sysi_upper <- dplyr::rename_with(sysi, toupper)
  sssy_upper <- dplyr::rename_with(sssy, toupper)

  expect_equal(include_podiatrist_services(sysi_upper, sssy_upper), expected)
})

test_that("verification works for DuckDB Database", {
  skip_on_cran()
  skip_if_not_installed("duckplyr")

  # Run the inclusion step on duckplyr-backed copies of the fixtures.
  actual <- include_podiatrist_services(
    duckplyr::as_duckplyr_tibble(sysi),
    duckplyr::as_duckplyr_tibble(sssy)
  )
  actual_rows <- dplyr::pull(dplyr::count(actual), n)

  expect_equal(actual_rows, nrow(expected))
  expect_equal(colnames(actual), colnames(expected))
})

test_that("verification works for Arrow Tables (from Parquet)", {
  # FIXME: This test fails because of some issue with the criteria and colnames
  skip()
  skip_on_cran()
  skip_if_not_installed("arrow")

  # Run the inclusion step on Arrow Table copies of the fixtures.
  actual <- include_podiatrist_services(
    arrow::as_arrow_table(sysi),
    arrow::as_arrow_table(sssy)
  )
  actual_rows <- dplyr::pull(dplyr::count(actual), n)

  expect_equal(actual_rows, nrow(expected))
  # TODO: Arrow doesn't work with colnames(), fix?
  expect_equal(names(actual), colnames(expected))
})

test_that("verification works for data.frame", {
  # Convert *both* registers, so the data.frame code path is exercised for
  # each input.
  sysi <- as.data.frame(sysi)
  sssy <- as.data.frame(sssy)
  actual <- include_podiatrist_services(sysi, sssy)

  actual_rows <- actual |>
    dplyr::count() |>
    dplyr::pull(n)

  expect_equal(actual_rows, nrow(expected))
  expect_equal(colnames(actual), colnames(expected))
})

test_that("verification works for data.table", {
  skip_on_cran()
  skip_if_not_installed("data.table")

  # Run the inclusion step on data.table copies of the fixtures.
  actual <- include_podiatrist_services(
    data.table::as.data.table(sysi),
    data.table::as.data.table(sssy)
  )
  actual_rows <- dplyr::pull(dplyr::count(actual), n)

  expect_equal(actual_rows, nrow(expected))
  expect_equal(colnames(actual), colnames(expected))
})
28 changes: 28 additions & 0 deletions tests/testthat/test-yyww-to-yyyymmdd.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
test_that("conversion works when 01-01 is Monday in week 1", {
  expect_equal(yyww_to_yyyymmdd("2439"), lubridate::ymd("2024-09-23"))
})

test_that("conversion works when 01-01 is Friday in week 53 of the prior year", {
  # 1993-01-01 is a Friday that belongs to ISO week 53 of 1992.
  expect_equal(yyww_to_yyyymmdd("9307"), lubridate::ymd("1993-02-15"))
})

test_that("conversion works for week 53", {
  # 2020 is an ISO year that has a week 53.
  expect_equal(yyww_to_yyyymmdd("2053"), lubridate::ymd("2020-12-28"))
  # 2018 only has 52 ISO weeks, so "week 53" rolls over to the Monday after
  # week 52.
  expect_equal(yyww_to_yyyymmdd("1853"), lubridate::ymd("2018-12-31"))
})

test_that("conversion works for zero-padded year and week (i.e., numbers < 10)", {
  expect_equal(yyww_to_yyyymmdd("0107"), lubridate::ymd("2001-02-12"))
})

test_that("conversion works for numeric one digit year", {
  # This could happen if the input was "0107" and has been converted to numeric
  expect_equal(yyww_to_yyyymmdd(107), lubridate::ymd("2001-02-12"))
})

test_that("conversion works for numeric year 2000", {
  # R reads the literal `0007` as the number 7.
  expect_equal(yyww_to_yyyymmdd(0007), lubridate::ymd("2000-02-14"))
})

test_that("conversion works for multiple inputs", {
  expect_equal(
    yyww_to_yyyymmdd(c("0107", "2439")),
    lubridate::ymd("2001-02-12", "2024-09-23")
  )
})