From 9de10e666d114c14d48fc8af0311b7588d16585d Mon Sep 17 00:00:00 2001 From: wlandau Date: Fri, 5 Apr 2024 14:49:27 -0400 Subject: [PATCH] To align with https://github.com/ropensci/targets/issues/1244 and https://github.com/ropensci/targets/pull/1262, switch the hashing functions from digest::digest() to secretbase::siphash13(). --- DESCRIPTION | 4 +- NAMESPACE | 2 +- NEWS.md | 4 +- R/tar_map.R | 8 +- R/tar_package.R | 2 +- R/tar_render_rep_raw.R | 4 +- R/utils_data.R | 7 -- R/utils_hash.R | 7 ++ codemeta.json | 142 +++++++++++++++--------------- tests/testthat/test-tar_map_rep.R | 12 +-- tests/testthat/test-tar_rep.R | 30 +++++-- tests/testthat/test-tar_rep2.R | 24 ++--- 12 files changed, 125 insertions(+), 121 deletions(-) delete mode 100644 R/utils_data.R create mode 100644 R/utils_hash.R diff --git a/DESCRIPTION b/DESCRIPTION index 6531d342..35bd293c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -9,7 +9,7 @@ Description: Function-oriented Make-like declarative pipelines for reproducible pipelines concisely and compactly. The methods in this package were influenced by the 'drake' R package by Will Landau (2018) . -Version: 0.8.0.9000 +Version: 0.8.0.9001 License: MIT + file LICENSE URL: https://docs.ropensci.org/tarchetypes/, https://github.com/ropensci/tarchetypes BugReports: https://github.com/ropensci/tarchetypes/issues @@ -40,11 +40,11 @@ Authors@R: c( Depends: R (>= 3.5.0) Imports: - digest (>= 0.6.25), dplyr (>= 1.0.0), fs (>= 1.4.2), parallel, rlang (>= 0.4.7), + secretbase (>= 0.4.0), targets (>= 1.6.0), tibble (>= 3.0.1), tidyselect (>= 1.1.0), diff --git a/NAMESPACE b/NAMESPACE index b2e90073..4670037d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -125,7 +125,6 @@ export(tar_torch) export(tar_url) export(walk_ast) export(walk_call_knitr) -importFrom(digest,digest) importFrom(dplyr,bind_rows) importFrom(dplyr,mutate) importFrom(dplyr,select) @@ -148,6 +147,7 @@ importFrom(rlang,expr) importFrom(rlang,inform) importFrom(rlang,is_missing) importFrom(rlang,quo_squash) +importFrom(secretbase,siphash13) importFrom(targets,tar_assert_chr) importFrom(targets,tar_assert_dbl) importFrom(targets,tar_assert_df) diff --git a/NEWS.md b/NEWS.md index e04201cf..673327ce 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,8 @@ -# tarchetypes 0.8.0.9000 (development) +# tarchetypes 0.8.0.9001 (development) +## Invalidating changes +* To align with https://github.com/ropensci/targets/issues/1244 and https://github.com/ropensci/targets/pull/1262, switch the hashing functions from `digest::digest()` to `secretbase::siphash13()`. # tarchetypes 0.8.0 diff --git a/R/tar_map.R b/R/tar_map.R index 47e0267e..39418089 100644 --- a/R/tar_map.R +++ b/R/tar_map.R @@ -132,13 +132,7 @@ tar_map_produce_suffix <- function(values, names) { } tar_map_default_suffixes <- function(values) { - id <- apply( - X = values, - MARGIN = 1, - FUN = digest::digest, - algo = "xxhash32" - ) - list(id = id) + list(id = apply(X = values, MARGIN = 1, FUN = hash_object)) } tar_map_target <- function(target, values, descriptions) { diff --git a/R/tar_package.R b/R/tar_package.R index b7e275d7..96da6044 100644 --- a/R/tar_package.R +++ b/R/tar_package.R @@ -5,7 +5,6 @@ #' convenient helper functions to create specialized targets, making #' pipelines in targets easier and cleaner to write and understand. #' @name tarchetypes-package -#' @importFrom digest digest #' @importFrom dplyr bind_rows mutate select #' @importFrom fs dir_create is_dir path_ext path_ext_remove #' path_ext_set path_rel @@ -13,6 +12,7 @@ #' stopCluster #' @importFrom rlang as_function call2 check_installed enquo expr #' inform is_missing quo_squash +#' @importFrom secretbase siphash13 #' @importFrom targets tar_assert_chr tar_assert_dbl tar_assert_df #' tar_assert_envir tar_assert_equal_lengths #' tar_assert_expr tar_assert_file tar_assert_flag diff --git a/R/tar_render_rep_raw.R b/R/tar_render_rep_raw.R index 7e2fe570..3f41d573 100644 --- a/R/tar_render_rep_raw.R +++ b/R/tar_render_rep_raw.R @@ -364,7 +364,5 @@ tar_render_rep_rep <- function(rep, params, args, path, seeds) { } tar_render_rep_default_path <- function(path, params) { - out <- fs::path_ext_remove(path) - hash <- digest::digest(params, algo = "xxhash32") - sprintf("%s_%s", out, hash) + sprintf("%s_%s", fs::path_ext_remove(path), hash_object(params)) } diff --git a/R/utils_data.R b/R/utils_data.R deleted file mode 100644 index 4ac11345..00000000 --- a/R/utils_data.R +++ /dev/null @@ -1,7 +0,0 @@ -hash_rows <- function(data) { - out <- map_rows( - data, - ~digest::digest(.x, algo = "xxhash32") - ) - make.unique(out, sep = "_") -} diff --git a/R/utils_hash.R b/R/utils_hash.R new file mode 100644 index 00000000..e1e7fbbf --- /dev/null +++ b/R/utils_hash.R @@ -0,0 +1,7 @@ +hash_object <- function(object) { + secretbase::siphash13(x = object) +} + +hash_rows <- function(data) { + make.unique(map_rows(data, hash_object), sep = "_") +} diff --git a/codemeta.json b/codemeta.json index c66cc62a..fbcec2b1 100644 --- a/codemeta.json +++ b/codemeta.json @@ -1,50 +1,47 @@ { - "@context": [ - "https://doi.org/10.5063/schema/codemeta-2.0", - "http://schema.org" - ], + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "@type": "SoftwareSourceCode", "identifier": "tarchetypes", - "description": "Function-oriented Make-like declarative workflows for\n Statistics and data science are supported in the 'targets' R package.\n As an extension to 'targets', the 'tarchetypes' package provides\n convenient user-side functions to make 'targets' easier to use.\n By establishing reusable archetypes for common kinds of\n targets and pipelines, these functions help express complicated\n reproducible workflows concisely and compactly.\n The methods in this package were influenced by the 'drake' R package\n by Will Landau (2018) .", + "description": "Function-oriented Make-like declarative pipelines for Statistics and data science are supported in the 'targets' R package. As an extension to 'targets', the 'tarchetypes' package provides convenient user-side functions to make 'targets' easier to use. By establishing reusable archetypes for common kinds of targets and pipelines, these functions help express complicated reproducible pipelines concisely and compactly. The methods in this package were influenced by the 'drake' R package by Will Landau (2018) .", "name": "tarchetypes: Archetypes for Targets", + "relatedLink": ["https://docs.ropensci.org/tarchetypes/", "https://CRAN.R-project.org/package=tarchetypes"], "codeRepository": "https://github.com/ropensci/tarchetypes", - "relatedLink": [ - "https://docs.ropensci.org/tarchetypes", - "https://docs.ropensci.org/tarchetypes/", - "https://CRAN.R-project.org/package=tarchetypes" - ], "issueTracker": "https://github.com/ropensci/tarchetypes/issues", "license": "https://spdx.org/licenses/MIT", - "version": "0.4.1", + "version": "0.8.0.9001", "programmingLanguage": { "@type": "ComputerLanguage", "name": "R", "url": "https://r-project.org" }, - "runtimePlatform": "R version 4.1.0 (2021-05-18)", + "runtimePlatform": "R version 4.3.2 (2023-10-31)", + "provider": { + "@id": "https://cran.r-project.org", + "@type": "Organization", + "name": "Comprehensive R Archive Network (CRAN)", + "url": "https://cran.r-project.org" + }, "author": [ { "@type": "Person", "givenName": ["William", "Michael"], "familyName": "Landau", - "email": "will.landau@gmail.com", + "email": "will.landau.oss@gmail.com", "@id": "https://orcid.org/0000-0003-1878-3253" } ], - "contributor": {}, "copyrightHolder": [ { "@type": "Organization", "name": "Eli Lilly and Company" } ], - "funder": {}, "maintainer": [ { "@type": "Person", "givenName": ["William", "Michael"], "familyName": "Landau", - "email": "will.landau@gmail.com", + "email": "will.landau.oss@gmail.com", "@id": "https://orcid.org/0000-0003-1878-3253" } ], @@ -75,6 +72,19 @@ }, "sameAs": "https://CRAN.R-project.org/package=knitr" }, + { + "@type": "SoftwareApplication", + "identifier": "quarto", + "name": "quarto", + "version": ">= 1.4", + "provider": { + "@id": "https://cran.r-project.org", + "@type": "Organization", + "name": "Comprehensive R Archive Network (CRAN)", + "url": "https://cran.r-project.org" + }, + "sameAs": "https://CRAN.R-project.org/package=quarto" + }, { "@type": "SoftwareApplication", "identifier": "rmarkdown", @@ -115,27 +125,14 @@ "sameAs": "https://CRAN.R-project.org/package=xml2" } ], - "softwareRequirements": [ - { + "softwareRequirements": { + "1": { "@type": "SoftwareApplication", "identifier": "R", "name": "R", "version": ">= 3.5.0" }, - { - "@type": "SoftwareApplication", - "identifier": "digest", - "name": "digest", - "version": ">= 0.6.25", - "provider": { - "@id": "https://cran.r-project.org", - "@type": "Organization", - "name": "Comprehensive R Archive Network (CRAN)", - "url": "https://cran.r-project.org" - }, - "sameAs": "https://CRAN.R-project.org/package=digest" - }, - { + "2": { "@type": "SoftwareApplication", "identifier": "dplyr", "name": "dplyr", @@ -148,7 +145,7 @@ }, "sameAs": "https://CRAN.R-project.org/package=dplyr" }, - { + "3": { "@type": "SoftwareApplication", "identifier": "fs", "name": "fs", @@ -161,7 +158,12 @@ }, "sameAs": "https://CRAN.R-project.org/package=fs" }, - { + "4": { + "@type": "SoftwareApplication", + "identifier": "parallel", + "name": "parallel" + }, + "5": { "@type": "SoftwareApplication", "identifier": "rlang", "name": "rlang", @@ -174,11 +176,24 @@ }, "sameAs": "https://CRAN.R-project.org/package=rlang" }, - { + "6": { + "@type": "SoftwareApplication", + "identifier": "secretbase", + "name": "secretbase", + "version": ">= 0.4.0", + "provider": { + "@id": "https://cran.r-project.org", + "@type": "Organization", + "name": "Comprehensive R Archive Network (CRAN)", + "url": "https://cran.r-project.org" + }, + "sameAs": "https://CRAN.R-project.org/package=secretbase" + }, + "7": { "@type": "SoftwareApplication", "identifier": "targets", "name": "targets", - "version": ">= 0.6.0", + "version": ">= 1.6.0", "provider": { "@id": "https://cran.r-project.org", "@type": "Organization", @@ -187,7 +202,7 @@ }, "sameAs": "https://CRAN.R-project.org/package=targets" }, - { + "8": { "@type": "SoftwareApplication", "identifier": "tibble", "name": "tibble", @@ -200,7 +215,7 @@ }, "sameAs": "https://CRAN.R-project.org/package=tibble" }, - { + "9": { "@type": "SoftwareApplication", "identifier": "tidyselect", "name": "tidyselect", @@ -213,12 +228,12 @@ }, "sameAs": "https://CRAN.R-project.org/package=tidyselect" }, - { + "10": { "@type": "SoftwareApplication", "identifier": "utils", "name": "utils" }, - { + "11": { "@type": "SoftwareApplication", "identifier": "vctrs", "name": "vctrs", @@ -231,7 +246,7 @@ }, "sameAs": "https://CRAN.R-project.org/package=vctrs" }, - { + "12": { "@type": "SoftwareApplication", "identifier": "withr", "name": "withr", @@ -243,37 +258,10 @@ "url": "https://cran.r-project.org" }, "sameAs": "https://CRAN.R-project.org/package=withr" - } - ], - "releaseNotes": "https://github.com/ropensci/tarchetypes/blob/master/NEWS.md", - "readme": "https://github.com/ropensci/tarchetypes/blob/main/README.md", - "fileSize": "857.081KB", - "contIntegration": ["https://github.com/ropensci/tarchetypes/actions?query=workflow%3Acheck", "https://app.codecov.io/gh/ropensci/tarchetypes", "https://github.com/ropensci/tarchetypes/actions?query=workflow%3Alint"], - "developmentStatus": "https://www.repostatus.org/#active", - "review": { - "@type": "Review", - "url": "https://github.com/ropensci/software-review/issues/401", - "provider": "https://ropensci.org" - }, - "keywords": [ - "reproducibility", - "high-performance-computing", - "r", - "data-science", - "rstats", - "pipeline", - "r-package", - "workflow", - "targets", - "r-targetopia", - "peer-reviewed" - ], - "provider": { - "@id": "https://cran.r-project.org", - "@type": "Organization", - "name": "Comprehensive R Archive Network (CRAN)", - "url": "https://cran.r-project.org" + }, + "SystemRequirements": null }, + "fileSize": "1209.095KB", "citation": [ { "@type": "SoftwareSourceCode", @@ -288,5 +276,15 @@ "name": "tarchetypes: Archetypes for Targets", "description": "{https://docs.ropensci.org/tarchetypes/, https://github.com/ropensci/tarchetypes}" } - ] + ], + "releaseNotes": "https://github.com/ropensci/tarchetypes/blob/master/NEWS.md", + "readme": "https://github.com/ropensci/tarchetypes/blob/main/README.md", + "contIntegration": ["https://github.com/ropensci/tarchetypes/actions?query=workflow%3Acheck", "https://app.codecov.io/gh/ropensci/tarchetypes", "https://github.com/ropensci/tarchetypes/actions?query=workflow%3Alint"], + "developmentStatus": "https://www.repostatus.org/#active", + "review": { + "@type": "Review", + "url": "https://github.com/ropensci/software-review/issues/401", + "provider": "https://ropensci.org" + }, + "keywords": ["reproducibility", "high-performance-computing", "r", "data-science", "rstats", "pipeline", "r-package", "workflow", "targets", "r-targetopia", "peer-reviewed"] } diff --git a/tests/testthat/test-tar_map_rep.R b/tests/testthat/test-tar_map_rep.R index 6559bfde..bca8306f 100644 --- a/tests/testthat/test-tar_map_rep.R +++ b/tests/testthat/test-tar_map_rep.R @@ -362,7 +362,7 @@ targets::tar_test("tar_map_rep() seeds are resilient to re-batching", { skip_on_cran() targets::tar_script({ f <- function(x) { - out <- digest::digest( + out <- secretbase::siphash13( paste(c(x, sample.int(n = 1e9, size = 1000)), collapse = "_") ) data.frame(x = out) @@ -381,7 +381,7 @@ targets::tar_test("tar_map_rep() seeds are resilient to re-batching", { out1$tar_rep <- NULL targets::tar_script({ f <- function(x) { - out <- digest::digest( + out <- secretbase::siphash13( paste(c(x, sample.int(n = 1e9, size = 1000)), collapse = "_") ) data.frame(x = out) @@ -400,7 +400,7 @@ targets::tar_test("tar_map_rep() seeds are resilient to re-batching", { out2$tar_rep <- NULL targets::tar_script({ f <- function(x) { - out <- digest::digest( + out <- secretbase::siphash13( paste(c(x, sample.int(n = 1e9, size = 1000)), collapse = "_") ) data.frame(x = out) @@ -427,7 +427,7 @@ targets::tar_test("tar_map_rep() seeds change with the seed option", { targets::tar_script({ tar_option_set(seed = 1L) f <- function(x) { - out <- digest::digest( + out <- secretbase::siphash13( paste(c(x, sample.int(n = 1e9, size = 1000)), collapse = "_") ) data.frame(x = out) @@ -448,7 +448,7 @@ targets::tar_test("tar_map_rep() seeds change with the seed option", { targets::tar_script({ tar_option_set(seed = 2L) f <- function(x) { - out <- digest::digest( + out <- secretbase::siphash13( paste(c(x, sample.int(n = 1e9, size = 1000)), collapse = "_") ) data.frame(x = out) @@ -466,7 +466,7 @@ targets::tar_test("tar_map_rep() seeds change with the seed option", { targets::tar_script({ tar_option_set(seed = NA) f <- function(x) { - out <- digest::digest( + out <- secretbase::siphash13( paste(c(x, sample.int(n = 1e9, size = 1000)), collapse = "_") ) data.frame(x = out) diff --git a/tests/testthat/test-tar_rep.R b/tests/testthat/test-tar_rep.R index 1140f069..44ffc75d 100644 --- a/tests/testthat/test-tar_rep.R +++ b/tests/testthat/test-tar_rep.R @@ -119,7 +119,9 @@ targets::tar_test("tar_rep() seeds are resilient to re-batching", { skip_on_cran() targets::tar_script({ f <- function() { - digest::digest(paste(sample.int(n = 1e9, size = 1000), collapse = "_")) + secretbase::siphash13( + paste(sample.int(n = 1e9, size = 1000), collapse = "_") + ) } tarchetypes::tar_rep(x, f(), batches = 1, reps = 4) }) @@ -127,7 +129,9 @@ targets::tar_test("tar_rep() seeds are resilient to re-batching", { out1 <- unname(targets::tar_read(x)) targets::tar_script({ f <- function() { - digest::digest(paste(sample.int(n = 1e9, size = 1000), collapse = "_")) + secretbase::siphash13( + paste(sample.int(n = 1e9, size = 1000), collapse = "_") + ) } tarchetypes::tar_rep(x, f(), batches = 2, reps = 2) }) @@ -135,7 +139,9 @@ targets::tar_test("tar_rep() seeds are resilient to re-batching", { out2 <- unname(targets::tar_read(x)) targets::tar_script({ f <- function() { - digest::digest(paste(sample.int(n = 1e9, size = 1000), collapse = "_")) + secretbase::siphash13( + paste(sample.int(n = 1e9, size = 1000), collapse = "_") + ) } tarchetypes::tar_rep(x, f(), batches = 4, reps = 1) }) @@ -151,7 +157,9 @@ targets::tar_test("tar_rep() seeds change with the seed option", { targets::tar_script({ tar_option_set(seed = 1L) f <- function() { - digest::digest(paste(sample.int(n = 1e9, size = 1000), collapse = "_")) + secretbase::siphash13( + paste(sample.int(n = 1e9, size = 1000), collapse = "_") + ) } tarchetypes::tar_rep(x, f(), batches = 2, reps = 2) }) @@ -163,7 +171,9 @@ targets::tar_test("tar_rep() seeds change with the seed option", { targets::tar_script({ tar_option_set(seed = 2L) f <- function() { - digest::digest(paste(sample.int(n = 1e9, size = 1000), collapse = "_")) + secretbase::siphash13( + paste(sample.int(n = 1e9, size = 1000), collapse = "_") + ) } tarchetypes::tar_rep(x, f(), batches = 2, reps = 2) }) @@ -172,7 +182,9 @@ targets::tar_test("tar_rep() seeds change with the seed option", { targets::tar_script({ tar_option_set(seed = NA) f <- function() { - digest::digest(paste(sample.int(n = 1e9, size = 1000), collapse = "_")) + secretbase::siphash13( + paste(sample.int(n = 1e9, size = 1000), collapse = "_") + ) } tarchetypes::tar_rep(x, f(), batches = 2, reps = 2) }) @@ -193,10 +205,10 @@ targets::tar_test("tar_rep() seeds change with the seed option", { targets::tar_test("correct RNG state", { skip_on_cran() targets::tar_script({ - targets::tar_option_set(packages = c("digest", "tibble")) + targets::tar_option_set(packages = c("secretbase", "tibble")) tar_rep( name = results, - command = tibble(seed_hash = digest(.Random.seed)), + command = tibble(seed_hash = siphash13(.Random.seed)), batches = 1L, reps = 3L, rep_workers = 2L @@ -206,7 +218,7 @@ targets::tar_test("correct RNG state", { out <- tar_read(results) for (rep in seq_len(3L)) { set.seed(seed = out$tar_seed[rep], kind = "default") - expect_equal(out$seed_hash[rep], digest::digest(.Random.seed)) + expect_equal(out$seed_hash[rep], secretbase::siphash13(.Random.seed)) } }) diff --git a/tests/testthat/test-tar_rep2.R b/tests/testthat/test-tar_rep2.R index 4059d354..77feaffc 100644 --- a/tests/testthat/test-tar_rep2.R +++ b/tests/testthat/test-tar_rep2.R @@ -288,7 +288,7 @@ targets::tar_test("tar_rep2() seeds are resilient to re-batching", { targets::tar_script({ f <- function() { tibble::tibble( - x = digest::digest( + x = secretbase::siphash13( paste(sample.int(n = 1e9, size = 1000), collapse = "_") ) ) @@ -296,7 +296,7 @@ targets::tar_test("tar_rep2() seeds are resilient to re-batching", { g <- function(x) { x$x <- paste0( x$x, - digest::digest( + secretbase::siphash13( paste(sample.int(n = 1e9, size = 1000), collapse = "_") ) ) @@ -314,7 +314,7 @@ targets::tar_test("tar_rep2() seeds are resilient to re-batching", { targets::tar_script({ f <- function() { tibble::tibble( - x = digest::digest( + x = secretbase::siphash13( paste(sample.int(n = 1e9, size = 1000), collapse = "_") ) ) @@ -322,7 +322,7 @@ targets::tar_test("tar_rep2() seeds are resilient to re-batching", { g <- function(x) { x$x <- paste0( x$x, - digest::digest( + secretbase::siphash13( paste(sample.int(n = 1e9, size = 1000), collapse = "_") ) ) @@ -340,7 +340,7 @@ targets::tar_test("tar_rep2() seeds are resilient to re-batching", { targets::tar_script({ f <- function() { tibble::tibble( - x = digest::digest( + x = secretbase::siphash13( paste(sample.int(n = 1e9, size = 1000), collapse = "_") ) ) @@ -348,7 +348,7 @@ targets::tar_test("tar_rep2() seeds are resilient to re-batching", { g <- function(x) { x$x <- paste0( x$x, - digest::digest( + secretbase::siphash13( paste(sample.int(n = 1e9, size = 1000), collapse = "_") ) ) @@ -374,7 +374,7 @@ targets::tar_test("tar_rep2() seeds change with the seed option", { tar_option_set(seed = 1L) f <- function() { tibble::tibble( - x = digest::digest( + x = secretbase::siphash13( paste(sample.int(n = 1e9, size = 1000), collapse = "_") ) ) @@ -382,7 +382,7 @@ targets::tar_test("tar_rep2() seeds change with the seed option", { g <- function(x) { x$x <- paste0( x$x, - digest::digest( + secretbase::siphash13( paste(sample.int(n = 1e9, size = 1000), collapse = "_") ) ) @@ -402,7 +402,7 @@ targets::tar_test("tar_rep2() seeds change with the seed option", { tar_option_set(seed = 2L) f <- function() { tibble::tibble( - x = digest::digest( + x = secretbase::siphash13( paste(sample.int(n = 1e9, size = 1000), collapse = "_") ) ) @@ -410,7 +410,7 @@ targets::tar_test("tar_rep2() seeds change with the seed option", { g <- function(x) { x$x <- paste0( x$x, - digest::digest( + secretbase::siphash13( paste(sample.int(n = 1e9, size = 1000), collapse = "_") ) ) @@ -427,7 +427,7 @@ targets::tar_test("tar_rep2() seeds change with the seed option", { tar_option_set(seed = NA) f <- function() { tibble::tibble( - x = digest::digest( + x = secretbase::siphash13( paste(sample.int(n = 1e9, size = 1000), collapse = "_") ) ) @@ -435,7 +435,7 @@ targets::tar_test("tar_rep2() seeds change with the seed option", { g <- function(x) { x$x <- paste0( x$x, - digest::digest( + secretbase::siphash13( paste(sample.int(n = 1e9, size = 1000), collapse = "_") ) )