From 71bf77c045db01b033acf1479250eee5577a1d4f Mon Sep 17 00:00:00 2001 From: Mahmoud Hallal <86970066+mhallal1@users.noreply.github.com> Date: Fri, 11 Nov 2022 15:57:09 +0100 Subject: [PATCH] remove hashing (#119) related to https://github.com/insightsengineering/teal/issues/751 remove datasets hashing and transfer it to `teal`. Co-authored-by: 27856297+dependabot-preview[bot]@users.noreply.github.com <27856297+dependabot-preview[bot]@users.noreply.github.com> --- NEWS.md | 1 + R/MAETealDataset.R | 7 +++--- R/TealDataset.R | 42 +++++++++++++++---------------- man/CDISCTealDataset.Rd | 1 - man/dataset_class.Rd | 15 ----------- tests/testthat/test-TealData.R | 10 -------- tests/testthat/test-TealDataset.R | 14 ----------- 7 files changed, 25 insertions(+), 65 deletions(-) diff --git a/NEWS.md b/NEWS.md index d7ea60a0c..db4208fe6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,7 @@ * Examples now use `scda.2022` instead of `scda.2021`. * Modified `teal.Dataset$print` method for a less cluttered output. +* Transferred data hashing step in `TealDataset` and `MAETealDataset` to `teal`. # teal.data 0.1.2 diff --git a/R/MAETealDataset.R b/R/MAETealDataset.R index 2f2da6492..20c9f4594 100644 --- a/R/MAETealDataset.R +++ b/R/MAETealDataset.R @@ -75,7 +75,6 @@ MAETealDataset <- R6::R6Class( # nolint self$set_vars(vars) self$set_dataset_label(label) self$set_keys(keys) - private$calculate_hash() # needed if recreating dataset - we need to preserve code order and uniqueness private$code <- CodeClass$new() @@ -96,7 +95,9 @@ MAETealDataset <- R6::R6Class( # nolint #' `TRUE` if the dataset generated from evaluating the #' `get_code()` code is identical to the raw data, else `FALSE`. check = function() { - logger::log_trace("TealDataset$check executing the code to reproduce dataset: { deparse1(self$get_dataname()) }...") + logger::log_trace( + "TealDataset$check executing the code to reproduce dataset: { deparse1(self$get_dataname()) }..." + ) if (!checkmate::test_character(self$get_code(), len = 1, pattern = "\\w+")) { stop( sprintf( @@ -274,7 +275,7 @@ MAETealDataset <- R6::R6Class( # nolint #' mae_d$get_code() #' mae_d$get_raw_data() #' @export -dataset.MultiAssayExperiment <- function(dataname, +dataset.MultiAssayExperiment <- function(dataname, # nolint x, keys = character(0), label = data_label(x), diff --git a/R/TealDataset.R b/R/TealDataset.R index 63e6fec59..5b6bd0aff 100644 --- a/R/TealDataset.R +++ b/R/TealDataset.R @@ -73,7 +73,6 @@ TealDataset <- R6::R6Class( # nolint self$set_vars(vars) self$set_dataset_label(label) self$set_keys(keys) - private$calculate_hash() # needed if recreating dataset - we need to preserve code order and uniqueness private$code <- CodeClass$new() @@ -256,12 +255,6 @@ TealDataset <- R6::R6Class( # nolint private$join_keys }, #' @description - #' Returns the string representation of the raw data hashed with the MD5 hash algorithm. - #' @return `character` the hash of the raw data - get_hash = function() { - private$data_hash - }, - #' @description #' Get the list of dependencies that are `TealDataset` or `TealDatasetConnector` objects #' #' @return `list` @@ -303,7 +296,9 @@ TealDataset <- R6::R6Class( # nolint common_mutate_vars <- intersect(names(datasets), names(private$mutate_vars)) private$mutate_vars[common_mutate_vars] <- datasets[common_mutate_vars] - logger::log_trace("TealDataset$reassign_datasets_vars reassigned vars for dataset: { deparse1(self$get_dataname()) }.") + logger::log_trace( + "TealDataset$reassign_datasets_vars reassigned vars for dataset: { deparse1(self$get_dataname()) }." + ) invisible(NULL) }, #' @description @@ -316,7 +311,9 @@ TealDataset <- R6::R6Class( # nolint checkmate::assert_character(label, max.len = 1, any.missing = FALSE) private$dataset_label <- label - logger::log_trace("TealDataset$set_dataset_label dataset_label set for dataset: { deparse1(self$get_dataname()) }.") + logger::log_trace( + "TealDataset$set_dataset_label dataset_label set for dataset: { deparse1(self$get_dataname()) }." + ) return(invisible(self)) }, #' @description @@ -360,7 +357,10 @@ TealDataset <- R6::R6Class( # nolint mutate_join_keys = function(dataset, val) { self$get_join_keys()$mutate(private$dataname, dataset, val) logger::log_trace( - "TealDatasetConnector$mutate_join_keys join_keys modified keys of { deparse1(self$get_dataname()) } against { dataset }." + paste0( + "TealDatasetConnector$mutate_join_keys join_keys modified keys", + "of { deparse1(self$get_dataname()) } against { dataset }." + ) ) return(invisible(self)) }, @@ -523,7 +523,9 @@ TealDataset <- R6::R6Class( # nolint #' `TRUE` if the dataset generated from evaluating the #' `get_code()` code is identical to the raw data, else `FALSE`. check = function() { - logger::log_trace("TealDataset$check executing the code to reproduce dataset: { deparse1(self$get_dataname()) }...") + logger::log_trace( + "TealDataset$check executing the code to reproduce dataset: { deparse1(self$get_dataname()) }..." + ) if (!checkmate::test_character(self$get_code(), len = 1, pattern = "\\w+")) { stop( sprintf( @@ -596,7 +598,6 @@ TealDataset <- R6::R6Class( # nolint .keys = character(0), mutate_code = list(), mutate_vars = list(), - data_hash = character(0), join_keys = NULL, ## __Private Methods ==== @@ -614,7 +615,9 @@ TealDataset <- R6::R6Class( # nolint return(invisible(self)) }, mutate_eager = function() { - logger::log_trace("TealDatasetConnector$mutate_eager executing mutate code for dataset: { deparse1(self$get_dataname()) }...") + logger::log_trace( + "TealDatasetConnector$mutate_eager executing mutate code for dataset: { deparse1(self$get_dataname()) }..." + ) new_df <- private$execute_code( code = private$mutate_list_to_code_class(), vars = c( @@ -628,7 +631,7 @@ TealDataset <- R6::R6Class( # nolint # code set after successful evaluation # otherwise code != dataset - # private$code$append(private$mutate_code) + # private$code$append(private$mutate_code) # nolint private$append_mutate_code() self$set_vars(private$mutate_vars) private$mutate_code <- list() @@ -642,7 +645,9 @@ TealDataset <- R6::R6Class( # nolint vars = list() ) - logger::log_trace("TealDatasetConnector$mutate_eager executed mutate code for dataset: { deparse1(self$get_dataname()) }.") + logger::log_trace( + "TealDatasetConnector$mutate_eager executed mutate code for dataset: { deparse1(self$get_dataname()) }." + ) new_self }, @@ -788,13 +793,6 @@ TealDataset <- R6::R6Class( # nolint return(new_set) }, - # Calculates the MD5 hash of the raw data stored in this TealDataset. - # @return NULL - calculate_hash = function() { - private$data_hash <- digest::digest(self$get_raw_data(), algo = "md5") - NULL - }, - # Set the name for the dataset # @param dataname (`character`) the new name # @return self invisibly for chaining diff --git a/man/CDISCTealDataset.Rd b/man/CDISCTealDataset.Rd index be5a18f83..b7c02119b 100644 --- a/man/CDISCTealDataset.Rd +++ b/man/CDISCTealDataset.Rd @@ -57,7 +57,6 @@ x$get_parent()
  • teal.data::TealDataset$get_dataset()
  • teal.data::TealDataset$get_dataset_label()
  • teal.data::TealDataset$get_factor_colnames()
  • -
  • teal.data::TealDataset$get_hash()
  • teal.data::TealDataset$get_join_keys()
  • teal.data::TealDataset$get_keys()
  • teal.data::TealDataset$get_metadata()
  • diff --git a/man/dataset_class.Rd b/man/dataset_class.Rd index d5e58fcc7..4a8d6d336 100644 --- a/man/dataset_class.Rd +++ b/man/dataset_class.Rd @@ -61,7 +61,6 @@ test_dataset$reassign_datasets_vars(
  • teal.data::TealDataset$get_dataset()
  • teal.data::TealDataset$get_dataset_label()
  • teal.data::TealDataset$get_factor_colnames()
  • -
  • teal.data::TealDataset$get_hash()
  • teal.data::TealDataset$get_join_keys()
  • teal.data::TealDataset$get_keys()
  • teal.data::TealDataset$get_metadata()
  • @@ -314,7 +313,6 @@ The objects of this class are cloneable with this method. \item \href{#method-TealDataset-get_keys}{\code{TealDataset$get_keys()}} \item \href{#method-TealDataset-get_metadata}{\code{TealDataset$get_metadata()}} \item \href{#method-TealDataset-get_join_keys}{\code{TealDataset$get_join_keys()}} -\item \href{#method-TealDataset-get_hash}{\code{TealDataset$get_hash()}} \item \href{#method-TealDataset-get_var_r6}{\code{TealDataset$get_var_r6()}} \item \href{#method-TealDataset-reassign_datasets_vars}{\code{TealDataset$reassign_datasets_vars()}} \item \href{#method-TealDataset-set_dataset_label}{\code{TealDataset$set_dataset_label()}} @@ -701,19 +699,6 @@ Get \code{JoinKeys} object with keys used for joining. } } \if{html}{\out{
    }} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-TealDataset-get_hash}{}}} -\subsection{Method \code{get_hash()}}{ -Returns the string representation of the raw data hashed with the MD5 hash algorithm. -\subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{TealDataset$get_hash()}\if{html}{\out{
    }} -} - -\subsection{Returns}{ -\code{character} the hash of the raw data -} -} -\if{html}{\out{
    }} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-TealDataset-get_var_r6}{}}} \subsection{Method \code{get_var_r6()}}{ diff --git a/tests/testthat/test-TealData.R b/tests/testthat/test-TealData.R index d25c28bcb..e4078c89f 100644 --- a/tests/testthat/test-TealData.R +++ b/tests/testthat/test-TealData.R @@ -147,16 +147,6 @@ test_that("deep clone", { expect_false(rlang::is_reference(x$get_join_keys(), x_copy$get_join_keys())) }) -testthat::test_that("The hashes of TealDatasets objects are correct after mutating the TealData object", { - mutated_iris <- iris - mutated_iris$test <- 1 - mutated_iris_hash <- digest::digest(mutated_iris, algo = "md5") - rd <- teal_data(dataset("iris", iris)) - mutate_data(rd, code = "iris$test <- 1") - rd$execute_mutate() - testthat::expect_equal(rd$get_dataset("iris")$get_hash(), mutated_iris_hash) -}) - testthat::test_that("execute_mutate returns current datasets if no mutate_code", { pull_fun <- callable_function(data.frame) pull_fun$set_args(args = list(head_letters = head(letters))) diff --git a/tests/testthat/test-TealDataset.R b/tests/testthat/test-TealDataset.R index 4740a4fe6..5d9a7c5ff 100644 --- a/tests/testthat/test-TealDataset.R +++ b/tests/testthat/test-TealDataset.R @@ -445,12 +445,6 @@ testthat::test_that("TealDataset$check returns FALSE if the passed code creates testthat::expect_false(test_ds0$check()) }) -testthat::test_that("get_hash returns the hash of the object passed to the constructor", { - iris_hash <- digest::digest(iris, algo = "md5") - ds <- TealDataset$new("iris", iris) - testthat::expect_equal(ds$get_hash(), iris_hash) -}) - testthat::test_that("get_code_class returns the correct CodeClass object", { cc1 <- CodeClass$new(code = "iris <- head(iris)", dataname = "iris") cc2 <- CodeClass$new(code = "mtcars <- head(mtcars)", dataname = "mtcars", deps = "iris") @@ -758,14 +752,6 @@ test_that("mutate_dataset with vars argument", { ) }) -testthat::test_that("get_hash returns the correct hash after mutating the TealDataset object", { - mutated_iris <- iris - mutated_iris$test <- 1 - mutated_iris_hash <- digest::digest(mutated_iris, algo = "md5") - ds <- TealDataset$new("iris", iris) %>% mutate_dataset("iris$test <- 1") - testthat::expect_equal(ds$get_hash(), mutated_iris_hash) -}) - testthat::test_that("dataset$merge_join_keys does not throw on basic input", { dataset1 <- TealDataset$new("iris", head(iris)) dataset1$set_join_keys(join_key("iris", "other_dataset", c("Species" = "some_col")))